summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Widenius <monty@askmonty.org>2013-03-26 00:03:13 +0200
committerMichael Widenius <monty@askmonty.org>2013-03-26 00:03:13 +0200
commit068c61978e3a81836d52b8caf11e044290159ad1 (patch)
tree2cbca861ab2cebe3bd99379ca9668bb483ca0d2a
parent35bc8f9f4353b64da215e52ff6f1612a8ce66f43 (diff)
downloadmariadb-git-068c61978e3a81836d52b8caf11e044290159ad1.tar.gz
Temporary commit of 10.0-merge
-rw-r--r--.bzrignore1
-rwxr-xr-xBUILD/SETUP.sh3
-rw-r--r--CMakeLists.txt9
-rw-r--r--client/CMakeLists.txt6
-rw-r--r--client/mysql.cc13
-rw-r--r--client/mysql_upgrade.c2
-rw-r--r--client/mysqladmin.cc3
-rw-r--r--client/mysqlbinlog.cc2
-rw-r--r--client/mysqlcheck.c4
-rw-r--r--client/mysqltest.cc4
-rw-r--r--cmake/configure.pl20
-rw-r--r--cmake/cpu_info.cmake30
-rw-r--r--cmake/libevent.cmake89
-rw-r--r--cmake/libutils.cmake8
-rw-r--r--cmake/ssl.cmake198
-rw-r--r--dbug/dbug.c4
-rw-r--r--extra/comp_err.c11
-rw-r--r--extra/my_print_defaults.c2
-rw-r--r--include/big_endian.h82
-rw-r--r--include/byte_order_generic.h95
-rw-r--r--include/byte_order_generic_x86.h97
-rw-r--r--include/byte_order_generic_x86_64.h83
-rw-r--r--include/crypt_genhash_impl.h32
-rw-r--r--include/errmsg.h12
-rw-r--r--include/ft_global.h21
-rw-r--r--include/little_endian.h75
-rw-r--r--include/m_ctype.h4
-rw-r--r--include/my_base.h58
-rw-r--r--include/my_byteorder.h54
-rw-r--r--include/my_default.h50
-rw-r--r--include/my_getopt.h5
-rw-r--r--include/my_global.h304
-rw-r--r--include/my_handler_errors.h4
-rw-r--r--include/my_md5.h94
-rw-r--r--include/my_rnd.h32
-rw-r--r--include/my_sys.h65
-rw-r--r--include/my_time.h4
-rw-r--r--include/mysql/client_authentication.h13
-rw-r--r--include/mysql/plugin.h12
-rw-r--r--include/mysql/plugin_audit.h.pp2
-rw-r--r--include/mysql/plugin_auth.h.pp2
-rw-r--r--include/mysql/plugin_ftparser.h.pp2
-rw-r--r--include/mysql/psi/mysql_file.h249
-rw-r--r--include/mysql/psi/mysql_idle.h4
-rw-r--r--include/mysql/psi/mysql_socket.h178
-rw-r--r--include/mysql/psi/mysql_stage.h4
-rw-r--r--include/mysql/psi/mysql_statement.h33
-rw-r--r--include/mysql/psi/mysql_table.h69
-rw-r--r--include/mysql/psi/mysql_thread.h80
-rw-r--r--include/mysql/psi/psi.h120
-rw-r--r--include/mysql/psi/psi_abi_v1.h.pp28
-rw-r--r--include/mysql/service_debug_sync.h11
-rw-r--r--include/mysql/service_my_plugin_log.h64
-rw-r--r--include/mysql_com.h44
-rw-r--r--include/password.h2
-rw-r--r--include/sha1.h81
-rw-r--r--include/thread_pool_priv.h1
-rw-r--r--libevent/CMakeLists.txt80
-rw-r--r--libevent/ChangeLog190
-rw-r--r--libevent/Doxyfile230
-rw-r--r--libevent/Makefile.am124
-rw-r--r--libevent/README57
-rw-r--r--libevent/WIN32-Code/event-config.h244
-rw-r--r--libevent/WIN32-Code/misc.c93
-rw-r--r--libevent/WIN32-Code/misc.h11
-rw-r--r--libevent/WIN32-Code/tree.h1354
-rw-r--r--libevent/WIN32-Code/win32.c486
-rw-r--r--libevent/WIN32-Prj/libevent.dsw74
-rw-r--r--libevent/WIN32-Prj/libevent.sln53
-rw-r--r--libevent/autogen.sh11
-rw-r--r--libevent/buffer.c451
-rw-r--r--libevent/cmake_install.cmake34
-rw-r--r--libevent/compat/sys/_time.h163
-rw-r--r--libevent/compat/sys/queue.h488
-rw-r--r--libevent/configure.in387
-rw-r--r--libevent/devpoll.c417
-rw-r--r--libevent/epoll.c373
-rw-r--r--libevent/epoll_sub.c52
-rw-r--r--libevent/evbuffer.c455
-rw-r--r--libevent/evdns.3322
-rw-r--r--libevent/evdns.c3200
-rw-r--r--libevent/evdns.h528
-rw-r--r--libevent/event-internal.h102
-rw-r--r--libevent/event.3624
-rw-r--r--libevent/event.c1025
-rw-r--r--libevent/event.h1175
-rw-r--r--libevent/event_rpcgen.py1417
-rw-r--r--libevent/event_tagging.c443
-rw-r--r--libevent/evhttp.h371
-rw-r--r--libevent/evport.c513
-rw-r--r--libevent/evrpc-internal.h87
-rw-r--r--libevent/evrpc.c661
-rw-r--r--libevent/evrpc.h486
-rw-r--r--libevent/evsignal.h54
-rw-r--r--libevent/evutil.c245
-rw-r--r--libevent/evutil.h185
-rw-r--r--libevent/http-internal.h154
-rw-r--r--libevent/http.c2830
-rw-r--r--libevent/kqueue.c449
-rw-r--r--libevent/log.c187
-rw-r--r--libevent/log.h51
-rw-r--r--libevent/min_heap.h150
-rw-r--r--libevent/poll.c379
-rw-r--r--libevent/sample/Makefile.am14
-rw-r--r--libevent/sample/event-test.c139
-rw-r--r--libevent/sample/signal-test.c63
-rw-r--r--libevent/sample/time-test.c70
-rw-r--r--libevent/select.c356
-rw-r--r--libevent/signal.c357
-rw-r--r--libevent/strlcpy-internal.h23
-rw-r--r--libevent/strlcpy.c76
-rw-r--r--libevent/test/Makefile.am35
-rw-r--r--libevent/test/bench.c188
-rw-r--r--libevent/test/regress.c1703
-rw-r--r--libevent/test/regress.gen.c872
-rw-r--r--libevent/test/regress.gen.h183
-rw-r--r--libevent/test/regress.h45
-rw-r--r--libevent/test/regress.rpc20
-rw-r--r--libevent/test/regress_dns.c376
-rw-r--r--libevent/test/regress_http.c1476
-rw-r--r--libevent/test/regress_rpc.c631
-rw-r--r--libevent/test/test-eof.c82
-rw-r--r--libevent/test/test-init.c33
-rw-r--r--libevent/test/test-time.c82
-rw-r--r--libevent/test/test-weof.c80
-rw-r--r--libevent/test/test.sh91
-rw-r--r--libmysql/CMakeLists.txt5
-rw-r--r--libmysql/errmsg.c2
-rw-r--r--libmysql/libmysql.c10
-rw-r--r--mysys/CMakeLists.txt7
-rw-r--r--mysys/array.c4
-rw-r--r--mysys/lf_alloc-pin.c2
-rw-r--r--mysys/lf_dynarray.c2
-rw-r--r--mysys/ma_dyncol.c19
-rw-r--r--mysys/mf_dirname.c2
-rw-r--r--mysys/mf_format.c2
-rw-r--r--mysys/mf_iocache.c8
-rw-r--r--mysys/my_aes.c227
-rw-r--r--mysys/my_alloc.c2
-rw-r--r--mysys/my_bitmap.c2
-rw-r--r--mysys/my_compare.c4
-rw-r--r--mysys/my_compress.c2
-rw-r--r--mysys/my_conio.c6
-rw-r--r--mysys/my_default.c (renamed from mysys/default.c)7
-rw-r--r--mysys/my_error.c176
-rw-r--r--mysys/my_file.c8
-rw-r--r--mysys/my_getopt.c1
-rw-r--r--mysys/my_rnd.c37
-rw-r--r--mysys/my_thr_init.c14
-rw-r--r--mysys/my_uuid.c3
-rw-r--r--mysys/psi_noop.c49
-rw-r--r--mysys/rijndael.c1379
-rw-r--r--mysys/sha1.c422
-rw-r--r--mysys/stacktrace.c4
-rw-r--r--mysys/string.c74
-rw-r--r--mysys/testhash.c4
-rw-r--r--mysys_ssl/CMakeLists.txt48
-rw-r--r--mysys_ssl/CTestTestfile.cmake6
-rw-r--r--mysys_ssl/cmake_install.cmake34
-rw-r--r--mysys_ssl/crypt_genhash_impl.cc454
-rw-r--r--mysys_ssl/my_aes.cc278
-rw-r--r--mysys_ssl/my_md5.cc68
-rw-r--r--mysys_ssl/my_rnd.cc103
-rw-r--r--mysys_ssl/my_sha1.cc141
-rw-r--r--mysys_ssl/my_sha2.cc68
-rw-r--r--sql-common/client.c2
-rw-r--r--sql-common/client_authentication.cc253
-rw-r--r--sql-common/my_time.c2
-rw-r--r--sql/CMakeLists.txt8
-rw-r--r--sql/debug_sync.cc16
-rw-r--r--sql/derror.cc18
-rw-r--r--sql/field.cc36
-rw-r--r--sql/field.h51
-rw-r--r--sql/filesort.cc8
-rw-r--r--sql/ha_ndbcluster.cc2
-rw-r--r--sql/handler.cc37
-rw-r--r--sql/handler.h439
-rw-r--r--sql/item.cc28
-rw-r--r--sql/item_buff.cc4
-rw-r--r--sql/item_cmpfunc.cc48
-rw-r--r--sql/item_create.cc2
-rw-r--r--sql/item_func.cc34
-rw-r--r--sql/item_func.h2
-rw-r--r--sql/item_strfunc.cc22
-rw-r--r--sql/item_strfunc.h2
-rw-r--r--sql/item_sum.cc14
-rw-r--r--sql/item_sum.h4
-rw-r--r--sql/item_timefunc.cc30
-rw-r--r--sql/item_timefunc.h4
-rw-r--r--sql/key.cc20
-rw-r--r--sql/log.cc8
-rw-r--r--sql/log_event.cc10
-rw-r--r--sql/log_event_old.cc2
-rw-r--r--sql/mdl.h2
-rw-r--r--sql/multi_range_read.cc4
-rw-r--r--sql/mysqld.cc8
-rw-r--r--sql/mysqld.h1
-rw-r--r--sql/net_serv.cc4
-rw-r--r--sql/opt_range.cc36
-rw-r--r--sql/opt_range.h4
-rw-r--r--sql/opt_range_mrr.cc2
-rw-r--r--sql/opt_subselect.cc6
-rw-r--r--sql/opt_table_elimination.cc4
-rw-r--r--sql/password.c115
-rw-r--r--sql/protocol.cc10
-rw-r--r--sql/rpl_mi.cc4
-rw-r--r--sql/rpl_record.cc2
-rw-r--r--sql/rpl_rli.cc4
-rw-r--r--sql/rpl_utility.cc2
-rw-r--r--sql/rpl_utility.h2
-rw-r--r--sql/share/errmsg-utf8.txt1221
-rw-r--r--sql/signal_handler.cc2
-rw-r--r--sql/slave.cc10
-rw-r--r--sql/sp_head.cc4
-rw-r--r--sql/spatial.h4
-rw-r--r--sql/sql_acl.cc2
-rw-r--r--sql/sql_alter.h395
-rw-r--r--sql/sql_analyse.cc14
-rw-r--r--sql/sql_cache.cc18
-rw-r--r--sql/sql_class.cc89
-rw-r--r--sql/sql_class.h36
-rw-r--r--sql/sql_client.cc2
-rw-r--r--sql/sql_connect.cc4
-rw-r--r--sql/sql_const.h11
-rw-r--r--sql/sql_error.cc2
-rw-r--r--sql/sql_join_cache.cc4
-rw-r--r--sql/sql_join_cache.h4
-rw-r--r--sql/sql_lex.h105
-rw-r--r--sql/sql_load.cc2
-rw-r--r--sql/sql_parse.cc4
-rw-r--r--sql/sql_partition.cc2
-rw-r--r--sql/sql_plugin.cc2
-rw-r--r--sql/sql_prepare.cc2
-rw-r--r--sql/sql_profile.cc2
-rw-r--r--sql/sql_repl.cc18
-rw-r--r--sql/sql_select.cc28
-rw-r--r--sql/sql_show.cc16
-rw-r--r--sql/sql_string.cc71
-rw-r--r--sql/sql_string.h10
-rw-r--r--sql/sql_table.cc2
-rw-r--r--sql/sql_yacc.yy4
-rw-r--r--sql/structs.h8
-rw-r--r--sql/sys_vars.cc7
-rw-r--r--sql/table.cc55
-rw-r--r--sql/table.h6
-rw-r--r--sql/thr_malloc.cc2
-rw-r--r--sql/tztime.cc10
-rw-r--r--sql/unireg.cc4
-rw-r--r--storage/archive/ha_archive.cc2
-rw-r--r--storage/csv/ha_tina.cc4
-rw-r--r--storage/federated/ha_federated.cc6
-rw-r--r--storage/federatedx/ha_federatedx.cc4
-rw-r--r--storage/heap/ha_heap.cc12
-rw-r--r--storage/heap/hp_create.c10
-rw-r--r--storage/heap/hp_test2.c4
-rw-r--r--storage/innobase/CMakeLists.txt59
-rw-r--r--storage/innobase/api/api0api.cc3859
-rw-r--r--storage/innobase/api/api0misc.cc206
-rw-r--r--storage/innobase/btr/btr0btr.cc591
-rw-r--r--storage/innobase/btr/btr0cur.cc728
-rw-r--r--storage/innobase/btr/btr0pcur.cc73
-rw-r--r--storage/innobase/btr/btr0sea.cc28
-rw-r--r--storage/innobase/buf/buf0buddy.cc2
-rw-r--r--storage/innobase/buf/buf0buf.cc294
-rw-r--r--storage/innobase/buf/buf0dblwr.cc161
-rw-r--r--storage/innobase/buf/buf0dump.cc11
-rw-r--r--storage/innobase/buf/buf0flu.cc707
-rw-r--r--storage/innobase/buf/buf0lru.cc607
-rw-r--r--storage/innobase/buf/buf0rea.cc35
-rw-r--r--storage/innobase/dict/dict0boot.cc49
-rw-r--r--storage/innobase/dict/dict0crea.cc577
-rw-r--r--storage/innobase/dict/dict0dict.cc1488
-rw-r--r--storage/innobase/dict/dict0load.cc857
-rw-r--r--storage/innobase/dict/dict0mem.cc195
-rw-r--r--storage/innobase/dict/dict0stats.cc2841
-rw-r--r--storage/innobase/dict/dict0stats_bg.cc392
-rw-r--r--storage/innobase/fil/fil0fil.cc2919
-rw-r--r--storage/innobase/fsp/fsp0fsp.cc250
-rw-r--r--storage/innobase/fts/fts0ast.cc98
-rw-r--r--storage/innobase/fts/fts0blex.cc232
-rw-r--r--storage/innobase/fts/fts0blex.l2
-rw-r--r--storage/innobase/fts/fts0config.cc62
-rw-r--r--storage/innobase/fts/fts0fts.cc846
-rw-r--r--storage/innobase/fts/fts0opt.cc305
-rw-r--r--storage/innobase/fts/fts0pars.cc2
-rw-r--r--storage/innobase/fts/fts0que.cc571
-rw-r--r--storage/innobase/fts/fts0sql.cc12
-rw-r--r--storage/innobase/fts/fts0tlex.cc160
-rw-r--r--storage/innobase/fts/fts0tlex.l2
-rw-r--r--storage/innobase/ha/ha0ha.cc12
-rw-r--r--storage/innobase/ha/hash0hash.cc8
-rw-r--r--storage/innobase/handler/ha_innodb.cc4229
-rw-r--r--storage/innobase/handler/ha_innodb.h287
-rw-r--r--storage/innobase/handler/handler0alter.cc5559
-rw-r--r--storage/innobase/handler/i_s.cc1476
-rw-r--r--storage/innobase/handler/i_s.h56
-rw-r--r--storage/innobase/ibuf/ibuf0ibuf.cc469
-rw-r--r--storage/innobase/include/api0api.h1282
-rw-r--r--storage/innobase/include/api0misc.h78
-rw-r--r--storage/innobase/include/btr0btr.h152
-rw-r--r--storage/innobase/include/btr0btr.ic19
-rw-r--r--storage/innobase/include/btr0cur.h142
-rw-r--r--storage/innobase/include/btr0cur.ic13
-rw-r--r--storage/innobase/include/btr0pcur.h32
-rw-r--r--storage/innobase/include/btr0pcur.ic39
-rw-r--r--storage/innobase/include/btr0sea.h12
-rw-r--r--storage/innobase/include/btr0sea.ic2
-rw-r--r--storage/innobase/include/btr0types.h8
-rw-r--r--storage/innobase/include/buf0buf.h88
-rw-r--r--storage/innobase/include/buf0buf.ic18
-rw-r--r--storage/innobase/include/buf0dblwr.h5
-rw-r--r--storage/innobase/include/buf0flu.h96
-rw-r--r--storage/innobase/include/buf0flu.ic7
-rw-r--r--storage/innobase/include/buf0lru.h29
-rw-r--r--storage/innobase/include/buf0types.h29
-rw-r--r--storage/innobase/include/data0data.h142
-rw-r--r--storage/innobase/include/data0data.ic28
-rw-r--r--storage/innobase/include/data0type.h10
-rw-r--r--storage/innobase/include/data0type.ic31
-rw-r--r--storage/innobase/include/data0types.h4
-rw-r--r--storage/innobase/include/db0err.h39
-rw-r--r--storage/innobase/include/dict0boot.h70
-rw-r--r--storage/innobase/include/dict0boot.ic23
-rw-r--r--storage/innobase/include/dict0crea.h69
-rw-r--r--storage/innobase/include/dict0dict.h752
-rw-r--r--storage/innobase/include/dict0dict.ic328
-rw-r--r--storage/innobase/include/dict0load.h105
-rw-r--r--storage/innobase/include/dict0mem.h297
-rw-r--r--storage/innobase/include/dict0stats.h146
-rw-r--r--storage/innobase/include/dict0stats.ic250
-rw-r--r--storage/innobase/include/dict0stats_bg.h116
-rw-r--r--storage/innobase/include/dict0types.h25
-rw-r--r--storage/innobase/include/dyn0dyn.h7
-rw-r--r--storage/innobase/include/dyn0dyn.ic4
-rw-r--r--storage/innobase/include/fil0fil.h374
-rw-r--r--storage/innobase/include/fsp0fsp.h86
-rw-r--r--storage/innobase/include/fsp0fsp.ic148
-rw-r--r--storage/innobase/include/fts0ast.h76
-rw-r--r--storage/innobase/include/fts0fts.h183
-rw-r--r--storage/innobase/include/fts0priv.h195
-rw-r--r--storage/innobase/include/fts0priv.ic49
-rw-r--r--storage/innobase/include/fts0types.h52
-rw-r--r--storage/innobase/include/fts0types.ic40
-rw-r--r--storage/innobase/include/ha0ha.h5
-rw-r--r--storage/innobase/include/ha0storage.h2
-rw-r--r--storage/innobase/include/ha0storage.ic6
-rw-r--r--storage/innobase/include/ha_prototypes.h216
-rw-r--r--storage/innobase/include/handler0alter.h86
-rw-r--r--storage/innobase/include/hash0hash.h16
-rw-r--r--storage/innobase/include/hash0hash.ic4
-rw-r--r--storage/innobase/include/ibuf0ibuf.h65
-rw-r--r--storage/innobase/include/ibuf0ibuf.ic31
-rw-r--r--storage/innobase/include/ibuf0types.h2
-rw-r--r--storage/innobase/include/lock0iter.h4
-rw-r--r--storage/innobase/include/lock0lock.h89
-rw-r--r--storage/innobase/include/lock0priv.h10
-rw-r--r--storage/innobase/include/lock0types.h4
-rw-r--r--storage/innobase/include/log0log.h43
-rw-r--r--storage/innobase/include/log0log.ic12
-rw-r--r--storage/innobase/include/log0recv.h41
-rw-r--r--storage/innobase/include/mach0data.h34
-rw-r--r--storage/innobase/include/mach0data.ic99
-rw-r--r--storage/innobase/include/mem0dbg.h2
-rw-r--r--storage/innobase/include/mem0mem.h12
-rw-r--r--storage/innobase/include/mem0mem.ic11
-rw-r--r--storage/innobase/include/mem0pool.h9
-rw-r--r--storage/innobase/include/mtr0log.h21
-rw-r--r--storage/innobase/include/mtr0mtr.h15
-rw-r--r--storage/innobase/include/mtr0mtr.ic4
-rw-r--r--storage/innobase/include/mtr0types.h2
-rw-r--r--storage/innobase/include/os0file.h101
-rw-r--r--storage/innobase/include/os0sync.h74
-rw-r--r--storage/innobase/include/os0sync.ic10
-rw-r--r--storage/innobase/include/page0cur.h32
-rw-r--r--storage/innobase/include/page0cur.ic29
-rw-r--r--storage/innobase/include/page0page.h24
-rw-r--r--storage/innobase/include/page0page.ic37
-rw-r--r--storage/innobase/include/page0types.h40
-rw-r--r--storage/innobase/include/page0zip.h64
-rw-r--r--storage/innobase/include/page0zip.ic73
-rw-r--r--storage/innobase/include/pars0pars.h61
-rw-r--r--storage/innobase/include/pars0sym.h8
-rw-r--r--storage/innobase/include/pars0types.h36
-rw-r--r--storage/innobase/include/que0que.h12
-rw-r--r--storage/innobase/include/que0types.h9
-rw-r--r--storage/innobase/include/read0read.h23
-rw-r--r--storage/innobase/include/read0read.ic93
-rw-r--r--storage/innobase/include/read0types.h4
-rw-r--r--storage/innobase/include/rem0cmp.h27
-rw-r--r--storage/innobase/include/rem0rec.h354
-rw-r--r--storage/innobase/include/rem0rec.ic29
-rw-r--r--storage/innobase/include/rem0types.h11
-rw-r--r--storage/innobase/include/row0ext.h2
-rw-r--r--storage/innobase/include/row0ftsort.h42
-rw-r--r--storage/innobase/include/row0import.h91
-rw-r--r--storage/innobase/include/row0import.ic25
-rw-r--r--storage/innobase/include/row0ins.h129
-rw-r--r--storage/innobase/include/row0log.h241
-rw-r--r--storage/innobase/include/row0log.ic84
-rw-r--r--storage/innobase/include/row0merge.h284
-rw-r--r--storage/innobase/include/row0mysql.h199
-rw-r--r--storage/innobase/include/row0purge.h19
-rw-r--r--storage/innobase/include/row0quiesce.h74
-rw-r--r--storage/innobase/include/row0quiesce.ic26
-rw-r--r--storage/innobase/include/row0row.h92
-rw-r--r--storage/innobase/include/row0row.ic27
-rw-r--r--storage/innobase/include/row0sel.h33
-rw-r--r--storage/innobase/include/row0types.h38
-rw-r--r--storage/innobase/include/row0uins.h8
-rw-r--r--storage/innobase/include/row0umod.h8
-rw-r--r--storage/innobase/include/row0undo.h2
-rw-r--r--storage/innobase/include/row0upd.h61
-rw-r--r--storage/innobase/include/row0upd.ic19
-rw-r--r--storage/innobase/include/row0vers.h18
-rw-r--r--storage/innobase/include/srv0mon.h142
-rw-r--r--storage/innobase/include/srv0srv.h263
-rw-r--r--storage/innobase/include/srv0start.h36
-rw-r--r--storage/innobase/include/sync0arr.h4
-rw-r--r--storage/innobase/include/sync0rw.h124
-rw-r--r--storage/innobase/include/sync0rw.ic118
-rw-r--r--storage/innobase/include/sync0sync.h75
-rw-r--r--storage/innobase/include/sync0sync.ic87
-rw-r--r--storage/innobase/include/sync0types.h5
-rw-r--r--storage/innobase/include/trx0i_s.h48
-rw-r--r--storage/innobase/include/trx0purge.h22
-rw-r--r--storage/innobase/include/trx0rec.h41
-rw-r--r--storage/innobase/include/trx0rec.ic2
-rw-r--r--storage/innobase/include/trx0roll.h44
-rw-r--r--storage/innobase/include/trx0rseg.h8
-rw-r--r--storage/innobase/include/trx0sys.h22
-rw-r--r--storage/innobase/include/trx0trx.h139
-rw-r--r--storage/innobase/include/trx0trx.ic13
-rw-r--r--storage/innobase/include/trx0types.h51
-rw-r--r--storage/innobase/include/trx0undo.h16
-rw-r--r--storage/innobase/include/trx0undo.ic15
-rw-r--r--storage/innobase/include/univ.i45
-rw-r--r--storage/innobase/include/usr0sess.h2
-rw-r--r--storage/innobase/include/usr0types.h2
-rw-r--r--storage/innobase/include/ut0bh.h4
-rw-r--r--storage/innobase/include/ut0counter.h203
-rw-r--r--storage/innobase/include/ut0crc32.h3
-rw-r--r--storage/innobase/include/ut0dbg.h4
-rw-r--r--storage/innobase/include/ut0list.h11
-rw-r--r--storage/innobase/include/ut0lst.h3
-rw-r--r--storage/innobase/include/ut0rbt.h20
-rw-r--r--storage/innobase/include/ut0ut.h38
-rw-r--r--storage/innobase/include/ut0vec.h12
-rw-r--r--storage/innobase/include/ut0vec.ic13
-rw-r--r--storage/innobase/include/ut0wqueue.h6
-rw-r--r--storage/innobase/lock/lock0lock.cc441
-rw-r--r--storage/innobase/lock/lock0wait.cc49
-rw-r--r--storage/innobase/log/log0log.cc270
-rw-r--r--storage/innobase/log/log0recv.cc470
-rw-r--r--storage/innobase/mem/mem0dbg.cc5
-rw-r--r--storage/innobase/mem/mem0pool.cc6
-rw-r--r--storage/innobase/mtr/mtr0log.cc73
-rw-r--r--storage/innobase/mtr/mtr0mtr.cc107
-rw-r--r--storage/innobase/os/os0file.cc1877
-rw-r--r--storage/innobase/os/os0sync.cc45
-rw-r--r--storage/innobase/os/os0thread.cc6
-rw-r--r--storage/innobase/page/page0cur.cc87
-rw-r--r--storage/innobase/page/page0page.cc96
-rw-r--r--storage/innobase/page/page0zip.cc117
-rw-r--r--storage/innobase/pars/lexyy.cc927
-rw-r--r--storage/innobase/pars/pars0lex.l2
-rw-r--r--storage/innobase/pars/pars0opt.cc4
-rw-r--r--storage/innobase/pars/pars0pars.cc86
-rw-r--r--storage/innobase/pars/pars0sym.cc2
-rw-r--r--storage/innobase/que/que0que.cc4
-rw-r--r--storage/innobase/read/read0read.cc76
-rw-r--r--storage/innobase/rem/rem0cmp.cc278
-rw-r--r--storage/innobase/rem/rem0rec.cc332
-rw-r--r--storage/innobase/row/row0ext.cc2
-rw-r--r--storage/innobase/row/row0ftsort.cc216
-rw-r--r--storage/innobase/row/row0import.cc3806
-rw-r--r--storage/innobase/row/row0ins.cc1229
-rw-r--r--storage/innobase/row/row0log.cc3219
-rw-r--r--storage/innobase/row/row0merge.cc2428
-rw-r--r--storage/innobase/row/row0mysql.cc1670
-rw-r--r--storage/innobase/row/row0purge.cc443
-rw-r--r--storage/innobase/row/row0quiesce.cc702
-rw-r--r--storage/innobase/row/row0row.cc199
-rw-r--r--storage/innobase/row/row0sel.cc348
-rw-r--r--storage/innobase/row/row0uins.cc194
-rw-r--r--storage/innobase/row/row0umod.cc510
-rw-r--r--storage/innobase/row/row0undo.cc23
-rw-r--r--storage/innobase/row/row0upd.cc507
-rw-r--r--storage/innobase/row/row0vers.cc88
-rw-r--r--storage/innobase/srv/srv0conc.cc29
-rw-r--r--storage/innobase/srv/srv0mon.cc221
-rw-r--r--storage/innobase/srv/srv0srv.cc588
-rw-r--r--storage/innobase/srv/srv0start.cc1765
-rw-r--r--storage/innobase/sync/sync0arr.cc28
-rw-r--r--storage/innobase/sync/sync0rw.cc186
-rw-r--r--storage/innobase/sync/sync0sync.cc157
-rw-r--r--storage/innobase/trx/trx0i_s.cc20
-rw-r--r--storage/innobase/trx/trx0purge.cc78
-rw-r--r--storage/innobase/trx/trx0rec.cc167
-rw-r--r--storage/innobase/trx/trx0roll.cc64
-rw-r--r--storage/innobase/trx/trx0sys.cc68
-rw-r--r--storage/innobase/trx/trx0trx.cc333
-rw-r--r--storage/innobase/trx/trx0undo.cc28
-rw-r--r--storage/innobase/ut/ut0crc32.cc10
-rw-r--r--storage/innobase/ut/ut0mem.cc7
-rw-r--r--storage/innobase/ut/ut0rbt.cc2
-rw-r--r--storage/innobase/ut/ut0ut.cc98
-rw-r--r--storage/innobase/ut/ut0vec.cc4
-rw-r--r--storage/innobase/ut/ut0wqueue.cc2
-rw-r--r--storage/maria/ha_maria.cc12
-rw-r--r--storage/maria/ma_bitmap.c10
-rw-r--r--storage/maria/ma_blockrec.c8
-rw-r--r--storage/maria/ma_cache.c2
-rw-r--r--storage/maria/ma_check.c13
-rw-r--r--storage/maria/ma_checkpoint.c8
-rw-r--r--storage/maria/ma_close.c33
-rw-r--r--storage/maria/ma_commit.c4
-rw-r--r--storage/maria/ma_create.c16
-rw-r--r--storage/maria/ma_delete.c6
-rw-r--r--storage/maria/ma_dynrec.c2
-rw-r--r--storage/maria/ma_extra.c2
-rw-r--r--storage/maria/ma_ft_boolean_search.c4
-rw-r--r--storage/maria/ma_info.c2
-rw-r--r--storage/maria/ma_key_recover.c12
-rw-r--r--storage/maria/ma_loghandler.c6
-rw-r--r--storage/maria/ma_open.c41
-rw-r--r--storage/maria/ma_packrec.c4
-rw-r--r--storage/maria/ma_recovery.c2
-rw-r--r--storage/maria/ma_rt_mbr.c24
-rw-r--r--storage/maria/ma_sort.c8
-rw-r--r--storage/maria/ma_test1.c2
-rw-r--r--storage/maria/ma_test2.c2
-rw-r--r--storage/maria/ma_write.c4
-rw-r--r--storage/maria/maria_def.h1
-rw-r--r--storage/maria/maria_pack.c4
-rw-r--r--storage/maria/trnman.c2
-rwxr-xr-xstorage/maria/unittest/ma_test_all-t4
-rw-r--r--storage/myisam/ha_myisam.cc59
-rw-r--r--storage/myisam/mi_cache.c2
-rw-r--r--storage/myisam/mi_check.c16
-rw-r--r--storage/myisam/mi_close.c9
-rw-r--r--storage/myisam/mi_create.c20
-rw-r--r--storage/myisam/mi_dynrec.c27
-rw-r--r--storage/myisam/mi_extra.c2
-rw-r--r--storage/myisam/mi_open.c51
-rw-r--r--storage/myisam/mi_packrec.c4
-rw-r--r--storage/myisam/mi_search.c15
-rw-r--r--storage/myisam/mi_test1.c2
-rw-r--r--storage/myisam/mi_test2.c2
-rw-r--r--storage/myisam/myisamchk.c1
-rw-r--r--storage/myisam/myisamlog.c2
-rw-r--r--storage/myisam/myisampack.c7
-rw-r--r--storage/myisam/rt_mbr.c24
-rw-r--r--storage/myisam/sort.c8
-rw-r--r--storage/myisammrg/ha_myisammrg.cc2
-rw-r--r--storage/perfschema/CMakeLists.txt9
-rw-r--r--storage/perfschema/cursor_by_thread_connect_attr.cc71
-rw-r--r--storage/perfschema/cursor_by_thread_connect_attr.h81
-rw-r--r--storage/perfschema/gen_pfs_lex_token.cc4
-rw-r--r--storage/perfschema/ha_perfschema.cc20
-rw-r--r--storage/perfschema/ha_perfschema.h3
-rw-r--r--storage/perfschema/pfs.cc595
-rw-r--r--storage/perfschema/pfs_account.cc5
-rw-r--r--storage/perfschema/pfs_account.h4
-rw-r--r--storage/perfschema/pfs_atomic.h79
-rw-r--r--storage/perfschema/pfs_autosize.cc366
-rw-r--r--storage/perfschema/pfs_digest.cc205
-rw-r--r--storage/perfschema/pfs_digest.h54
-rw-r--r--storage/perfschema/pfs_engine_table.cc281
-rw-r--r--storage/perfschema/pfs_engine_table.h2
-rw-r--r--storage/perfschema/pfs_events.h2
-rw-r--r--storage/perfschema/pfs_events_waits.cc10
-rw-r--r--storage/perfschema/pfs_global.cc72
-rw-r--r--storage/perfschema/pfs_global.h15
-rw-r--r--storage/perfschema/pfs_host.cc5
-rw-r--r--storage/perfschema/pfs_host.h4
-rw-r--r--storage/perfschema/pfs_instr.cc328
-rw-r--r--storage/perfschema/pfs_instr.h98
-rw-r--r--storage/perfschema/pfs_instr_class.cc69
-rw-r--r--storage/perfschema/pfs_instr_class.h64
-rw-r--r--storage/perfschema/pfs_lock.h107
-rw-r--r--storage/perfschema/pfs_server.cc9
-rw-r--r--storage/perfschema/pfs_server.h113
-rw-r--r--storage/perfschema/pfs_setup_actor.cc10
-rw-r--r--storage/perfschema/pfs_setup_actor.h4
-rw-r--r--storage/perfschema/pfs_setup_object.cc10
-rw-r--r--storage/perfschema/pfs_setup_object.h4
-rw-r--r--storage/perfschema/pfs_stat.h167
-rw-r--r--storage/perfschema/pfs_timer.cc71
-rw-r--r--storage/perfschema/pfs_user.cc5
-rw-r--r--storage/perfschema/pfs_user.h4
-rw-r--r--storage/perfschema/pfs_visitor.cc89
-rw-r--r--storage/perfschema/table_esgs_by_thread_by_event_name.cc4
-rw-r--r--storage/perfschema/table_esgs_by_thread_by_event_name.h2
-rw-r--r--storage/perfschema/table_esgs_global_by_event_name.cc3
-rw-r--r--storage/perfschema/table_esms_by_digest.cc24
-rw-r--r--storage/perfschema/table_esms_by_thread_by_event_name.cc4
-rw-r--r--storage/perfschema/table_esms_by_thread_by_event_name.h2
-rw-r--r--storage/perfschema/table_esms_global_by_event_name.cc3
-rw-r--r--storage/perfschema/table_events_stages.cc4
-rw-r--r--storage/perfschema/table_events_stages.h2
-rw-r--r--storage/perfschema/table_events_statements.cc6
-rw-r--r--storage/perfschema/table_events_statements.h2
-rw-r--r--storage/perfschema/table_events_waits.cc7
-rw-r--r--storage/perfschema/table_events_waits.h2
-rw-r--r--storage/perfschema/table_events_waits_summary.cc10
-rw-r--r--storage/perfschema/table_ews_by_thread_by_event_name.cc4
-rw-r--r--storage/perfschema/table_ews_by_thread_by_event_name.h2
-rw-r--r--storage/perfschema/table_ews_global_by_event_name.cc8
-rw-r--r--storage/perfschema/table_helper.cc27
-rw-r--r--storage/perfschema/table_helper.h4
-rw-r--r--storage/perfschema/table_host_cache.cc3
-rw-r--r--storage/perfschema/table_os_global_by_type.cc9
-rw-r--r--storage/perfschema/table_session_account_connect_attrs.cc70
-rw-r--r--storage/perfschema/table_session_account_connect_attrs.h50
-rw-r--r--storage/perfschema/table_session_connect.cc268
-rw-r--r--storage/perfschema/table_session_connect.h77
-rw-r--r--storage/perfschema/table_session_connect_attrs.cc43
-rw-r--r--storage/perfschema/table_session_connect_attrs.h47
-rw-r--r--storage/perfschema/table_setup_actors.cc41
-rw-r--r--storage/perfschema/table_setup_objects.cc39
-rw-r--r--storage/perfschema/table_socket_instances.cc4
-rw-r--r--storage/perfschema/table_socket_instances.h2
-rw-r--r--storage/perfschema/table_sync_instances.cc8
-rw-r--r--storage/perfschema/table_sync_instances.h4
-rw-r--r--storage/perfschema/table_threads.cc41
-rw-r--r--storage/perfschema/table_threads.h6
-rw-r--r--storage/perfschema/table_tiws_by_index_usage.cc12
-rw-r--r--storage/perfschema/unittest/CMakeLists.txt37
-rw-r--r--storage/perfschema/unittest/pfs-t.cc59
-rw-r--r--storage/perfschema/unittest/pfs_account-oom-t.cc1
-rw-r--r--storage/perfschema/unittest/pfs_connect_attr-t.cc345
-rw-r--r--storage/perfschema/unittest/pfs_host-oom-t.cc1
-rw-r--r--storage/perfschema/unittest/pfs_instr-oom-t.cc19
-rw-r--r--storage/perfschema/unittest/pfs_instr-t.cc42
-rw-r--r--storage/perfschema/unittest/pfs_instr_class-t.cc2
-rw-r--r--strings/ctype-big5.c4
-rw-r--r--strings/ctype-bin.c10
-rw-r--r--strings/ctype-gbk.c4
-rw-r--r--strings/ctype-mb.c6
-rw-r--r--strings/ctype-simple.c8
-rw-r--r--strings/ctype-tis620.c4
-rw-r--r--strings/ctype-uca.c2
-rw-r--r--strings/ctype-ucs2.c10
-rw-r--r--strings/ctype-utf8.c4
-rw-r--r--strings/ctype.c141
-rw-r--r--strings/decimal.c48
-rw-r--r--strings/dtoa.c6
-rw-r--r--strings/my_vsnprintf.c10
-rw-r--r--strings/str2int.c2
649 files changed, 87932 insertions, 22445 deletions
diff --git a/.bzrignore b/.bzrignore
index d190c8be0f7..6ff0b3d3505 100644
--- a/.bzrignore
+++ b/.bzrignore
@@ -1067,6 +1067,7 @@ vio/viotest.cpp
ylwrap
zlib/*.ds?
sql-bench/test-table-elimination
+sql/share/bulgarian
sql/share/czech
sql/share/danish
sql/share/dutch
diff --git a/BUILD/SETUP.sh b/BUILD/SETUP.sh
index a4853c823ab..a51d13f75fb 100755
--- a/BUILD/SETUP.sh
+++ b/BUILD/SETUP.sh
@@ -164,8 +164,7 @@ valgrind_flags="$valgrind_flags -DMYSQL_SERVER_SUFFIX=-valgrind-max"
valgrind_configs="--with-valgrind"
#
# Used in -debug builds
-debug_cflags="-DUNIV_MUST_NOT_INLINE -DEXTRA_DEBUG"
-debug_cflags="$debug_cflags -DSAFE_MUTEX -DSAFEMALLOC"
+debug_cflags="-DEXTRA_DEBUG -DSAFE_MUTEX -DSAFEMALLOC"
error_inject="--with-error-inject "
#
# Base C++ flags for all builds
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1b29c559415..201406084f4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -143,7 +143,9 @@ INCLUDE(install_layout)
# Add macros
INCLUDE(character_sets)
+INCLUDE(cpu_info)
INCLUDE(zlib)
+INCLUDE(libevent)
INCLUDE(ssl)
INCLUDE(readline)
INCLUDE(libutils)
@@ -208,7 +210,7 @@ ENDFOREACH()
# Add safemutex for debug configurations, except on Windows
# (safemutex has never worked on Windows)
-IF(NOT WIN32)
+IF(WITH_DEBUG AND NOT WIN32 AND NOT WITH_INNODB_MEMCACHED)
FOREACH(LANG C CXX)
SET(CMAKE_${LANG}_FLAGS_DEBUG "${CMAKE_${LANG}_FLAGS_DEBUG} -DSAFE_MUTEX")
ENDFOREACH()
@@ -260,10 +262,12 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}/include)
# Add bundled or system zlib.
MYSQL_CHECK_ZLIB_WITH_COMPRESS()
-# Optionally add bundled yassl/taocrypt or system openssl.
+# Add bundled yassl/taocrypt or system openssl.
MYSQL_CHECK_SSL()
# Add readline or libedit.
MYSQL_CHECK_READLINE()
+# Add libevent
+MYSQL_CHECK_LIBEVENT()
#
# Setup maintainer mode options. Platform checks are
@@ -301,6 +305,7 @@ ADD_SUBDIRECTORY(strings)
ADD_SUBDIRECTORY(vio)
ADD_SUBDIRECTORY(regex)
ADD_SUBDIRECTORY(mysys)
+ADD_SUBDIRECTORY(mysys_ssl)
ADD_SUBDIRECTORY(libmysql)
ADD_SUBDIRECTORY(client)
ADD_SUBDIRECTORY(extra)
diff --git a/client/CMakeLists.txt b/client/CMakeLists.txt
index e4507f9c8ba..9fed5b4ea19 100644
--- a/client/CMakeLists.txt
+++ b/client/CMakeLists.txt
@@ -15,6 +15,7 @@
INCLUDE_DIRECTORIES(
${CMAKE_SOURCE_DIR}/include
+ ${CMAKE_SOURCE_DIR}/mysys_ssl
${ZLIB_INCLUDE_DIR}
${SSL_INCLUDE_DIRS}
${CMAKE_SOURCE_DIR}/libmysql
@@ -25,6 +26,9 @@ INCLUDE_DIRECTORIES(
${CMAKE_CURRENT_BINARY_DIR}
)
+## We will need libeay32.dll and ssleay32.dll when running client executables.
+COPY_OPENSSL_DLLS(copy_openssl_client)
+
ADD_DEFINITIONS(${SSL_DEFINES})
MYSQL_ADD_EXECUTABLE(mysql completion_hash.cc mysql.cc readline.cc
${CMAKE_SOURCE_DIR}/sql/sql_string.cc)
@@ -78,7 +82,7 @@ ENDIF(WIN32)
ADD_EXECUTABLE(async_example async_example.c)
TARGET_LINK_LIBRARIES(async_example mysqlclient)
-SET_TARGET_PROPERTIES (mysqlcheck mysqldump mysqlimport mysql_upgrade mysqlshow mysqlslap mysql_plugin
+SET_TARGET_PROPERTIES (mysqlcheck mysqldump mysqlimport mysql_upgrade mysqlshow mysqlslap mysql_plugin async_example
PROPERTIES HAS_CXX TRUE)
ADD_DEFINITIONS(-DHAVE_DLOPEN)
diff --git a/client/mysql.cc b/client/mysql.cc
index 331302757a6..bb57c43674e 100644
--- a/client/mysql.cc
+++ b/client/mysql.cc
@@ -3339,9 +3339,9 @@ print_table_data(MYSQL_RES *result)
{
uint length= column_names ? field->name_length : 0;
if (quick)
- length=max(length,field->length);
+ length= MY_MAX(length,field->length);
else
- length=max(length,field->max_length);
+ length= MY_MAX(length,field->max_length);
if (length < 4 && !IS_NOT_NULL(field->flags))
length=4; // Room for "NULL"
field->max_length=length;
@@ -3361,8 +3361,8 @@ print_table_data(MYSQL_RES *result)
field->name,
field->name + name_length);
uint display_length= field->max_length + name_length - numcells;
- tee_fprintf(PAGER, " %-*s |",(int) min(display_length,
- MAX_COLUMN_LENGTH),
+ tee_fprintf(PAGER, " %-*s |",(int) MY_MIN(display_length,
+ MAX_COLUMN_LENGTH),
field->name);
num_flag[off]= IS_NUM(field->type);
}
@@ -3451,9 +3451,9 @@ static int get_field_disp_length(MYSQL_FIELD *field)
uint length= column_names ? field->name_length : 0;
if (quick)
- length= max(length, field->length);
+ length= MY_MAX(length, field->length);
else
- length= max(length, field->max_length);
+ length= MY_MAX(length, field->max_length);
if (length < 4 && !IS_NOT_NULL(field->flags))
length= 4; /* Room for "NULL" */
@@ -3469,6 +3469,7 @@ static int get_field_disp_length(MYSQL_FIELD *field)
@returns The max number of characters in any row of this result
*/
+
static int get_result_width(MYSQL_RES *result)
{
unsigned int len= 0;
diff --git a/client/mysql_upgrade.c b/client/mysql_upgrade.c
index feaf23b15ba..9a3c56507e7 100644
--- a/client/mysql_upgrade.c
+++ b/client/mysql_upgrade.c
@@ -576,7 +576,7 @@ static int extract_variable_from_show(DYNAMIC_STRING* ds, char* value)
if ((value_end= strchr(value_start, '\n')) == NULL)
return 1; /* Unexpected result */
- strncpy(value, value_start, min(FN_REFLEN, value_end-value_start));
+ strncpy(value, value_start, MY_MIN(FN_REFLEN, value_end-value_start));
return 0;
}
diff --git a/client/mysqladmin.cc b/client/mysqladmin.cc
index daa1115514e..a45c3b85596 100644
--- a/client/mysqladmin.cc
+++ b/client/mysqladmin.cc
@@ -23,7 +23,8 @@
#include <sys/stat.h>
#include <mysql.h>
#include <sql_common.h>
-#include <welcome_copyright_notice.h> /* ORACLE_WELCOME_COPYRIGHT_NOTICE */
+#include <welcome_copyright_notice.h>
+#include <my_rnd.h>
#define ADMIN_VERSION "9.1"
#define MAX_MYSQL_VAR 512
diff --git a/client/mysqlbinlog.cc b/client/mysqlbinlog.cc
index 87198347b10..32c02d03f37 100644
--- a/client/mysqlbinlog.cc
+++ b/client/mysqlbinlog.cc
@@ -2310,7 +2310,7 @@ static Exit_status dump_local_log_entries(PRINT_EVENT_INFO *print_event_info,
my_off_t length,tmp;
for (length= start_position_mot ; length > 0 ; length-=tmp)
{
- tmp=min(length,sizeof(buff));
+ tmp= MY_MIN(length,sizeof(buff));
if (my_b_read(file, buff, (uint) tmp))
{
error("Failed reading from file.");
diff --git a/client/mysqlcheck.c b/client/mysqlcheck.c
index 60778e12f06..f429837c1dd 100644
--- a/client/mysqlcheck.c
+++ b/client/mysqlcheck.c
@@ -787,8 +787,8 @@ static int handle_request_for_tables(char *tables, uint length)
org= ptr= strmov(strmov(query, op), " TABLE ");
ptr= fix_table_name(ptr, tables);
- strmake(table_name_buff, org, min((int) sizeof(table_name_buff)-1,
- (int) (ptr - org)));
+ strmake(table_name_buff, org, MY_MIN((int) sizeof(table_name_buff)-1,
+ (int) (ptr - org)));
table_name= table_name_buff;
ptr= strxmov(ptr, " ", options, NullS);
query_length= (uint) (ptr - query);
diff --git a/client/mysqltest.cc b/client/mysqltest.cc
index e2a939dbd73..9e4b9da54af 100644
--- a/client/mysqltest.cc
+++ b/client/mysqltest.cc
@@ -6489,9 +6489,9 @@ int read_line(char *buf, int size)
}
else if ((c == '{' &&
(!my_strnncoll_simple(charset_info, (const uchar*) "while", 5,
- (uchar*) buf, min(5, p - buf), 0) ||
+ (uchar*) buf, MY_MIN(5, p - buf), 0) ||
!my_strnncoll_simple(charset_info, (const uchar*) "if", 2,
- (uchar*) buf, min(2, p - buf), 0))))
+ (uchar*) buf, MY_MIN(2, p - buf), 0))))
{
/* Only if and while commands can be terminated by { */
*p++= c;
diff --git a/cmake/configure.pl b/cmake/configure.pl
index 51e83c2815c..d8e09a1bad9 100644
--- a/cmake/configure.pl
+++ b/cmake/configure.pl
@@ -150,6 +150,16 @@ foreach my $option (@ARGV)
$cmakeargs = $cmakeargs." -DWITH_ZLIB=system";
next;
}
+ if($option =~ /with-libevent=/)
+ {
+ $cmakeargs = $cmakeargs." -DWITH_LIBEVENT=system";
+ next;
+ }
+ if($option =~ /with-libevent/)
+ {
+ $cmakeargs = $cmakeargs." -DWITH_LIBEVENT=bundled";
+ next;
+ }
if($option =~ /with-ssl=/)
{
$cmakeargs = $cmakeargs." -DWITH_SSL=yes";
@@ -237,6 +247,16 @@ foreach my $option (@ARGV)
print("configure.pl : ignoring $option\n");
next;
}
+ if ($option =~ /with-client-ldflags/)
+ {
+ print("configure.pl : ignoring $option\n");
+ next;
+ }
+ if ($option =~ /with-mysqld-ldflags=/)
+ {
+ print("configure.pl : ignoring $option\n");
+ next;
+ }
$option = uc($option);
$option =~ s/-/_/g;
diff --git a/cmake/cpu_info.cmake b/cmake/cpu_info.cmake
new file mode 100644
index 00000000000..32b98142ace
--- /dev/null
+++ b/cmake/cpu_info.cmake
@@ -0,0 +1,30 @@
+# Copyright (c) 2009, 2011, 2012 Oracle and/or its affiliates. All rights reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+# Symbols with information about the CPU.
+
+FIND_PROGRAM(GETCONF getconf)
+MARK_AS_ADVANCED(GETCONF)
+
+IF(GETCONF)
+ EXECUTE_PROCESS(
+ COMMAND ${GETCONF} LEVEL1_DCACHE_LINESIZE
+ OUTPUT_VARIABLE CPU_LEVEL1_DCACHE_LINESIZE
+ )
+ENDIF()
+IF(CPU_LEVEL1_DCACHE_LINESIZE AND CPU_LEVEL1_DCACHE_LINESIZE GREATER 0)
+ELSE()
+ SET(CPU_LEVEL1_DCACHE_LINESIZE 64)
+ENDIF()
diff --git a/cmake/libevent.cmake b/cmake/libevent.cmake
new file mode 100644
index 00000000000..54498e1bb15
--- /dev/null
+++ b/cmake/libevent.cmake
@@ -0,0 +1,89 @@
+# Copyright (C) 2011 Oracle and/or its affiliates. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+MACRO (MYSQL_USE_BUNDLED_LIBEVENT)
+ SET(LIBEVENT_LIBRARY event)
+ SET(LIBEVENT_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/libevent)
+ SET(LIBEVENT_FOUND TRUE)
+ SET(WITH_LIBEVENT "bundled" CACHE STRING "Use bundled libevent")
+ ADD_SUBDIRECTORY(libevent)
+ GET_TARGET_PROPERTY(src libevent SOURCES)
+ FOREACH(file ${src})
+ SET(LIBEVENT_SOURCES ${LIBEVENT_SOURCES} ${CMAKE_SOURCE_DIR}/libevent/${file})
+ ENDFOREACH()
+ENDMACRO()
+
+# MYSQL_CHECK_LIBEVENT
+#
+# Provides the following configure options:
+# WITH_LIBEVENT_BUNDLED
+#  If this is set, we use bundled libevent.
+#  If this is not set, search for system libevent;
+#  if system libevent is not found, use the bundled copy.
+# LIBEVENT_LIBRARIES, LIBEVENT_INCLUDE_DIR and LIBEVENT_SOURCES
+# are set after this macro has run
+
+MACRO (MYSQL_CHECK_LIBEVENT)
+
+ IF (NOT WITH_LIBEVENT)
+ SET(WITH_LIBEVENT "bundled" CACHE STRING "By default use bundled libevent on this platform")
+ ENDIF()
+
+ IF(WITH_LIBEVENT STREQUAL "bundled")
+ MYSQL_USE_BUNDLED_LIBEVENT()
+ ELSEIF(WITH_LIBEVENT STREQUAL "system" OR WITH_LIBEVENT STREQUAL "yes")
+ SET(LIBEVENT_FIND_QUIETLY TRUE)
+
+ IF (NOT LIBEVENT_INCLUDE_PATH)
+ set(LIBEVENT_INCLUDE_PATH /usr/local/include /opt/local/include)
+ ENDIF()
+
+ find_path(LIBEVENT_INCLUDE_DIR event.h PATHS ${LIBEVENT_INCLUDE_PATH})
+
+ if (NOT LIBEVENT_INCLUDE_DIR)
+ MESSAGE(SEND_ERROR "Cannot find appropriate event.h in /usr/local/include or /opt/local/include. Use bundled libevent")
+ endif()
+
+ IF (NOT LIBEVENT_LIB_PATHS)
+ set(LIBEVENT_LIB_PATHS /usr/local/lib /opt/local/lib)
+ ENDIF()
+
+ find_library(LIBEVENT_LIB event PATHS ${LIBEVENT_LIB_PATHS})
+
+ if (NOT LIBEVENT_LIB)
+ MESSAGE(SEND_ERROR "Cannot find appropriate event lib in /usr/local/lib or /opt/local/lib. Use bundled libevent")
+ endif()
+
+ IF (LIBEVENT_LIB AND LIBEVENT_INCLUDE_DIR)
+ set(LIBEVENT_FOUND TRUE)
+ set(LIBEVENT_LIBS ${LIBEVENT_LIB})
+ ELSE()
+ set(LIBEVENT_FOUND FALSE)
+ ENDIF()
+
+ IF(LIBEVENT_FOUND)
+ SET(LIBEVENT_SOURCES "")
+ SET(LIBEVENT_LIBRARIES ${LIBEVENT_LIBS})
+ SET(LIBEVENT_INCLUDE_DIRS ${LIBEVENT_INCLUDE_DIR})
+ SET(LIBEVENT_DEFINES "-DHAVE_LIBEVENT")
+ ELSE()
+ IF(WITH_LIBEVENT STREQUAL "system")
+ MESSAGE(SEND_ERROR "Cannot find appropriate system libraries for libevent. Use bundled libevent")
+ ENDIF()
+ MYSQL_USE_BUNDLED_LIBEVENT()
+ ENDIF()
+
+ ENDIF()
+ENDMACRO()
diff --git a/cmake/libutils.cmake b/cmake/libutils.cmake
index 7c13df05ca4..e161b67d25f 100644
--- a/cmake/libutils.cmake
+++ b/cmake/libutils.cmake
@@ -304,12 +304,15 @@ FUNCTION(GET_DEPENDEND_OS_LIBS target result)
SET(${result} ${ret} PARENT_SCOPE)
ENDFUNCTION()
-MACRO(RESTRICT_SYMBOL_EXPORTS target)
+# We try to hide the symbols in yassl/zlib to avoid name clashes with
+# other libraries like openssl.
+FUNCTION(RESTRICT_SYMBOL_EXPORTS target)
SET(VISIBILITY_HIDDEN_FLAG)
IF(CMAKE_COMPILER_IS_GNUCXX AND UNIX)
CHECK_C_COMPILER_FLAG("-fvisibility=hidden" HAVE_VISIBILITY_HIDDEN)
IF(HAVE_VISIBILITY_HIDDEN)
+ MESSAGE(STATUS "HAVE_VISIBILITY_HIDDEN")
SET(VISIBILITY_HIDDEN_FLAG "-fvisibility=hidden")
ENDIF()
ENDIF()
@@ -327,5 +330,4 @@ MACRO(RESTRICT_SYMBOL_EXPORTS target)
SET_TARGET_PROPERTIES(${target} PROPERTIES
COMPILE_FLAGS "${COMPILE_FLAGS} ${VISIBILITY_HIDDEN_FLAG}")
ENDIF()
-
-ENDMACRO()
+ENDFUNCTION()
diff --git a/cmake/ssl.cmake b/cmake/ssl.cmake
index cabff530b47..a74ebc219e9 100644
--- a/cmake/ssl.cmake
+++ b/cmake/ssl.cmake
@@ -1,4 +1,4 @@
-# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2009, 2012, Oracle and/or its affiliates. All rights reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -13,80 +13,222 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+# We support different versions of SSL:
+# - "bundled" uses source code in <source dir>/extra/yassl
+# - "system" (typically) uses headers/libraries in /usr/lib and /usr/lib64
+# - a custom installation of openssl can be used like this
+# - cmake -DCMAKE_PREFIX_PATH=</path/to/custom/openssl> -DWITH_SSL="system"
+# or
+# - cmake -DWITH_SSL=</path/to/custom/openssl>
+#
+# The default value for WITH_SSL is "bundled"
+# set in cmake/build_configurations/feature_set.cmake
+#
+# For custom build/install of openssl, see the accompanying README and
+# INSTALL* files. When building with gcc, you must build the shared libraries
+# (in addition to the static ones):
+# ./config --prefix=</path/to/custom/openssl> --shared; make; make install
+# On some platforms (mac) you need to choose 32/64 bit architecture.
+# Build/Install of openssl on windows is slightly different: you need to run
+# perl and nmake. You might also need to
+# 'set path=</path/to/custom/openssl>\bin;%PATH%'
+# in order to find the .dll files at runtime.
+
+SET(WITH_SSL_DOC "bundled (use yassl)")
+SET(WITH_SSL_DOC
+ "${WITH_SSL_DOC}, yes (prefer os library if present, otherwise use bundled)")
+SET(WITH_SSL_DOC
+ "${WITH_SSL_DOC}, system (use os library)")
+SET(WITH_SSL_DOC
+ "${WITH_SSL_DOC}, </path/to/custom/installation>")
+
MACRO (CHANGE_SSL_SETTINGS string)
- SET(WITH_SSL ${string} CACHE STRING "Options are: no bundled yes(prefer os library if present otherwise use bundled) system(use os library)" FORCE)
+ SET(WITH_SSL ${string} CACHE STRING ${WITH_SSL_DOC} FORCE)
ENDMACRO()
MACRO (MYSQL_USE_BUNDLED_SSL)
SET(INC_DIRS
- ${CMAKE_SOURCE_DIR}/extra/yassl/include
- ${CMAKE_SOURCE_DIR}/extra/yassl/taocrypt/include
+ ${CMAKE_SOURCE_DIR}/extra/yassl/include
+ ${CMAKE_SOURCE_DIR}/extra/yassl/taocrypt/include
)
SET(SSL_LIBRARIES yassl taocrypt)
SET(SSL_INCLUDE_DIRS ${INC_DIRS})
SET(SSL_INTERNAL_INCLUDE_DIRS ${CMAKE_SOURCE_DIR}/extra/yassl/taocrypt/mySTL)
- SET(SSL_DEFINES "-DHAVE_YASSL -DYASSL_PURE_C -DYASSL_PREFIX -DHAVE_OPENSSL -DMULTI_THREADED")
+ SET(SSL_DEFINES "-DHAVE_YASSL -DYASSL_PREFIX -DHAVE_OPENSSL -DMULTI_THREADED")
CHANGE_SSL_SETTINGS("bundled")
- #Remove -fno-implicit-templates
- #(yassl sources cannot be compiled with it)
- SET(SAVE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
- IF(CMAKE_CXX_FLAGS)
- STRING(REPLACE "-fno-implicit-templates" "" CMAKE_CXX_FLAGS
- ${CMAKE_CXX_FLAGS})
- ENDIF()
ADD_SUBDIRECTORY(extra/yassl)
ADD_SUBDIRECTORY(extra/yassl/taocrypt)
- SET(CMAKE_CXX_FLAGS ${SAVE_CXX_FLAGS})
GET_TARGET_PROPERTY(src yassl SOURCES)
FOREACH(file ${src})
SET(SSL_SOURCES ${SSL_SOURCES} ${CMAKE_SOURCE_DIR}/extra/yassl/${file})
ENDFOREACH()
GET_TARGET_PROPERTY(src taocrypt SOURCES)
FOREACH(file ${src})
- SET(SSL_SOURCES ${SSL_SOURCES} ${CMAKE_SOURCE_DIR}/extra/yassl/taocrypt/${file})
+ SET(SSL_SOURCES ${SSL_SOURCES}
+ ${CMAKE_SOURCE_DIR}/extra/yassl/taocrypt/${file})
ENDFOREACH()
ENDMACRO()
# MYSQL_CHECK_SSL
#
# Provides the following configure options:
-# WITH_SSL=[yes|no|bundled]
+# WITH_SSL=[yes|bundled|system|<path/to/custom/installation>]
MACRO (MYSQL_CHECK_SSL)
IF(NOT WITH_SSL)
IF(WIN32)
CHANGE_SSL_SETTINGS("bundled")
- ELSE()
- CHANGE_SSL_SETTINGS("no")
ENDIF()
ENDIF()
+ # See if WITH_SSL is of the form </path/to/custom/installation>
+ FILE(GLOB WITH_SSL_HEADER ${WITH_SSL}/include/openssl/ssl.h)
+ IF (WITH_SSL_HEADER)
+ SET(WITH_SSL_PATH ${WITH_SSL} CACHE PATH "path to custom SSL installation")
+ ENDIF()
+
IF(WITH_SSL STREQUAL "bundled")
MYSQL_USE_BUNDLED_SSL()
- ELSEIF(WITH_SSL STREQUAL "system" OR WITH_SSL STREQUAL "yes")
- # Check for system library
- SET(OPENSSL_FIND_QUIETLY TRUE)
- INCLUDE(FindOpenSSL)
- FIND_LIBRARY(CRYPTO_LIBRARY crypto)
- MARK_AS_ADVANCED(CRYPTO_LIBRARY)
+ # Reset some variables, in case we switch from /path/to/ssl to "bundled".
+ IF (WITH_SSL_PATH)
+ UNSET(WITH_SSL_PATH)
+ UNSET(WITH_SSL_PATH CACHE)
+ ENDIF()
+ IF (OPENSSL_ROOT_DIR)
+ UNSET(OPENSSL_ROOT_DIR)
+ UNSET(OPENSSL_ROOT_DIR CACHE)
+ ENDIF()
+ IF (OPENSSL_INCLUDE_DIR)
+ UNSET(OPENSSL_INCLUDE_DIR)
+ UNSET(OPENSSL_INCLUDE_DIR CACHE)
+ ENDIF()
+ IF (WIN32 AND OPENSSL_APPLINK_C)
+ UNSET(OPENSSL_APPLINK_C)
+ UNSET(OPENSSL_APPLINK_C CACHE)
+ ENDIF()
+ IF (OPENSSL_LIBRARIES)
+ UNSET(OPENSSL_LIBRARIES)
+ UNSET(OPENSSL_LIBRARIES CACHE)
+ ENDIF()
+ ELSEIF(WITH_SSL STREQUAL "system" OR
+ WITH_SSL STREQUAL "yes" OR
+ WITH_SSL_PATH
+ )
+ # First search in WITH_SSL_PATH.
+ FIND_PATH(OPENSSL_ROOT_DIR
+ NAMES include/openssl/ssl.h
+ NO_CMAKE_PATH
+ NO_CMAKE_ENVIRONMENT_PATH
+ HINTS ${WITH_SSL_PATH}
+ )
+ # Then search in standard places (if not found above).
+ FIND_PATH(OPENSSL_ROOT_DIR
+ NAMES include/openssl/ssl.h
+ )
+
+ FIND_PATH(OPENSSL_INCLUDE_DIR
+ NAMES openssl/ssl.h
+ HINTS ${OPENSSL_ROOT_DIR}/include
+ )
+
+ IF (WIN32)
+ FIND_FILE(OPENSSL_APPLINK_C
+ NAMES openssl/applink.c
+ HINTS ${OPENSSL_ROOT_DIR}/include
+ )
+ MESSAGE(STATUS "OPENSSL_APPLINK_C ${OPENSSL_APPLINK_C}")
+ ENDIF()
+
+ # On mac this list is <.dylib;.so;.a>
+ # We prefer static libraries, so we revert it here.
+ LIST(REVERSE CMAKE_FIND_LIBRARY_SUFFIXES)
+ MESSAGE(STATUS "suffixes <${CMAKE_FIND_LIBRARY_SUFFIXES}>")
+ FIND_LIBRARY(OPENSSL_LIBRARIES
+ NAMES ssl ssleay32 ssleay32MD
+ HINTS ${OPENSSL_ROOT_DIR}/lib)
+ FIND_LIBRARY(CRYPTO_LIBRARY
+ NAMES crypto libeay32
+ HINTS ${OPENSSL_ROOT_DIR}/lib)
+ LIST(REVERSE CMAKE_FIND_LIBRARY_SUFFIXES)
+
+ # Verify version number. Version information looks like:
+ # #define OPENSSL_VERSION_NUMBER 0x1000103fL
+ # Encoded as MNNFFPPS: major minor fix patch status
+ FILE(STRINGS "${OPENSSL_INCLUDE_DIR}/openssl/opensslv.h"
+ OPENSSL_VERSION_NUMBER
+ REGEX "^#define[\t ]+OPENSSL_VERSION_NUMBER[\t ]+0x[0-9].*"
+ )
+ STRING(REGEX REPLACE
+ "^.*OPENSSL_VERSION_NUMBER[\t ]+0x([0-9]).*$" "\\1"
+ OPENSSL_MAJOR_VERSION "${OPENSSL_VERSION_NUMBER}"
+ )
+
+ IF(OPENSSL_INCLUDE_DIR AND
+ OPENSSL_LIBRARIES AND
+ CRYPTO_LIBRARY AND
+ OPENSSL_MAJOR_VERSION STREQUAL "1"
+ )
+ SET(OPENSSL_FOUND TRUE)
+ ELSE()
+ SET(OPENSSL_FOUND FALSE)
+ ENDIF()
+
+ MESSAGE(STATUS "OPENSSL_INCLUDE_DIR = ${OPENSSL_INCLUDE_DIR}")
+ MESSAGE(STATUS "OPENSSL_LIBRARIES = ${OPENSSL_LIBRARIES}")
+ MESSAGE(STATUS "CRYPTO_LIBRARY = ${CRYPTO_LIBRARY}")
+ MESSAGE(STATUS "OPENSSL_MAJOR_VERSION = ${OPENSSL_MAJOR_VERSION}")
+
INCLUDE(CheckSymbolExists)
SET(CMAKE_REQUIRED_INCLUDES ${OPENSSL_INCLUDE_DIR})
CHECK_SYMBOL_EXISTS(SHA512_DIGEST_LENGTH "openssl/sha.h"
HAVE_SHA512_DIGEST_LENGTH)
- SET(CMAKE_REQUIRED_INCLUDES)
- IF(OPENSSL_FOUND AND CRYPTO_LIBRARY AND HAVE_SHA512_DIGEST_LENGTH)
+ IF(OPENSSL_FOUND AND HAVE_SHA512_DIGEST_LENGTH)
SET(SSL_SOURCES "")
SET(SSL_LIBRARIES ${OPENSSL_LIBRARIES} ${CRYPTO_LIBRARY})
+ IF(CMAKE_SYSTEM_NAME MATCHES "SunOS")
+ SET(SSL_LIBRARIES ${SSL_LIBRARIES} ${LIBSOCKET})
+ ENDIF()
+ IF(CMAKE_SYSTEM_NAME MATCHES "Linux")
+ SET(SSL_LIBRARIES ${SSL_LIBRARIES} ${LIBDL})
+ ENDIF()
+ MESSAGE(STATUS "SSL_LIBRARIES = ${SSL_LIBRARIES}")
SET(SSL_INCLUDE_DIRS ${OPENSSL_INCLUDE_DIR})
SET(SSL_INTERNAL_INCLUDE_DIRS "")
SET(SSL_DEFINES "-DHAVE_OPENSSL")
- CHANGE_SSL_SETTINGS("system")
ELSE()
IF(WITH_SSL STREQUAL "system")
MESSAGE(SEND_ERROR "Cannot find appropriate system libraries for SSL. Use WITH_SSL=bundled to enable SSL support")
ENDIF()
MYSQL_USE_BUNDLED_SSL()
ENDIF()
- ELSEIF(NOT WITH_SSL STREQUAL "no")
- MESSAGE(SEND_ERROR "Wrong option for WITH_SSL. Valid values are : yes, no, bundled")
+ ELSE()
+ MESSAGE(SEND_ERROR
+ "Wrong option for WITH_SSL. Valid values are : "${WITH_SSL_DOC})
+ ENDIF()
+ENDMACRO()
+
+
+# Many executables will depend on libeay32.dll and ssleay32.dll at runtime.
+# In order to ensure we find the right version(s), we copy them into
+# the same directory as the executables.
+# NOTE: Using dlls will likely crash in malloc/free,
+# see INSTALL.W32 which comes with the openssl sources.
+# So we should be linking static versions of the libraries.
+MACRO (COPY_OPENSSL_DLLS target_name)
+ IF (WIN32 AND WITH_SSL_PATH)
+ GET_FILENAME_COMPONENT(CRYPTO_NAME "${CRYPTO_LIBRARY}" NAME_WE)
+ GET_FILENAME_COMPONENT(OPENSSL_NAME "${OPENSSL_LIBRARIES}" NAME_WE)
+ FILE(GLOB HAVE_CRYPTO_DLL "${WITH_SSL_PATH}/bin/${CRYPTO_NAME}.dll")
+ FILE(GLOB HAVE_OPENSSL_DLL "${WITH_SSL_PATH}/bin/${OPENSSL_NAME}.dll")
+ IF (HAVE_CRYPTO_DLL AND HAVE_OPENSSL_DLL)
+ ADD_CUSTOM_COMMAND(OUTPUT ${target_name}
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different
+ "${WITH_SSL_PATH}/bin/${CRYPTO_NAME}.dll"
+ "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/${CRYPTO_NAME}.dll"
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different
+ "${WITH_SSL_PATH}/bin/${OPENSSL_NAME}.dll"
+ "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/${OPENSSL_NAME}.dll"
+ )
+ ADD_CUSTOM_TARGET(${target_name} ALL)
+ ENDIF()
ENDIF()
ENDMACRO()
diff --git a/dbug/dbug.c b/dbug/dbug.c
index b285b32fa17..9ec8044eaf1 100644
--- a/dbug/dbug.c
+++ b/dbug/dbug.c
@@ -1332,7 +1332,7 @@ void _db_dump_(uint _line_, const char *keyword,
if (TRACING)
{
Indent(cs, cs->level + 1);
- pos= min(max(cs->level-cs->stack->sub_level,0)*INDENT,80);
+ pos= MY_MIN(MY_MAX(cs->level-cs->stack->sub_level,0)*INDENT,80);
}
else
{
@@ -1737,7 +1737,7 @@ static void Indent(CODE_STATE *cs, int indent)
{
int count;
- indent= max(indent-1-cs->stack->sub_level,0)*INDENT;
+ indent= MY_MAX(indent-1-cs->stack->sub_level,0)*INDENT;
for (count= 0; count < indent ; count++)
{
if ((count % INDENT) == 0)
diff --git a/extra/comp_err.c b/extra/comp_err.c
index fb51377ddc5..bf757122957 100644
--- a/extra/comp_err.c
+++ b/extra/comp_err.c
@@ -33,8 +33,9 @@
#include <assert.h>
#include <my_dir.h>
-#define MAX_ROWS 1000
+#define MAX_ROWS 2000
#define HEADER_LENGTH 32 /* Length of header in errmsg.sys */
+#define ERRMSG_VERSION 3 /* Version number of errmsg.sys */
#define DEFAULT_CHARSET_DIR "../sql/share/charsets"
#define ER_PREFIX "ER_"
#define ER_PREFIX2 "MARIA_ER_"
@@ -50,9 +51,9 @@ static char *default_dbug_option= (char*) "d:t:O,/tmp/comp_err.trace";
#endif
/* Header for errmsg.sys files */
-uchar file_head[]= { 254, 254, 2, 2 };
+uchar file_head[]= { 254, 254, 2, ERRMSG_VERSION };
/* Store positions to each error message row to store in errmsg.sys header */
-uint file_pos[MAX_ROWS];
+uint file_pos[MAX_ROWS+1];
const char *empty_string= ""; /* For empty states */
/*
@@ -379,9 +380,11 @@ static int create_sys_files(struct languages *lang_head,
if (my_fwrite(to, (uchar*) head, HEADER_LENGTH, MYF(MY_WME | MY_FNABP)))
goto err;
+ file_pos[row_count]= (ftell(to) - start_pos);
for (i= 0; i < row_count; i++)
{
- int2store(head, file_pos[i]);
+ /* Store length of each string */
+ int2store(head, file_pos[i+1] - file_pos[i]);
if (my_fwrite(to, (uchar*) head, 2, MYF(MY_WME | MY_FNABP)))
goto err;
}
diff --git a/extra/my_print_defaults.c b/extra/my_print_defaults.c
index 8a16e677cb9..5b661c0d04b 100644
--- a/extra/my_print_defaults.c
+++ b/extra/my_print_defaults.c
@@ -26,7 +26,7 @@
#include <my_sys.h>
#include <m_string.h>
#include <my_getopt.h>
-
+#include <my_default.h>
const char *config_file="my"; /* Default config file */
uint verbose= 0, opt_defaults_file_used= 0;
diff --git a/include/big_endian.h b/include/big_endian.h
new file mode 100644
index 00000000000..021b6abc383
--- /dev/null
+++ b/include/big_endian.h
@@ -0,0 +1,82 @@
+/* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02111-1307 USA */
+
+/*
+ Data in big-endian format.
+*/
+#define float4store(T,A) do { *(T)= ((uchar *) &A)[3];\
+ *((T)+1)=(char) ((uchar *) &A)[2];\
+ *((T)+2)=(char) ((uchar *) &A)[1];\
+ *((T)+3)=(char) ((uchar *) &A)[0]; } while(0)
+
+#define float4get(V,M) do { float def_temp;\
+ ((uchar*) &def_temp)[0]=(M)[3];\
+ ((uchar*) &def_temp)[1]=(M)[2];\
+ ((uchar*) &def_temp)[2]=(M)[1];\
+ ((uchar*) &def_temp)[3]=(M)[0];\
+ (V)=def_temp; } while(0)
+
+#define float8store(T,V) do { *(T)= ((uchar *) &V)[7];\
+ *((T)+1)=(char) ((uchar *) &V)[6];\
+ *((T)+2)=(char) ((uchar *) &V)[5];\
+ *((T)+3)=(char) ((uchar *) &V)[4];\
+ *((T)+4)=(char) ((uchar *) &V)[3];\
+ *((T)+5)=(char) ((uchar *) &V)[2];\
+ *((T)+6)=(char) ((uchar *) &V)[1];\
+ *((T)+7)=(char) ((uchar *) &V)[0]; } while(0)
+
+#define float8get(V,M) do { double def_temp;\
+ ((uchar*) &def_temp)[0]=(M)[7];\
+ ((uchar*) &def_temp)[1]=(M)[6];\
+ ((uchar*) &def_temp)[2]=(M)[5];\
+ ((uchar*) &def_temp)[3]=(M)[4];\
+ ((uchar*) &def_temp)[4]=(M)[3];\
+ ((uchar*) &def_temp)[5]=(M)[2];\
+ ((uchar*) &def_temp)[6]=(M)[1];\
+ ((uchar*) &def_temp)[7]=(M)[0];\
+ (V) = def_temp; } while(0)
+
+#define ushortget(V,M) do { V = (uint16) (((uint16) ((uchar) (M)[1]))+\
+ ((uint16) ((uint16) (M)[0]) << 8)); } while(0)
+#define shortget(V,M) do { V = (short) (((short) ((uchar) (M)[1]))+\
+ ((short) ((short) (M)[0]) << 8)); } while(0)
+#define longget(V,M) do { int32 def_temp;\
+ ((uchar*) &def_temp)[0]=(M)[0];\
+ ((uchar*) &def_temp)[1]=(M)[1];\
+ ((uchar*) &def_temp)[2]=(M)[2];\
+ ((uchar*) &def_temp)[3]=(M)[3];\
+ (V)=def_temp; } while(0)
+#define ulongget(V,M) do { uint32 def_temp;\
+ ((uchar*) &def_temp)[0]=(M)[0];\
+ ((uchar*) &def_temp)[1]=(M)[1];\
+ ((uchar*) &def_temp)[2]=(M)[2];\
+ ((uchar*) &def_temp)[3]=(M)[3];\
+ (V)=def_temp; } while(0)
+#define shortstore(T,A) do { uint def_temp=(uint) (A) ;\
+ *(((char*)T)+1)=(char)(def_temp); \
+ *(((char*)T)+0)=(char)(def_temp >> 8); } while(0)
+#define longstore(T,A) do { *(((char*)T)+3)=((A));\
+ *(((char*)T)+2)=(((A) >> 8));\
+ *(((char*)T)+1)=(((A) >> 16));\
+ *(((char*)T)+0)=(((A) >> 24)); } while(0)
+
+#define floatget(V,M) memcpy(&V, (M), sizeof(float))
+/* Cast away type qualifiers (necessary as macro takes argument by value). */
+#define floatstore(T,V) memcpy((T), (void*) (&V), sizeof(float))
+#define doubleget(V,M) memcpy(&V, (M), sizeof(double))
+/* Cast away type qualifiers (necessary as macro takes argument by value). */
+#define doublestore(T,V) memcpy((T), (void*) &V, sizeof(double))
+#define longlongget(V,M) memcpy(&V, (M), sizeof(ulonglong))
+#define longlongstore(T,V) memcpy((T), &V, sizeof(ulonglong))
diff --git a/include/byte_order_generic.h b/include/byte_order_generic.h
new file mode 100644
index 00000000000..d4ac27eeb9c
--- /dev/null
+++ b/include/byte_order_generic.h
@@ -0,0 +1,95 @@
+/* Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02111-1307 USA */
+
+/*
+ Endianness-independent definitions for architectures other
+ than the x86 architecture.
+*/
+#define sint2korr(A) (int16) (((int16) ((uchar) (A)[0])) +\
+ ((int16) ((int16) (A)[1]) << 8))
+#define sint3korr(A) ((int32) ((((uchar) (A)[2]) & 128) ? \
+ (((uint32) 255L << 24) | \
+ (((uint32) (uchar) (A)[2]) << 16) |\
+ (((uint32) (uchar) (A)[1]) << 8) | \
+ ((uint32) (uchar) (A)[0])) : \
+ (((uint32) (uchar) (A)[2]) << 16) |\
+ (((uint32) (uchar) (A)[1]) << 8) | \
+ ((uint32) (uchar) (A)[0])))
+#define sint4korr(A) (int32) (((int32) ((uchar) (A)[0])) +\
+ (((int32) ((uchar) (A)[1]) << 8)) +\
+ (((int32) ((uchar) (A)[2]) << 16)) +\
+ (((int32) ((int16) (A)[3]) << 24)))
+#define sint8korr(A) (longlong) uint8korr(A)
+#define uint2korr(A) (uint16) (((uint16) ((uchar) (A)[0])) +\
+ ((uint16) ((uchar) (A)[1]) << 8))
+#define uint3korr(A) (uint32) (((uint32) ((uchar) (A)[0])) +\
+ (((uint32) ((uchar) (A)[1])) << 8) +\
+ (((uint32) ((uchar) (A)[2])) << 16))
+#define uint4korr(A) (uint32) (((uint32) ((uchar) (A)[0])) +\
+ (((uint32) ((uchar) (A)[1])) << 8) +\
+ (((uint32) ((uchar) (A)[2])) << 16) +\
+ (((uint32) ((uchar) (A)[3])) << 24))
+#define uint5korr(A) ((ulonglong)(((uint32) ((uchar) (A)[0])) +\
+ (((uint32) ((uchar) (A)[1])) << 8) +\
+ (((uint32) ((uchar) (A)[2])) << 16) +\
+ (((uint32) ((uchar) (A)[3])) << 24)) +\
+ (((ulonglong) ((uchar) (A)[4])) << 32))
+#define uint6korr(A) ((ulonglong)(((uint32) ((uchar) (A)[0])) + \
+ (((uint32) ((uchar) (A)[1])) << 8) + \
+ (((uint32) ((uchar) (A)[2])) << 16) + \
+ (((uint32) ((uchar) (A)[3])) << 24)) + \
+ (((ulonglong) ((uchar) (A)[4])) << 32) + \
+ (((ulonglong) ((uchar) (A)[5])) << 40))
+#define uint8korr(A) ((ulonglong)(((uint32) ((uchar) (A)[0])) +\
+ (((uint32) ((uchar) (A)[1])) << 8) +\
+ (((uint32) ((uchar) (A)[2])) << 16) +\
+ (((uint32) ((uchar) (A)[3])) << 24)) +\
+ (((ulonglong) (((uint32) ((uchar) (A)[4])) +\
+ (((uint32) ((uchar) (A)[5])) << 8) +\
+ (((uint32) ((uchar) (A)[6])) << 16) +\
+ (((uint32) ((uchar) (A)[7])) << 24))) <<\
+ 32))
+#define int2store(T,A) do { uint def_temp= (uint) (A) ;\
+ *((uchar*) (T))= (uchar)(def_temp); \
+ *((uchar*) (T)+1)=(uchar)((def_temp >> 8)); \
+ } while(0)
+#define int3store(T,A) do { /*lint -save -e734 */\
+ *((uchar*)(T))=(uchar) ((A));\
+ *((uchar*) (T)+1)=(uchar) (((A) >> 8));\
+ *((uchar*)(T)+2)=(uchar) (((A) >> 16)); \
+ /*lint -restore */} while(0)
+#define int4store(T,A) do { *((char *)(T))=(char) ((A));\
+ *(((char *)(T))+1)=(char) (((A) >> 8));\
+ *(((char *)(T))+2)=(char) (((A) >> 16));\
+ *(((char *)(T))+3)=(char) (((A) >> 24));\
+ } while(0)
+#define int5store(T,A) do { *((char *)(T))= (char)((A)); \
+ *(((char *)(T))+1)= (char)(((A) >> 8)); \
+ *(((char *)(T))+2)= (char)(((A) >> 16)); \
+ *(((char *)(T))+3)= (char)(((A) >> 24)); \
+ *(((char *)(T))+4)= (char)(((A) >> 32)); \
+ } while(0)
+#define int6store(T,A) do { *((char *)(T))= (char)((A)); \
+ *(((char *)(T))+1)= (char)(((A) >> 8)); \
+ *(((char *)(T))+2)= (char)(((A) >> 16)); \
+ *(((char *)(T))+3)= (char)(((A) >> 24)); \
+ *(((char *)(T))+4)= (char)(((A) >> 32)); \
+ *(((char *)(T))+5)= (char)(((A) >> 40)); \
+ } while(0)
+#define int8store(T,A) do { uint def_temp= (uint) (A), \
+ def_temp2= (uint) ((A) >> 32); \
+ int4store((T),def_temp); \
+ int4store((T+4),def_temp2);\
+ } while(0)
diff --git a/include/byte_order_generic_x86.h b/include/byte_order_generic_x86.h
new file mode 100644
index 00000000000..0a71a17829b
--- /dev/null
+++ b/include/byte_order_generic_x86.h
@@ -0,0 +1,97 @@
+/* Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02111-1307 USA */
+
+/*
+ Optimized function-like macros for the x86 architecture (_WIN32 included).
+*/
+#define sint2korr(A) (*((const int16 *) (A)))
+#define sint3korr(A) ((int32) ((((uchar) (A)[2]) & 128) ? \
+ (((uint32) 255L << 24) | \
+ (((uint32) (uchar) (A)[2]) << 16) |\
+ (((uint32) (uchar) (A)[1]) << 8) | \
+ ((uint32) (uchar) (A)[0])) : \
+ (((uint32) (uchar) (A)[2]) << 16) |\
+ (((uint32) (uchar) (A)[1]) << 8) | \
+ ((uint32) (uchar) (A)[0])))
+#define sint4korr(A) (*((const long *) (A)))
+#define uint2korr(A) (*((const uint16 *) (A)))
+
+/*
+ Attention: Please, note, uint3korr reads 4 bytes (not 3)!
+ It means, that you have to provide enough allocated space.
+*/
+#if defined(HAVE_valgrind) && !defined(_WIN32)
+#define uint3korr(A) (uint32) (((uint32) ((uchar) (A)[0])) +\
+ (((uint32) ((uchar) (A)[1])) << 8) +\
+ (((uint32) ((uchar) (A)[2])) << 16))
+#else
+#define uint3korr(A) (long) (*((const unsigned int *) (A)) & 0xFFFFFF)
+#endif
+
+#define uint4korr(A) (*((const uint32 *) (A)))
+#define uint5korr(A) ((ulonglong)(((uint32) ((uchar) (A)[0])) +\
+ (((uint32) ((uchar) (A)[1])) << 8) +\
+ (((uint32) ((uchar) (A)[2])) << 16) +\
+ (((uint32) ((uchar) (A)[3])) << 24)) +\
+ (((ulonglong) ((uchar) (A)[4])) << 32))
+#define uint6korr(A) ((ulonglong)(((uint32) ((uchar) (A)[0])) + \
+ (((uint32) ((uchar) (A)[1])) << 8) + \
+ (((uint32) ((uchar) (A)[2])) << 16) + \
+ (((uint32) ((uchar) (A)[3])) << 24)) + \
+ (((ulonglong) ((uchar) (A)[4])) << 32) + \
+ (((ulonglong) ((uchar) (A)[5])) << 40))
+#define uint8korr(A) (*((const ulonglong *) (A)))
+#define sint8korr(A) (*((const longlong *) (A)))
+
+#define int2store(T,A) *((uint16*) (T))= (uint16) (A)
+#define int3store(T,A) do { *(T)= (uchar) ((A));\
+ *(T+1)=(uchar) (((uint) (A) >> 8));\
+ *(T+2)=(uchar) (((A) >> 16));\
+ } while (0)
+#define int4store(T,A) *((long *) (T))= (long) (A)
+#define int5store(T,A) do { *(T)= (uchar)((A));\
+ *((T)+1)=(uchar) (((A) >> 8));\
+ *((T)+2)=(uchar) (((A) >> 16));\
+ *((T)+3)=(uchar) (((A) >> 24));\
+ *((T)+4)=(uchar) (((A) >> 32));\
+ } while(0)
+#define int6store(T,A) do { *(T)= (uchar)((A)); \
+ *((T)+1)=(uchar) (((A) >> 8)); \
+ *((T)+2)=(uchar) (((A) >> 16)); \
+ *((T)+3)=(uchar) (((A) >> 24)); \
+ *((T)+4)=(uchar) (((A) >> 32)); \
+ *((T)+5)=(uchar) (((A) >> 40)); \
+ } while(0)
+#define int8store(T,A) *((ulonglong *) (T))= (ulonglong) (A)
+typedef union {
+ double v;
+ long m[2];
+} doubleget_union;
+#define doubleget(V,M) \
+do { doubleget_union _tmp; \
+ _tmp.m[0] = *((const long*)(M)); \
+ _tmp.m[1] = *(((const long*) (M))+1); \
+ (V) = _tmp.v; } while(0)
+#define doublestore(T,V) \
+do { *((long *) T) = ((const doubleget_union *)&V)->m[0]; \
+ *(((long *) T)+1) = ((const doubleget_union *)&V)->m[1]; \
+ } while (0)
+#define float4get(V,M) \
+do { *((float *) &(V)) = *((const float*) (M)); } while(0)
+#define float8get(V,M) doubleget((V),(M))
+#define float4store(V,M) memcpy((uchar*)(V), (uchar*)(&M), sizeof(float))
+#define floatstore(T,V) memcpy((uchar*)(T), (uchar*)(&V), sizeof(float))
+#define floatget(V,M) memcpy((uchar*)(&V),(uchar*) (M), sizeof(float))
+#define float8store(V,M) doublestore((V),(M))
diff --git a/include/byte_order_generic_x86_64.h b/include/byte_order_generic_x86_64.h
new file mode 100644
index 00000000000..877c1574dfa
--- /dev/null
+++ b/include/byte_order_generic_x86_64.h
@@ -0,0 +1,83 @@
+/* Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02111-1307 USA */
+
+/*
+  Optimized function-like macros for the x86_64 architecture.
+*/
+#define sint2korr(A) (int16) (*((int16 *) (A)))
+#define sint3korr(A) ((int32) ((((uchar) (A)[2]) & 128) ? \
+ (((uint32) 255L << 24) | \
+ (((uint32) (uchar) (A)[2]) << 16) |\
+ (((uint32) (uchar) (A)[1]) << 8) | \
+ ((uint32) (uchar) (A)[0])) : \
+ (((uint32) (uchar) (A)[2]) << 16) |\
+ (((uint32) (uchar) (A)[1]) << 8) | \
+ ((uint32) (uchar) (A)[0])))
+#define sint4korr(A) (int32) (*((int32 *) (A)))
+#define uint2korr(A) (uint16) (*((uint16 *) (A)))
+/*
+  Attention: please note that uint3korr reads 4 bytes (not 3)!
+  This means that you must provide enough allocated space.
+*/
+#if defined(HAVE_purify) && !defined(_WIN32)
+#define uint3korr(A) (uint32) (((uint32) ((uchar) (A)[0])) +\
+ (((uint32) ((uchar) (A)[1])) << 8) +\
+ (((uint32) ((uchar) (A)[2])) << 16))
+#else
+#define uint3korr(A) (uint32) (*((unsigned int *) (A)) & 0xFFFFFF)
+#endif
+#define uint4korr(A) (uint32) (*((uint32 *) (A)))
+#define uint5korr(A) ((ulonglong)(((uint32) ((uchar) (A)[0])) +\
+ (((uint32) ((uchar) (A)[1])) << 8) +\
+ (((uint32) ((uchar) (A)[2])) << 16) +\
+ (((uint32) ((uchar) (A)[3])) << 24)) +\
+ (((ulonglong) ((uchar) (A)[4])) << 32))
+#define uint6korr(A) ((ulonglong)(((uint32) ((uchar) (A)[0])) + \
+ (((uint32) ((uchar) (A)[1])) << 8) + \
+ (((uint32) ((uchar) (A)[2])) << 16) + \
+ (((uint32) ((uchar) (A)[3])) << 24)) + \
+ (((ulonglong) ((uchar) (A)[4])) << 32) + \
+ (((ulonglong) ((uchar) (A)[5])) << 40))
+#define uint8korr(A) (ulonglong) (*((ulonglong *) (A)))
+#define sint8korr(A) (longlong) (*((longlong *) (A)))
+
+#define int2store(T,A) do { uchar *pT= (uchar*)(T);\
+ *((uint16*)(pT))= (uint16) (A);\
+ } while (0)
+
+#define int3store(T,A) do { *(T)= (uchar) ((A));\
+ *(T+1)=(uchar) (((uint) (A) >> 8));\
+ *(T+2)=(uchar) (((A) >> 16));\
+ } while (0)
+#define int4store(T,A) do { uchar *pT= (uchar*)(T);\
+ *((uint32 *) (pT))= (uint32) (A); \
+ } while (0)
+
+#define int5store(T,A) do { *(T)= (uchar)((A));\
+ *((T)+1)=(uchar) (((A) >> 8));\
+ *((T)+2)=(uchar) (((A) >> 16));\
+ *((T)+3)=(uchar) (((A) >> 24));\
+ *((T)+4)=(uchar) (((A) >> 32));\
+ } while(0)
+#define int6store(T,A) do { *(T)= (uchar)((A)); \
+ *((T)+1)=(uchar) (((A) >> 8)); \
+ *((T)+2)=(uchar) (((A) >> 16)); \
+ *((T)+3)=(uchar) (((A) >> 24)); \
+ *((T)+4)=(uchar) (((A) >> 32)); \
+ *((T)+5)=(uchar) (((A) >> 40)); \
+ } while(0)
+#define int8store(T,A) do { uchar *pT= (uchar*)(T);\
+ *((ulonglong *) (pT))= (ulonglong) (A);\
+ } while(0)
diff --git a/include/crypt_genhash_impl.h b/include/crypt_genhash_impl.h
new file mode 100644
index 00000000000..af5afd23e86
--- /dev/null
+++ b/include/crypt_genhash_impl.h
@@ -0,0 +1,32 @@
+/* defines and prototypes for using crypt_genhash_impl.cc */
+
+#ifndef CRYPT_HASHGEN_IMPL_H
+#define CRYPT_HASHGEN_IMPL_H
+#define ROUNDS_DEFAULT 5000
+#define ROUNDS_MIN 1000
+#define ROUNDS_MAX 999999999
+#define MIXCHARS 32
+#define CRYPT_SALT_LENGTH 20
+#define CRYPT_MAGIC_LENGTH 3
+#define CRYPT_PARAM_LENGTH 13
+#define SHA256_HASH_LENGTH 43
+#define CRYPT_MAX_PASSWORD_SIZE (CRYPT_SALT_LENGTH + \
+ SHA256_HASH_LENGTH + \
+ CRYPT_MAGIC_LENGTH + \
+ CRYPT_PARAM_LENGTH)
+
+int extract_user_salt(char **salt_begin,
+ char **salt_end);
+C_MODE_START
+char *
+my_crypt_genhash(char *ctbuffer,
+ size_t ctbufflen,
+ const char *plaintext,
+ int plaintext_len,
+ const char *switchsalt,
+ const char **params);
+void generate_user_salt(char *buffer, int buffer_len);
+void xor_string(char *to, int to_len, char *pattern, int pattern_len);
+
+C_MODE_END
+#endif
diff --git a/include/errmsg.h b/include/errmsg.h
index 64ec2df395c..b839060a881 100644
--- a/include/errmsg.h
+++ b/include/errmsg.h
@@ -16,8 +16,12 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
-/* Error messages for MySQL clients */
-/* (Error messages for the daemon are in sql/share/errmsg.txt) */
+/*
+ Error messages numbers for MySQL clients.
+  The error messages themselves are in libmysql/errmsg.c
+
+ Error messages for the mysqld daemon are in sql/share/errmsg.txt
+*/
#ifdef __cplusplus
extern "C" {
@@ -102,7 +106,9 @@ extern const char *client_errors[]; /* Error messages */
#define CR_NEW_STMT_METADATA 2057
#define CR_ALREADY_CONNECTED 2058
#define CR_AUTH_PLUGIN_CANNOT_LOAD 2059
-#define CR_ERROR_LAST /*Copy last error nr:*/ 2059
+#define CR_DUPLICATE_CONNECTION_ATTR 2060
+#define CR_AUTH_PLUGIN_ERR 2061
+#define CR_ERROR_LAST /*Copy last error nr:*/ 2061
/* Add error numbers before CR_ERROR_LAST and change it accordingly. */
#endif /* ERRMSG_INCLUDED */
diff --git a/include/ft_global.h b/include/ft_global.h
index 8a1069d6e62..8a77cbca014 100644
--- a/include/ft_global.h
+++ b/include/ft_global.h
@@ -43,11 +43,32 @@ struct _ft_vft
void (*reinit_search)(FT_INFO *);
};
+typedef struct st_ft_info_ext FT_INFO_EXT;
+struct _ft_vft_ext
+{
+ uint (*get_version)(); // Extended API version
+ ulonglong (*get_flags)();
+ ulonglong (*get_docid)(FT_INFO_EXT *);
+ ulonglong (*count_matches)(FT_INFO_EXT *);
+};
+
+/* Flags for extended FT API */
+#define FTS_ORDERED_RESULT (LL(1) << 1)
+#define FTS_DOCID_IN_RESULT (LL(1) << 2)
+
+#define FTS_DOC_ID_COL_NAME "FTS_DOC_ID"
+
#ifndef FT_CORE
struct st_ft_info
{
struct _ft_vft *please; /* INTERCAL style :-) */
};
+
+struct st_ft_info_ext
+{
+ struct _ft_vft *please; /* INTERCAL style :-) */
+ struct _ft_vft_ext *could_you;
+};
#endif
extern const char *ft_stopword_file;
diff --git a/include/little_endian.h b/include/little_endian.h
new file mode 100644
index 00000000000..7223fea648f
--- /dev/null
+++ b/include/little_endian.h
@@ -0,0 +1,75 @@
+/* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02111-1307 USA */
+
+/*
+ Data in little-endian format.
+*/
+
+#ifndef MY_BYTE_ORDER_ARCH_OPTIMIZED
+#define float4get(V,M) memcpy(&V, (M), sizeof(float))
+#define float4store(V,M) memcpy(V, (&M), sizeof(float))
+#define float8get(V,M) doubleget((V),(M))
+#define float8store(V,M) doublestore((V),(M))
+
+/* Bi-endian hardware.... */
+#if defined(__FLOAT_WORD_ORDER) && (__FLOAT_WORD_ORDER == __BIG_ENDIAN)
+#define doublestore(T,V) do { *(((char*)T)+0)=(char) ((uchar *) &V)[4];\
+ *(((char*)T)+1)=(char) ((uchar *) &V)[5];\
+ *(((char*)T)+2)=(char) ((uchar *) &V)[6];\
+ *(((char*)T)+3)=(char) ((uchar *) &V)[7];\
+ *(((char*)T)+4)=(char) ((uchar *) &V)[0];\
+ *(((char*)T)+5)=(char) ((uchar *) &V)[1];\
+ *(((char*)T)+6)=(char) ((uchar *) &V)[2];\
+ *(((char*)T)+7)=(char) ((uchar *) &V)[3]; }\
+ while(0)
+#define doubleget(V,M) do { double def_temp;\
+ ((uchar*) &def_temp)[0]=(M)[4];\
+ ((uchar*) &def_temp)[1]=(M)[5];\
+ ((uchar*) &def_temp)[2]=(M)[6];\
+ ((uchar*) &def_temp)[3]=(M)[7];\
+ ((uchar*) &def_temp)[4]=(M)[0];\
+ ((uchar*) &def_temp)[5]=(M)[1];\
+ ((uchar*) &def_temp)[6]=(M)[2];\
+ ((uchar*) &def_temp)[7]=(M)[3];\
+ (V) = def_temp; } while(0)
+#else /* Bi-endian hardware.... */
+
+/* Cast away type qualifiers (necessary as macro takes argument by value). */
+#define doublestore(T,V) memcpy((T), (void*) &V, sizeof(double))
+#define doubleget(V,M) memcpy(&V, (M), sizeof(double))
+
+#endif /* Bi-endian hardware.... */
+
+#endif /* !MY_BYTE_ORDER_ARCH_OPTIMIZED */
+
+#define ushortget(V,M) do { uchar *pM= (uchar*)(M);V = uint2korr(pM);} while(0)
+#define shortget(V,M) do { uchar *pM= (uchar*)(M);V = sint2korr(pM);} while(0)
+#define longget(V,M) do { uchar *pM= (uchar*)(M);V = sint4korr(pM);} while(0)
+#define ulongget(V,M) do { uchar *pM= (uchar*)(M);V = uint4korr(pM);} while(0)
+#define shortstore(T,V) int2store(T,V)
+#define longstore(T,V) int4store(T,V)
+
+#ifndef floatstore
+/* Cast away type qualifiers (necessary as macro takes argument by value). */
+#define floatstore(T,V) memcpy((T), (void*) (&V), sizeof(float))
+#define floatget(V,M) memcpy(&V, (M), sizeof(float))
+#endif
+#ifndef doubleget
+#define doubleget(V,M) memcpy(&V, (M), sizeof(double))
+#define doublestore(T,V) memcpy((T), (void *) &V, sizeof(double))
+#endif /* doubleget */
+
+#define longlongget(V,M) memcpy(&V, (M), sizeof(ulonglong))
+#define longlongstore(T,V) memcpy((T), &V, sizeof(ulonglong))
diff --git a/include/m_ctype.h b/include/m_ctype.h
index 969cb0058ac..5d2a6f80b75 100644
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@@ -588,6 +588,10 @@ my_bool my_charset_is_ascii_compatible(CHARSET_INFO *cs);
extern size_t my_vsnprintf_ex(CHARSET_INFO *cs, char *to, size_t n,
const char* fmt, va_list ap);
+uint32 my_convert(char *to, uint32 to_length, const CHARSET_INFO *to_cs,
+ const char *from, uint32 from_length,
+ const CHARSET_INFO *from_cs, uint *errors);
+
#define _MY_U 01 /* Upper case */
#define _MY_L 02 /* Lower case */
#define _MY_NMR 04 /* Numeral (digit) */
diff --git a/include/my_base.h b/include/my_base.h
index 4cbcb00425b..18b75f88393 100644
--- a/include/my_base.h
+++ b/include/my_base.h
@@ -46,7 +46,8 @@
#define HA_OPEN_COPY 256 /* Open copy (for repair) */
/* Internal temp table, used for temporary results */
#define HA_OPEN_INTERNAL_TABLE 512
-#define HA_OPEN_MERGE_TABLE 1024
+#define HA_OPEN_NO_PSI_CALL 1024 /* Don't call/connect PSI */
+#define HA_OPEN_MERGE_TABLE 2048
/* The following is parameter to ha_rkey() how to use key */
@@ -194,6 +195,11 @@ enum ha_extra_function {
HA_EXTRA_ATTACH_CHILDREN,
HA_EXTRA_IS_ATTACHED_CHILDREN,
HA_EXTRA_DETACH_CHILDREN,
+ /*
+ Prepare table for export
+ (e.g. quiesce the table and write table metadata).
+ */
+ HA_EXTRA_EXPORT,
HA_EXTRA_DETACH_CHILD,
/* Inform handler we will force a close as part of flush */
HA_EXTRA_PREPARE_FOR_FORCED_CLOSE
@@ -317,6 +323,23 @@ enum ha_base_keytype {
#define HA_OPTION_RELIES_ON_SQL_LAYER 512
#define HA_OPTION_NULL_FIELDS 1024
#define HA_OPTION_PAGE_CHECKSUM 2048
+/*
+ STATS_PERSISTENT=1 has been specified in the SQL command (either CREATE
+ or ALTER TABLE). Table and index statistics that are collected by the
+ storage engine and used by the optimizer for query optimization will be
+ stored on disk and will not change after a server restart.
+*/
+#define HA_OPTION_STATS_PERSISTENT 4096
+/*
+ STATS_PERSISTENT=0 has been specified in CREATE/ALTER TABLE. Statistics
+ for the table will be wiped away on server shutdown and new ones recalculated
+ after the server is started again. If none of HA_OPTION_STATS_PERSISTENT or
+ HA_OPTION_NO_STATS_PERSISTENT is set, this means that the setting is not
+ explicitly set at table level and the corresponding table will use whatever
+ is the global server default.
+*/
+#define HA_OPTION_NO_STATS_PERSISTENT 8192
+
/* .frm has extra create options in linked-list format */
#define HA_OPTION_TEXT_CREATE_OPTIONS (1L << 14)
#define HA_OPTION_TEMP_COMPRESS_RECORD (1L << 15) /* set by isamchk */
@@ -334,7 +357,7 @@ enum ha_base_keytype {
#define HA_CREATE_PAGE_CHECKSUM 32
#define HA_CREATE_DELAY_KEY_WRITE 64
#define HA_CREATE_RELIES_ON_SQL_LAYER 128
-
+#define HA_CREATE_INTERNAL_TABLE 256
/* Flags used by start_bulk_insert */
@@ -458,7 +481,8 @@ enum ha_base_keytype {
/* It is not possible to log this statement */
#define HA_ERR_LOGGING_IMPOSSIBLE 170
/* The event was corrupt, leading to illegal data being read */
-#define HA_ERR_CORRUPT_EVENT 171
+#define HA_ERR_CORRUPT_EVENT 171 /* The event was corrupt, leading to
+ illegal data being read */
#define HA_ERR_NEW_FILE 172 /* New file format */
/* The event could not be processed no other handler error happened */
#define HA_ERR_ROWS_EVENT_APPLY 173
@@ -466,16 +490,19 @@ enum ha_base_keytype {
#define HA_ERR_FILE_TOO_SHORT 175 /* File too short */
#define HA_ERR_WRONG_CRC 176 /* Wrong CRC on page */
#define HA_ERR_TOO_MANY_CONCURRENT_TRXS 177 /*Too many active concurrent transactions */
+/* There's no explicitly listed partition in table for the given value */
#define HA_ERR_NOT_IN_LOCK_PARTITIONS 178
#define HA_ERR_INDEX_COL_TOO_LONG 179 /* Index column length exceeds limit */
#define HA_ERR_INDEX_CORRUPT 180 /* Index corrupted */
#define HA_ERR_UNDO_REC_TOO_BIG 181 /* Undo log record too big */
-#define HA_ERR_TABLE_IN_FK_CHECK 182 /* Table being used in foreign key check */
-#define HA_FTS_INVALID_DOCID 183 /* Invalid InnoDB Doc ID */
-#define HA_ERR_ROW_NOT_VISIBLE 184
-#define HA_ERR_ABORTED_BY_USER 185
-#define HA_ERR_DISK_FULL 186
-#define HA_ERR_LAST 186 /* Copy of last error nr */
+#define HA_FTS_INVALID_DOCID 182 /* Invalid InnoDB Doc ID */
+#define HA_ERR_TABLE_IN_FK_CHECK 183 /* Table being used in foreign key check */
+#define HA_ERR_TABLESPACE_EXISTS 184 /* The tablespace existed in storage engine */
+#define HA_ERR_TOO_MANY_FIELDS 185 /* Table has too many columns */
+#define HA_ERR_ROW_NOT_VISIBLE 186
+#define HA_ERR_ABORTED_BY_USER 187
+#define HA_ERR_DISK_FULL 188
+#define HA_ERR_LAST 188 /* Copy of last error nr */
/* Number of different errors */
#define HA_ERR_ERRORS (HA_ERR_LAST - HA_ERR_FIRST + 1)
@@ -608,4 +635,17 @@ C_MODE_START
typedef void (* invalidator_by_filename)(const char * filename);
C_MODE_END
+
+enum durability_properties
+{
+ /*
+ Preserves the durability properties defined by the engine */
+ HA_REGULAR_DURABILITY= 0,
+ /*
+ Ignore the durability properties defined by the engine and
+ write only in-memory entries.
+ */
+ HA_IGNORE_DURABILITY= 1
+};
+
#endif /* _my_base_h */
diff --git a/include/my_byteorder.h b/include/my_byteorder.h
new file mode 100644
index 00000000000..1f29248bfb2
--- /dev/null
+++ b/include/my_byteorder.h
@@ -0,0 +1,54 @@
+#ifndef MY_BYTEORDER_INCLUDED
+#define MY_BYTEORDER_INCLUDED
+
+/* Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+
+/*
+ Macro for reading 32-bit integer from network byte order (big-endian)
+ from an unaligned memory location.
+*/
+#define int4net(A) (int32) (((uint32) ((uchar) (A)[3])) | \
+ (((uint32) ((uchar) (A)[2])) << 8) | \
+ (((uint32) ((uchar) (A)[1])) << 16) | \
+ (((uint32) ((uchar) (A)[0])) << 24))
+
+/*
+  Function-like macros for reading and storing in machine-independent
+  format (low byte first). There are 'korr' (from 'corrector') variants
+  for integer types, and 'get' (from 'getter') variants for float types.
+*/
+#if defined(__i386__) || defined(_WIN32)
+#define MY_BYTE_ORDER_ARCH_OPTIMIZED
+#include "byte_order_generic_x86.h"
+#elif defined(__x86_64__)
+#include "byte_order_generic_x86_64.h"
+#else
+#include "byte_order_generic.h"
+#endif
+
+/*
+  Function-like macros for reading and storing short/long values in
+  machine format, from/to some place in memory. V should be a variable
+  (not in a register) and M a pointer to a byte.
+*/
+#ifdef WORDS_BIGENDIAN
+#include "big_endian.h"
+#else
+#include "little_endian.h"
+#endif
+
+#endif /* MY_BYTEORDER_INCLUDED */
diff --git a/include/my_default.h b/include/my_default.h
new file mode 100644
index 00000000000..1d556de69ee
--- /dev/null
+++ b/include/my_default.h
@@ -0,0 +1,50 @@
+/* Copyright (C) 2013 Monty Program Ab
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+/* Definitions for mysys/my_default.c */
+
+#ifndef MY_DEFAULT_INCLUDED
+#define MY_DEFAULT_INCLUDED
+
+C_MODE_START
+
+extern const char *my_defaults_extra_file;
+extern const char *my_defaults_group_suffix;
+extern const char *my_defaults_file;
+extern my_bool my_getopt_use_args_separator;
+extern my_bool my_getopt_is_args_separator(const char* arg);
+
+/* Define the type of function to be passed to process_default_option_files */
+typedef int (*Process_option_func)(void *ctx, const char *group_name,
+ const char *option);
+
+extern int get_defaults_options(int argc, char **argv,
+ char **defaults, char **extra_defaults,
+ char **group_suffix);
+extern int my_load_defaults(const char *conf_file, const char **groups,
+ int *argc, char ***argv, const char ***);
+extern int load_defaults(const char *conf_file, const char **groups,
+ int *argc, char ***argv);
+extern int my_search_option_files(const char *conf_file, int *argc,
+ char ***argv, uint *args_used,
+ Process_option_func func, void *func_ctx,
+ const char **default_directories);
+extern void free_defaults(char **argv);
+extern void my_print_default_files(const char *conf_file);
+extern void print_defaults(const char *conf_file, const char **groups);
+
+C_MODE_END
+
+#endif /* MY_DEFAULT_INCLUDED */
diff --git a/include/my_getopt.h b/include/my_getopt.h
index 589d9c9880c..2cbbca9cab9 100644
--- a/include/my_getopt.h
+++ b/include/my_getopt.h
@@ -17,7 +17,9 @@
#ifndef _my_getopt_h
#define _my_getopt_h
-#include "my_sys.h" /* loglevel */
+#include "my_sys.h" /* loglevel */
+/* my_getopt and my_default are almost always used together */
+#include <my_default.h>
C_MODE_START
@@ -85,7 +87,6 @@ struct my_option
void *app_type; /**< To be used by an application */
};
-
typedef my_bool (*my_get_one_option)(int, const struct my_option *, char *);
/**
diff --git a/include/my_global.h b/include/my_global.h
index 7c133268f59..db71a60238f 100644
--- a/include/my_global.h
+++ b/include/my_global.h
@@ -49,11 +49,6 @@
#define _POSIX_THREAD_CPUTIME
#endif /* __CYGWIN__ */
-/* to make command line shorter we'll define USE_PRAGMA_INTERFACE here */
-#ifdef USE_PRAGMA_IMPLEMENTATION
-#define USE_PRAGMA_INTERFACE
-#endif
-
#if defined(__OpenBSD__) && (OpenBSD >= 200411)
#define HAVE_ERRNO_AS_DEFINE
#endif
@@ -117,6 +112,7 @@
/* Define missing access() modes. */
#define F_OK 0
#define W_OK 2
+#define R_OK 4 /* Test for read permission. */
/* Define missing file locking constants. */
#define F_RDLCK 1
@@ -335,6 +331,9 @@ C_MODE_END
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
+#ifdef HAVE_SYS_TIMEB_H
+#include <sys/timeb.h> /* Avoid warnings on SCO */
+#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
@@ -1047,296 +1046,7 @@ typedef char my_bool; /* Small bool */
#define MY_HOW_OFTEN_TO_ALARM 2 /* How often we want info on screen */
#define MY_HOW_OFTEN_TO_WRITE 10000 /* How often we want info on screen */
-/*
- Define-funktions for reading and storing in machine independent format
- (low byte first)
-*/
-
-/* Optimized store functions for Intel x86 */
-#if defined(__i386__) || defined(_WIN32)
-#define sint2korr(A) (*((const int16 *) (A)))
-#define sint3korr(A) ((int32) ((((uchar) (A)[2]) & 128) ? \
- (((uint32) 255L << 24) | \
- (((uint32) (uchar) (A)[2]) << 16) |\
- (((uint32) (uchar) (A)[1]) << 8) | \
- ((uint32) (uchar) (A)[0])) : \
- (((uint32) (uchar) (A)[2]) << 16) |\
- (((uint32) (uchar) (A)[1]) << 8) | \
- ((uint32) (uchar) (A)[0])))
-#define sint4korr(A) (*((const long *) (A)))
-#define uint2korr(A) (*((const uint16 *) (A)))
-#if defined(HAVE_valgrind) && !defined(_WIN32)
-#define uint3korr(A) (uint32) (((uint32) ((uchar) (A)[0])) +\
- (((uint32) ((uchar) (A)[1])) << 8) +\
- (((uint32) ((uchar) (A)[2])) << 16))
-#else
-/*
- ATTENTION !
-
- Please, note, uint3korr reads 4 bytes (not 3) !
- It means, that you have to provide enough allocated space !
-*/
-#define uint3korr(A) (long) (*((const unsigned int *) (A)) & 0xFFFFFF)
-#endif /* HAVE_valgrind && !_WIN32 */
-#define uint4korr(A) (*((const uint32 *) (A)))
-#define uint5korr(A) ((ulonglong)(((uint32) ((uchar) (A)[0])) +\
- (((uint32) ((uchar) (A)[1])) << 8) +\
- (((uint32) ((uchar) (A)[2])) << 16) +\
- (((uint32) ((uchar) (A)[3])) << 24)) +\
- (((ulonglong) ((uchar) (A)[4])) << 32))
-#define uint6korr(A) ((ulonglong)(((uint32) ((uchar) (A)[0])) + \
- (((uint32) ((uchar) (A)[1])) << 8) + \
- (((uint32) ((uchar) (A)[2])) << 16) + \
- (((uint32) ((uchar) (A)[3])) << 24)) + \
- (((ulonglong) ((uchar) (A)[4])) << 32) + \
- (((ulonglong) ((uchar) (A)[5])) << 40))
-#define uint8korr(A) (*((const ulonglong *) (A)))
-#define sint8korr(A) (*((const longlong *) (A)))
-#define int2store(T,A) *((uint16*) (T))= (uint16) (A)
-#define int3store(T,A) do { *(T)= (uchar) ((A));\
- *(T+1)=(uchar) (((uint) (A) >> 8));\
- *(T+2)=(uchar) (((A) >> 16)); } while (0)
-#define int4store(T,A) *((long *) (T))= (long) (A)
-#define int5store(T,A) do { *(T)= (uchar)((A));\
- *((T)+1)=(uchar) (((A) >> 8));\
- *((T)+2)=(uchar) (((A) >> 16));\
- *((T)+3)=(uchar) (((A) >> 24)); \
- *((T)+4)=(uchar) (((A) >> 32)); } while(0)
-#define int6store(T,A) do { *(T)= (uchar)((A)); \
- *((T)+1)=(uchar) (((A) >> 8)); \
- *((T)+2)=(uchar) (((A) >> 16)); \
- *((T)+3)=(uchar) (((A) >> 24)); \
- *((T)+4)=(uchar) (((A) >> 32)); \
- *((T)+5)=(uchar) (((A) >> 40)); } while(0)
-#define int8store(T,A) *((ulonglong *) (T))= (ulonglong) (A)
-
-typedef union {
- double v;
- long m[2];
-} doubleget_union;
-#define doubleget(V,M) \
-do { doubleget_union _tmp; \
- _tmp.m[0] = *((const long*)(M)); \
- _tmp.m[1] = *(((const long*) (M))+1); \
- (V) = _tmp.v; } while(0)
-#define doublestore(T,V) do { *((long *) T) = ((const doubleget_union *)&V)->m[0]; \
- *(((long *) T)+1) = ((const doubleget_union *)&V)->m[1]; \
- } while (0)
-#define float4get(V,M) do { *((float *) &(V)) = *((const float*) (M)); } while(0)
-#define float8get(V,M) doubleget((V),(M))
-#define float4store(V,M) memcpy((uchar*) V,(uchar*) (&M),sizeof(float))
-#define floatstore(T,V) memcpy((uchar*)(T), (uchar*)(&V),sizeof(float))
-#define floatget(V,M) memcpy((uchar*) &V,(uchar*) (M),sizeof(float))
-#define float8store(V,M) doublestore((V),(M))
-#else
-
-/*
- We're here if it's not a IA-32 architecture (Win32 and UNIX IA-32 defines
- were done before)
-*/
-#define sint2korr(A) (int16) (((int16) ((uchar) (A)[0])) +\
- ((int16) ((int16) (A)[1]) << 8))
-#define sint3korr(A) ((int32) ((((uchar) (A)[2]) & 128) ? \
- (((uint32) 255L << 24) | \
- (((uint32) (uchar) (A)[2]) << 16) |\
- (((uint32) (uchar) (A)[1]) << 8) | \
- ((uint32) (uchar) (A)[0])) : \
- (((uint32) (uchar) (A)[2]) << 16) |\
- (((uint32) (uchar) (A)[1]) << 8) | \
- ((uint32) (uchar) (A)[0])))
-#define sint4korr(A) (int32) (((int32) ((uchar) (A)[0])) +\
- (((int32) ((uchar) (A)[1]) << 8)) +\
- (((int32) ((uchar) (A)[2]) << 16)) +\
- (((int32) ((int16) (A)[3]) << 24)))
-#define sint8korr(A) (longlong) uint8korr(A)
-#define uint2korr(A) (uint16) (((uint16) ((uchar) (A)[0])) +\
- ((uint16) ((uchar) (A)[1]) << 8))
-#define uint3korr(A) (uint32) (((uint32) ((uchar) (A)[0])) +\
- (((uint32) ((uchar) (A)[1])) << 8) +\
- (((uint32) ((uchar) (A)[2])) << 16))
-#define uint4korr(A) (uint32) (((uint32) ((uchar) (A)[0])) +\
- (((uint32) ((uchar) (A)[1])) << 8) +\
- (((uint32) ((uchar) (A)[2])) << 16) +\
- (((uint32) ((uchar) (A)[3])) << 24))
-#define uint5korr(A) ((ulonglong)(((uint32) ((uchar) (A)[0])) +\
- (((uint32) ((uchar) (A)[1])) << 8) +\
- (((uint32) ((uchar) (A)[2])) << 16) +\
- (((uint32) ((uchar) (A)[3])) << 24)) +\
- (((ulonglong) ((uchar) (A)[4])) << 32))
-#define uint6korr(A) ((ulonglong)(((uint32) ((uchar) (A)[0])) + \
- (((uint32) ((uchar) (A)[1])) << 8) + \
- (((uint32) ((uchar) (A)[2])) << 16) + \
- (((uint32) ((uchar) (A)[3])) << 24)) + \
- (((ulonglong) ((uchar) (A)[4])) << 32) + \
- (((ulonglong) ((uchar) (A)[5])) << 40))
-#define uint8korr(A) ((ulonglong)(((uint32) ((uchar) (A)[0])) +\
- (((uint32) ((uchar) (A)[1])) << 8) +\
- (((uint32) ((uchar) (A)[2])) << 16) +\
- (((uint32) ((uchar) (A)[3])) << 24)) +\
- (((ulonglong) (((uint32) ((uchar) (A)[4])) +\
- (((uint32) ((uchar) (A)[5])) << 8) +\
- (((uint32) ((uchar) (A)[6])) << 16) +\
- (((uint32) ((uchar) (A)[7])) << 24))) <<\
- 32))
-#define int2store(T,A) do { uint def_temp= (uint) (A) ;\
- *((uchar*) (T))= (uchar)(def_temp); \
- *((uchar*) (T)+1)=(uchar)((def_temp >> 8)); \
- } while(0)
-#define int3store(T,A) do { /*lint -save -e734 */\
- *((uchar*)(T))=(uchar) ((A));\
- *((uchar*) (T)+1)=(uchar) (((A) >> 8));\
- *((uchar*)(T)+2)=(uchar) (((A) >> 16)); \
- /*lint -restore */} while(0)
-#define int4store(T,A) do { *((char *)(T))=(char) ((A));\
- *(((char *)(T))+1)=(char) (((A) >> 8));\
- *(((char *)(T))+2)=(char) (((A) >> 16));\
- *(((char *)(T))+3)=(char) (((A) >> 24)); } while(0)
-#define int5store(T,A) do { *((char *)(T))= (char)((A)); \
- *(((char *)(T))+1)= (char)(((A) >> 8)); \
- *(((char *)(T))+2)= (char)(((A) >> 16)); \
- *(((char *)(T))+3)= (char)(((A) >> 24)); \
- *(((char *)(T))+4)= (char)(((A) >> 32)); \
- } while(0)
-#define int6store(T,A) do { *((char *)(T))= (char)((A)); \
- *(((char *)(T))+1)= (char)(((A) >> 8)); \
- *(((char *)(T))+2)= (char)(((A) >> 16)); \
- *(((char *)(T))+3)= (char)(((A) >> 24)); \
- *(((char *)(T))+4)= (char)(((A) >> 32)); \
- *(((char *)(T))+5)= (char)(((A) >> 40)); \
- } while(0)
-#define int8store(T,A) do { uint def_temp= (uint) (A), def_temp2= (uint) ((A) >> 32); \
- int4store((T),def_temp); \
- int4store((T+4),def_temp2); } while(0)
-#ifdef WORDS_BIGENDIAN
-#define float4store(T,A) do { *(T)= ((uchar *) &A)[3];\
- *((T)+1)=(char) ((uchar *) &A)[2];\
- *((T)+2)=(char) ((uchar *) &A)[1];\
- *((T)+3)=(char) ((uchar *) &A)[0]; } while(0)
-
-#define float4get(V,M) do { float def_temp;\
- ((uchar*) &def_temp)[0]=(M)[3];\
- ((uchar*) &def_temp)[1]=(M)[2];\
- ((uchar*) &def_temp)[2]=(M)[1];\
- ((uchar*) &def_temp)[3]=(M)[0];\
- (V)=def_temp; } while(0)
-#define float8store(T,V) do { *(T)= ((uchar *) &V)[7];\
- *((T)+1)=(char) ((uchar *) &V)[6];\
- *((T)+2)=(char) ((uchar *) &V)[5];\
- *((T)+3)=(char) ((uchar *) &V)[4];\
- *((T)+4)=(char) ((uchar *) &V)[3];\
- *((T)+5)=(char) ((uchar *) &V)[2];\
- *((T)+6)=(char) ((uchar *) &V)[1];\
- *((T)+7)=(char) ((uchar *) &V)[0]; } while(0)
-
-#define float8get(V,M) do { double def_temp;\
- ((uchar*) &def_temp)[0]=(M)[7];\
- ((uchar*) &def_temp)[1]=(M)[6];\
- ((uchar*) &def_temp)[2]=(M)[5];\
- ((uchar*) &def_temp)[3]=(M)[4];\
- ((uchar*) &def_temp)[4]=(M)[3];\
- ((uchar*) &def_temp)[5]=(M)[2];\
- ((uchar*) &def_temp)[6]=(M)[1];\
- ((uchar*) &def_temp)[7]=(M)[0];\
- (V) = def_temp; } while(0)
-#else
-#define float4get(V,M) memcpy(&V, (M), sizeof(float))
-#define float4store(V,M) memcpy(V, (&M), sizeof(float))
-
-#if defined(__FLOAT_WORD_ORDER) && (__FLOAT_WORD_ORDER == __BIG_ENDIAN)
-#define doublestore(T,V) do { *(((char*)T)+0)=(char) ((uchar *) &V)[4];\
- *(((char*)T)+1)=(char) ((uchar *) &V)[5];\
- *(((char*)T)+2)=(char) ((uchar *) &V)[6];\
- *(((char*)T)+3)=(char) ((uchar *) &V)[7];\
- *(((char*)T)+4)=(char) ((uchar *) &V)[0];\
- *(((char*)T)+5)=(char) ((uchar *) &V)[1];\
- *(((char*)T)+6)=(char) ((uchar *) &V)[2];\
- *(((char*)T)+7)=(char) ((uchar *) &V)[3]; }\
- while(0)
-#define doubleget(V,M) do { double def_temp;\
- ((uchar*) &def_temp)[0]=(M)[4];\
- ((uchar*) &def_temp)[1]=(M)[5];\
- ((uchar*) &def_temp)[2]=(M)[6];\
- ((uchar*) &def_temp)[3]=(M)[7];\
- ((uchar*) &def_temp)[4]=(M)[0];\
- ((uchar*) &def_temp)[5]=(M)[1];\
- ((uchar*) &def_temp)[6]=(M)[2];\
- ((uchar*) &def_temp)[7]=(M)[3];\
- (V) = def_temp; } while(0)
-#endif /* __FLOAT_WORD_ORDER */
-
-#define float8get(V,M) doubleget((V),(M))
-#define float8store(V,M) doublestore((V),(M))
-#endif /* WORDS_BIGENDIAN */
-
-#endif /* __i386__ OR _WIN32 */
-
-/*
- Macro for reading 32-bit integer from network byte order (big-endian)
- from unaligned memory location.
-*/
-#define int4net(A) (int32) (((uint32) ((uchar) (A)[3])) |\
- (((uint32) ((uchar) (A)[2])) << 8) |\
- (((uint32) ((uchar) (A)[1])) << 16) |\
- (((uint32) ((uchar) (A)[0])) << 24))
-/*
- Define-funktions for reading and storing in machine format from/to
- short/long to/from some place in memory V should be a (not
- register) variable, M is a pointer to byte
-*/
-
-#ifdef WORDS_BIGENDIAN
-
-#define ushortget(V,M) do { V = (uint16) (((uint16) ((uchar) (M)[1]))+\
- ((uint16) ((uint16) (M)[0]) << 8)); } while(0)
-#define shortget(V,M) do { V = (short) (((short) ((uchar) (M)[1]))+\
- ((short) ((short) (M)[0]) << 8)); } while(0)
-#define longget(V,M) do { int32 def_temp;\
- ((uchar*) &def_temp)[0]=(M)[0];\
- ((uchar*) &def_temp)[1]=(M)[1];\
- ((uchar*) &def_temp)[2]=(M)[2];\
- ((uchar*) &def_temp)[3]=(M)[3];\
- (V)=def_temp; } while(0)
-#define ulongget(V,M) do { uint32 def_temp;\
- ((uchar*) &def_temp)[0]=(M)[0];\
- ((uchar*) &def_temp)[1]=(M)[1];\
- ((uchar*) &def_temp)[2]=(M)[2];\
- ((uchar*) &def_temp)[3]=(M)[3];\
- (V)=def_temp; } while(0)
-#define shortstore(T,A) do { uint def_temp=(uint) (A) ;\
- *(((char*)T)+1)=(char)(def_temp); \
- *(((char*)T)+0)=(char)(def_temp >> 8); } while(0)
-#define longstore(T,A) do { *(((char*)T)+3)=((A));\
- *(((char*)T)+2)=(((A) >> 8));\
- *(((char*)T)+1)=(((A) >> 16));\
- *(((char*)T)+0)=(((A) >> 24)); } while(0)
-
-#define floatget(V,M) memcpy(&V, (M), sizeof(float))
-#define floatstore(T,V) memcpy((T), (void*) (&V), sizeof(float))
-#define doubleget(V,M) memcpy(&V, (M), sizeof(double))
-#define doublestore(T,V) memcpy((T), (void *) &V, sizeof(double))
-#define longlongget(V,M) memcpy(&V, (M), sizeof(ulonglong))
-#define longlongstore(T,V) memcpy((T), &V, sizeof(ulonglong))
-
-#else
-
-#define ushortget(V,M) do { V = uint2korr(M); } while(0)
-#define shortget(V,M) do { V = sint2korr(M); } while(0)
-#define longget(V,M) do { V = sint4korr(M); } while(0)
-#define ulongget(V,M) do { V = uint4korr(M); } while(0)
-#define shortstore(T,V) int2store(T,V)
-#define longstore(T,V) int4store(T,V)
-#ifndef floatstore
-#define floatstore(T,V) memcpy((T), (void *) (&V), sizeof(float))
-#define floatget(V,M) memcpy(&V, (M), sizeof(float))
-#endif
-#ifndef doubleget
-#define doubleget(V,M) memcpy(&V, (M), sizeof(double))
-#define doublestore(T,V) memcpy((T), (void *) &V, sizeof(double))
-#endif /* doubleget */
-#define longlongget(V,M) memcpy(&V, (M), sizeof(ulonglong))
-#define longlongstore(T,V) memcpy((T), &V, sizeof(ulonglong))
-
-#endif /* WORDS_BIGENDIAN */
+#include <my_byteorder.h>
#ifdef HAVE_CHARSET_utf8
#define MYSQL_UNIVERSAL_CLIENT_CHARSET "utf8"
@@ -1397,10 +1107,6 @@ static inline char *dlerror(void)
#endif
/* Define some useful general macros (should be done after all headers). */
-#if !defined(max)
-#define max(a, b) ((a) > (b) ? (a) : (b))
-#define min(a, b) ((a) < (b) ? (a) : (b))
-#endif
#define MY_MAX(a, b) ((a) > (b) ? (a) : (b))
#define MY_MIN(a, b) ((a) < (b) ? (a) : (b))
diff --git a/include/my_handler_errors.h b/include/my_handler_errors.h
index f2c51773e83..24b977c38ce 100644
--- a/include/my_handler_errors.h
+++ b/include/my_handler_errors.h
@@ -84,8 +84,10 @@ static const char *handler_error_messages[]=
"Index column length exceeds limit",
"Index corrupted",
"Undo record too big",
- "Table is being used in foreign key check",
"Invalid InnoDB FTS Doc ID",
+ "Table is being used in foreign key check",
+ "Tablespace already exists",
+ "Too many columns",
"Row is not visible by the current transaction",
"Operation was interrupted by end user (probably kill command?)",
"Disk full"
diff --git a/include/my_md5.h b/include/my_md5.h
index 4f90541067b..5a0c60e7bfa 100644
--- a/include/my_md5.h
+++ b/include/my_md5.h
@@ -1,7 +1,8 @@
#ifndef MY_MD5_INCLUDED
#define MY_MD5_INCLUDED
-/* Copyright (C) 2000 MySQL AB
+/* Copyright (c) 2000, 2012, Oracle and/or its affiliates.
+ Copyright (c) 2013 Monty Program Ab
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -16,79 +17,36 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
-/* See md5.c for explanation and copyright information. */
+#include "m_string.h"
-/*
- * $FreeBSD: src/contrib/cvs/lib/md5.h,v 1.2 1999/12/11 15:10:02 peter Exp $
- */
+#define MD5_HASH_SIZE 16 /* Hash size in bytes */
-#if defined(HAVE_YASSL) || defined(HAVE_OPENSSL)
/*
- Use MD5 implementation provided by the SSL libraries.
+ Wrapper function for MD5 implementation.
*/
-
-#if defined(HAVE_YASSL)
-
-C_MODE_START
-
-void my_md5_hash(char *digest, const char *buf, int len);
-
-C_MODE_END
-
-#else /* HAVE_YASSL */
-
-#include <openssl/md5.h>
-
-#define MY_MD5_HASH(digest, buf, len) \
-do { \
- MD5_CTX ctx; \
- MD5_Init (&ctx); \
- MD5_Update (&ctx, buf, len); \
- MD5_Final (digest, &ctx); \
-} while (0)
-
-#endif /* HAVE_YASSL */
-
-#else /* HAVE_YASSL || HAVE_OPENSSL */
-/* Fallback to the MySQL's implementation. */
-
-/* Unlike previous versions of this code, uint32 need not be exactly
- 32 bits, merely 32 bits or more. Choosing a data type which is 32
- bits instead of 64 is not important; speed is considerably more
- important. ANSI guarantees that "unsigned long" will be big enough,
- and always using it seems to have few disadvantages. */
-typedef uint32 cvs_uint32;
-
-typedef struct {
- cvs_uint32 buf[4];
- cvs_uint32 bits[2];
- unsigned char in[64];
-} my_MD5Context;
-
-C_MODE_START
-
-void my_MD5Init (my_MD5Context *context);
-void my_MD5Update (my_MD5Context *context,
- unsigned char const *buf, unsigned len);
-void my_MD5Final (unsigned char digest[16],
- my_MD5Context *context);
-
-C_MODE_END
-
-#define MY_MD5_HASH(digest,buf,len) \
-do { \
- my_MD5Context ctx; \
- my_MD5Init (&ctx); \
- my_MD5Update (&ctx, buf, len); \
- my_MD5Final (digest, &ctx); \
-} while (0)
-
-#endif /* defined(HAVE_YASSL) || defined(HAVE_OPENSSL) */
-
-C_MODE_START
+#ifdef __cplusplus
+extern "C" {
+#endif
void compute_md5_hash(char *digest, const char *buf, int len);
-C_MODE_END
+/*
+ Convert an array of bytes to a hexadecimal representation.
+
+ Used to generate a hexadecimal representation of a message digest.
+*/
+static inline void array_to_hex(char *to, const unsigned char *str, uint len)
+{
+ const unsigned char *str_end= str + len;
+ for (; str != str_end; ++str)
+ {
+ *to++= _dig_vec_lower[((uchar) *str) >> 4];
+ *to++= _dig_vec_lower[((uchar) *str) & 0x0F];
+ }
+}
+
+#ifdef __cplusplus
+}
+#endif
#endif /* MY_MD5_INCLUDED */
diff --git a/include/my_rnd.h b/include/my_rnd.h
new file mode 100644
index 00000000000..b4a5d735811
--- /dev/null
+++ b/include/my_rnd.h
@@ -0,0 +1,32 @@
+/* Copyright (C) 2013 Monty Program Ab
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 or later of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+
+#ifndef _my_rnd_h
+#define _my_rnd_h
+
+C_MODE_START
+
+struct my_rnd_struct {
+ unsigned long seed1,seed2,max_value;
+ double max_value_dbl;
+};
+
+void my_rnd_init(struct my_rnd_struct *rand_st, ulong seed1, ulong seed2);
+double my_rnd(struct my_rnd_struct *rand_st);
+double my_rnd_ssl(struct my_rnd_struct *rand_st);
+
+C_MODE_END
+
+#endif /* _my_rnd_h */
diff --git a/include/my_sys.h b/include/my_sys.h
index 42ee9c915da..1a9c5f887a8 100644
--- a/include/my_sys.h
+++ b/include/my_sys.h
@@ -173,8 +173,6 @@ extern void *my_memdup(const void *from,size_t length,myf MyFlags);
extern char *my_strdup(const char *from,myf MyFlags);
extern char *my_strndup(const char *from, size_t length, myf MyFlags);
-extern int sf_leaking_memory; /* set to 1 to disable memleak detection */
-
#ifdef HAVE_LARGE_PAGES
extern uint my_get_large_page_size(void);
extern uchar * my_large_malloc(size_t size, myf my_flags);
@@ -198,14 +196,18 @@ extern void my_large_free(uchar *ptr);
#endif /* GNUC */
#define my_alloca(SZ) alloca((size_t) (SZ))
#define my_afree(PTR) ((void)0)
+#define my_safe_alloca(size, max_alloca_sz) ((size <= max_alloca_sz) ? \
+ my_alloca(size) : \
+ my_malloc(size, MYF(0)))
+#define my_safe_afree(ptr, size, max_alloca_sz) if (size > max_alloca_sz) \
+ my_free(ptr)
#else
#define my_alloca(SZ) my_malloc(SZ,MYF(MY_FAE))
#define my_afree(PTR) my_free(PTR)
+#define my_safe_alloca(size, max_alloca_sz) my_alloca(size)
+#define my_safe_afree(ptr, size, max_alloca_sz) my_afree(ptr)
#endif /* HAVE_ALLOCA */
-#define my_safe_alloca(size, min_length) ((size <= min_length) ? my_alloca(size) : my_malloc(size,MYF(MY_FAE)))
-#define my_safe_afree(ptr, size, min_length) ((size <= min_length) ? my_afree(ptr) : my_free(ptr))
-
#ifndef errno /* did we already get it? */
#ifdef HAVE_ERRNO_AS_DEFINE
#include <errno.h> /* errno is a define */
@@ -222,6 +224,7 @@ extern void (*fatal_error_handler_hook)(uint my_err, const char *str,
myf MyFlags);
extern uint my_file_limit;
extern ulonglong my_thread_stack_size;
+extern int sf_leaking_memory; /* set to 1 to disable memleak detection */
extern void (*proc_info_hook)(void *, const PSI_stage_info *, PSI_stage_info *,
const char *, const char *, const unsigned int);
@@ -264,11 +267,6 @@ extern my_bool my_disable_locking, my_disable_async_io,
extern my_bool my_disable_sync;
extern char wild_many,wild_one,wild_prefix;
extern const char *charsets_dir;
-/* from default.c */
-extern const char *my_defaults_extra_file;
-extern const char *my_defaults_group_suffix;
-extern const char *my_defaults_file;
-
extern my_bool timed_mutexes;
enum loglevel {
@@ -565,13 +563,8 @@ my_off_t my_b_safe_tell(IO_CACHE* info); /* picks the correct tell() */
typedef uint32 ha_checksum;
extern ulong my_crc_dbug_check;
-/* Define the type of function to be passed to process_default_option_files */
-typedef int (*Process_option_func)(void *ctx, const char *group_name,
- const char *option);
-
#include <my_alloc.h>
-
/* Prototypes for mysys and my_func functions */
extern int my_copy(const char *from,const char *to,myf MyFlags);
@@ -630,6 +623,13 @@ extern int my_access(const char *path, int amode);
extern int check_if_legal_filename(const char *path);
extern int check_if_legal_tablename(const char *path);
+#ifdef __WIN__
+extern my_bool is_filename_allowed(const char *name, size_t length,
+ my_bool allow_current_dir);
+#else /* __WIN__ */
+# define is_filename_allowed(name, length, allow_cwd) (TRUE)
+#endif /* __WIN__ */
+
#ifdef _WIN32
extern int nt_share_delete(const char *name,myf MyFlags);
#define my_delete_allow_opened(fname,flags) nt_share_delete((fname),(flags))
@@ -662,15 +662,16 @@ extern void thr_set_sync_wait_callback(void (*before_sync)(void),
extern int my_sync(File fd, myf my_flags);
extern int my_sync_dir(const char *dir_name, myf my_flags);
extern int my_sync_dir_by_file(const char *file_name, myf my_flags);
-extern void my_error(int nr,myf MyFlags, ...);
+extern const char *my_get_err_msg(uint nr);
+extern void my_error(uint nr,myf MyFlags, ...);
extern void my_printf_error(uint my_err, const char *format,
myf MyFlags, ...)
ATTRIBUTE_FORMAT(printf, 2, 4);
extern void my_printv_error(uint error, const char *format, myf MyFlags,
va_list ap);
extern int my_error_register(const char** (*get_errmsgs) (),
- int first, int last);
-extern const char **my_error_unregister(int first, int last);
+ uint first, uint last);
+extern const char **my_error_unregister(uint first, uint last);
extern void my_message(uint my_err, const char *str,myf MyFlags);
extern void my_message_stderr(uint my_err, const char *str, myf MyFlags);
extern my_bool my_init(void);
@@ -781,7 +782,8 @@ extern size_t my_b_gets(IO_CACHE *info, char *to, size_t max_length);
extern my_off_t my_b_filelength(IO_CACHE *info);
extern size_t my_b_write_backtick_quote(IO_CACHE *info, const char *str,
size_t len);
-extern size_t my_b_printf(IO_CACHE *info, const char* fmt, ...);
+extern size_t my_b_printf(IO_CACHE *info, const char* fmt, ...)
+ ATTRIBUTE_FORMAT(printf, 2, 3);
extern size_t my_b_vprintf(IO_CACHE *info, const char* fmt, va_list ap);
extern my_bool open_cached_file(IO_CACHE *cache,const char *dir,
const char *prefix, size_t cache_size,
@@ -860,22 +862,6 @@ static inline char *safe_strdup_root(MEM_ROOT *root, const char *str)
}
extern char *strmake_root(MEM_ROOT *root,const char *str,size_t len);
extern void *memdup_root(MEM_ROOT *root,const void *str, size_t len);
-extern int get_defaults_options(int argc, char **argv,
- char **defaults, char **extra_defaults,
- char **group_suffix);
-extern my_bool my_getopt_use_args_separator;
-extern my_bool my_getopt_is_args_separator(const char* arg);
-extern int my_load_defaults(const char *conf_file, const char **groups,
- int *argc, char ***argv, const char ***);
-extern int load_defaults(const char *conf_file, const char **groups,
- int *argc, char ***argv);
-extern int my_search_option_files(const char *conf_file, int *argc,
- char ***argv, uint *args_used,
- Process_option_func func, void *func_ctx,
- const char **default_directories);
-extern void free_defaults(char **argv);
-extern void my_print_default_files(const char *conf_file);
-extern void print_defaults(const char *conf_file, const char **groups);
extern my_bool my_compress(uchar *, size_t *, size_t *);
extern my_bool my_uncompress(uchar *, size_t , size_t *);
extern uchar *my_compress_alloc(const uchar *packet, size_t *len,
@@ -967,14 +953,6 @@ void my_uuid(uchar *guid);
void my_uuid2str(const uchar *guid, char *s);
void my_uuid_end();
-struct my_rnd_struct {
- unsigned long seed1,seed2,max_value;
- double max_value_dbl;
-};
-
-void my_rnd_init(struct my_rnd_struct *rand_st, ulong seed1, ulong seed2);
-double my_rnd(struct my_rnd_struct *rand_st);
-
/* character sets */
extern uint get_charset_number(const char *cs_name, uint cs_flags);
extern uint get_collation_number(const char *name);
@@ -1037,6 +1015,5 @@ void my_init_mysys_psi_keys(void);
struct st_mysql_file;
extern struct st_mysql_file *mysql_stdin;
-
C_MODE_END
#endif /* _my_sys_h */
diff --git a/include/my_time.h b/include/my_time.h
index 9bd545bb850..c7a3e17d236 100644
--- a/include/my_time.h
+++ b/include/my_time.h
@@ -138,8 +138,8 @@ void my_init_time(void);
estimate.
RETURN VALUES
- FALSE The value seems sane
- TRUE The MYSQL_TIME value is definitely out of range
+ TRUE The value seems sane
+ FALSE The MYSQL_TIME value is definitely out of range
*/
static inline my_bool validate_timestamp_range(const MYSQL_TIME *t)
diff --git a/include/mysql/client_authentication.h b/include/mysql/client_authentication.h
new file mode 100644
index 00000000000..2bd2fc98bac
--- /dev/null
+++ b/include/mysql/client_authentication.h
@@ -0,0 +1,13 @@
+#ifndef CLIENT_AUTHENTICATION_H
+#define CLIENT_AUTHENTICATION_H
+#include "mysql.h"
+#include "mysql/client_plugin.h"
+
+C_MODE_START
+int sha256_password_auth_client(MYSQL_PLUGIN_VIO *vio, MYSQL *mysql);
+int sha256_password_init(char *, size_t, int, va_list);
+int sha256_password_deinit(void);
+C_MODE_END
+
+#endif
+
diff --git a/include/mysql/plugin.h b/include/mysql/plugin.h
index 38573180232..4220e73ee11 100644
--- a/include/mysql/plugin.h
+++ b/include/mysql/plugin.h
@@ -45,6 +45,8 @@ class Item;
#define MYSQL_THD void*
#endif
+typedef void * MYSQL_PLUGIN;
+
#include <mysql/services.h>
#define MYSQL_XIDDATASIZE 128
@@ -69,10 +71,10 @@ typedef struct st_mysql_xid MYSQL_XID;
*/
/* MySQL plugin interface version */
-#define MYSQL_PLUGIN_INTERFACE_VERSION 0x0103
+#define MYSQL_PLUGIN_INTERFACE_VERSION 0x0104
/* MariaDB plugin interface version */
-#define MARIA_PLUGIN_INTERFACE_VERSION 0x0104
+#define MARIA_PLUGIN_INTERFACE_VERSION 0x0105
/*
The allowable types of plugins
@@ -85,7 +87,8 @@ typedef struct st_mysql_xid MYSQL_XID;
#define MYSQL_AUDIT_PLUGIN 5 /* The Audit plugin type */
#define MYSQL_REPLICATION_PLUGIN 6 /* The replication plugin type */
#define MYSQL_AUTHENTICATION_PLUGIN 7 /* The authentication plugin type */
-#define MYSQL_MAX_PLUGIN_TYPE_NUM 8 /* The number of plugin types */
+#define MYSQL_VALIDATE_PASSWORD_PLUGIN 8 /* validate password plugin type */
+#define MYSQL_MAX_PLUGIN_TYPE_NUM 9 /* The number of plugin types */
/* We use the following strings to define licenses for plugins */
#define PLUGIN_LICENSE_PROPRIETARY 0
@@ -558,7 +561,7 @@ struct handlerton;
/*
API for Replication plugin. (MYSQL_REPLICATION_PLUGIN)
*/
- #define MYSQL_REPLICATION_INTERFACE_VERSION 0x0100
+ #define MYSQL_REPLICATION_INTERFACE_VERSION 0x0200
/**
Replication plugin descriptor
@@ -606,6 +609,7 @@ int thd_sql_command(const MYSQL_THD thd);
void **thd_ha_data(const MYSQL_THD thd, const struct handlerton *hton);
void thd_storage_lock_wait(MYSQL_THD thd, long long value);
int thd_tx_isolation(const MYSQL_THD thd);
+int thd_tx_is_read_only(const MYSQL_THD thd);
char *thd_security_context(MYSQL_THD thd, char *buffer, unsigned int length,
unsigned int max_query_len);
/* Increments the row counter, see THD::row_count */
diff --git a/include/mysql/plugin_audit.h.pp b/include/mysql/plugin_audit.h.pp
index fbd4ec2dd3f..c3ba7eec0d6 100644
--- a/include/mysql/plugin_audit.h.pp
+++ b/include/mysql/plugin_audit.h.pp
@@ -1,4 +1,5 @@
#include "plugin.h"
+typedef void * MYSQL_PLUGIN;
#include <mysql/services.h>
#include <mysql/service_my_snprintf.h>
extern struct my_snprintf_service_st {
@@ -232,6 +233,7 @@ int thd_sql_command(const void* thd);
void **thd_ha_data(const void* thd, const struct handlerton *hton);
void thd_storage_lock_wait(void* thd, long long value);
int thd_tx_isolation(const void* thd);
+int thd_tx_is_read_only(const void* thd);
char *thd_security_context(void* thd, char *buffer, unsigned int length,
unsigned int max_query_len);
void thd_inc_row_count(void* thd);
diff --git a/include/mysql/plugin_auth.h.pp b/include/mysql/plugin_auth.h.pp
index 46811825142..4f04d51cb52 100644
--- a/include/mysql/plugin_auth.h.pp
+++ b/include/mysql/plugin_auth.h.pp
@@ -1,4 +1,5 @@
#include <mysql/plugin.h>
+typedef void * MYSQL_PLUGIN;
#include <mysql/services.h>
#include <mysql/service_my_snprintf.h>
extern struct my_snprintf_service_st {
@@ -232,6 +233,7 @@ int thd_sql_command(const void* thd);
void **thd_ha_data(const void* thd, const struct handlerton *hton);
void thd_storage_lock_wait(void* thd, long long value);
int thd_tx_isolation(const void* thd);
+int thd_tx_is_read_only(const void* thd);
char *thd_security_context(void* thd, char *buffer, unsigned int length,
unsigned int max_query_len);
void thd_inc_row_count(void* thd);
diff --git a/include/mysql/plugin_ftparser.h.pp b/include/mysql/plugin_ftparser.h.pp
index 49cf7e5b931..3a978645c24 100644
--- a/include/mysql/plugin_ftparser.h.pp
+++ b/include/mysql/plugin_ftparser.h.pp
@@ -1,4 +1,5 @@
#include "plugin.h"
+typedef void * MYSQL_PLUGIN;
#include <mysql/services.h>
#include <mysql/service_my_snprintf.h>
extern struct my_snprintf_service_st {
@@ -185,6 +186,7 @@ int thd_sql_command(const void* thd);
void **thd_ha_data(const void* thd, const struct handlerton *hton);
void thd_storage_lock_wait(void* thd, long long value);
int thd_tx_isolation(const void* thd);
+int thd_tx_is_read_only(const void* thd);
char *thd_security_context(void* thd, char *buffer, unsigned int length,
unsigned int max_query_len);
void thd_inc_row_count(void* thd);
diff --git a/include/mysql/psi/mysql_file.h b/include/mysql/psi/mysql_file.h
index 816ac713631..c226258f462 100644
--- a/include/mysql/psi/mysql_file.h
+++ b/include/mysql/psi/mysql_file.h
@@ -518,7 +518,7 @@ static inline void inline_mysql_file_register(
)
{
#ifdef HAVE_PSI_FILE_INTERFACE
- PSI_CALL(register_file)(category, info, count);
+ PSI_FILE_CALL(register_file)(category, info, count);
#endif
}
@@ -533,13 +533,13 @@ inline_mysql_file_fgets(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_stream_locker)(&state, file->m_psi,
- PSI_FILE_READ);
+ locker= PSI_FILE_CALL(get_thread_file_stream_locker)
+ (&state, file->m_psi, PSI_FILE_READ);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) size, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, (size_t) size, src_file, src_line);
result= fgets(str, size, file->m_file);
- PSI_CALL(end_file_wait)(locker, result ? strlen(result) : 0);
+ PSI_FILE_CALL(end_file_wait)(locker, result ? strlen(result) : 0);
return result;
}
#endif
@@ -559,13 +559,13 @@ inline_mysql_file_fgetc(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_stream_locker)(&state, file->m_psi,
- PSI_FILE_READ);
+ locker= PSI_FILE_CALL(get_thread_file_stream_locker)
+ (&state, file->m_psi, PSI_FILE_READ);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 1, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, (size_t) 1, src_file, src_line);
result= fgetc(file->m_file);
- PSI_CALL(end_file_wait)(locker, (size_t) 1);
+ PSI_FILE_CALL(end_file_wait)(locker, (size_t) 1);
return result;
}
#endif
@@ -586,14 +586,14 @@ inline_mysql_file_fputs(
struct PSI_file_locker *locker;
PSI_file_locker_state state;
size_t bytes;
- locker= PSI_CALL(get_thread_file_stream_locker)(&state, file->m_psi,
- PSI_FILE_WRITE);
+ locker= PSI_FILE_CALL(get_thread_file_stream_locker)
+ (&state, file->m_psi, PSI_FILE_WRITE);
if (likely(locker != NULL))
{
bytes= str ? strlen(str) : 0;
- PSI_CALL(start_file_wait)(locker, bytes, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, bytes, src_file, src_line);
result= fputs(str, file->m_file);
- PSI_CALL(end_file_wait)(locker, bytes);
+ PSI_FILE_CALL(end_file_wait)(locker, bytes);
return result;
}
#endif
@@ -613,13 +613,13 @@ inline_mysql_file_fputc(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_stream_locker)(&state, file->m_psi,
- PSI_FILE_WRITE);
+ locker= PSI_FILE_CALL(get_thread_file_stream_locker)
+ (&state, file->m_psi, PSI_FILE_WRITE);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 1, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, (size_t) 1, src_file, src_line);
result= fputc(c, file->m_file);
- PSI_CALL(end_file_wait)(locker, (size_t) 1);
+ PSI_FILE_CALL(end_file_wait)(locker, (size_t) 1);
return result;
}
#endif
@@ -639,15 +639,15 @@ inline_mysql_file_fprintf(MYSQL_FILE *file, const char *format, ...)
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_stream_locker)(&state, file->m_psi,
- PSI_FILE_WRITE);
+ locker= PSI_FILE_CALL(get_thread_file_stream_locker)
+ (&state, file->m_psi, PSI_FILE_WRITE);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 0, __FILE__, __LINE__);
+ PSI_FILE_CALL(start_file_wait)(locker, (size_t) 0, __FILE__, __LINE__);
va_start(args, format);
result= vfprintf(file->m_file, format, args);
va_end(args);
- PSI_CALL(end_file_wait)(locker, (size_t) result);
+ PSI_FILE_CALL(end_file_wait)(locker, (size_t) result);
return result;
}
#endif
@@ -669,13 +669,13 @@ inline_mysql_file_vfprintf(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_stream_locker)(&state, file->m_psi,
- PSI_FILE_WRITE);
+ locker= PSI_FILE_CALL(get_thread_file_stream_locker)
+ (&state, file->m_psi, PSI_FILE_WRITE);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
result= vfprintf(file->m_file, format, args);
- PSI_CALL(end_file_wait)(locker, (size_t) result);
+ PSI_FILE_CALL(end_file_wait)(locker, (size_t) result);
return result;
}
#endif
@@ -695,13 +695,13 @@ inline_mysql_file_fflush(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_stream_locker)(&state, file->m_psi,
- PSI_FILE_FLUSH);
+ locker= PSI_FILE_CALL(get_thread_file_stream_locker)
+ (&state, file->m_psi, PSI_FILE_FLUSH);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
result= fflush(file->m_file);
- PSI_CALL(end_file_wait)(locker, (size_t) 0);
+ PSI_FILE_CALL(end_file_wait)(locker, (size_t) 0);
return result;
}
#endif
@@ -727,13 +727,13 @@ inline_mysql_file_fstat(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_descriptor_locker)(&state, filenr,
- PSI_FILE_FSTAT);
+ locker= PSI_FILE_CALL(get_thread_file_descriptor_locker)
+ (&state, filenr, PSI_FILE_FSTAT);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
result= my_fstat(filenr, stat_area, flags);
- PSI_CALL(end_file_wait)(locker, (size_t) 0);
+ PSI_FILE_CALL(end_file_wait)(locker, (size_t) 0);
return result;
}
#endif
@@ -753,14 +753,13 @@ inline_mysql_file_stat(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_name_locker)(&state,
- key, PSI_FILE_STAT,
- path, &locker);
+ locker= PSI_FILE_CALL(get_thread_file_name_locker)
+ (&state, key, PSI_FILE_STAT, path, &locker);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_open_wait)(locker, src_file, src_line);
+ PSI_FILE_CALL(start_file_open_wait)(locker, src_file, src_line);
result= my_stat(path, stat_area, flags);
- PSI_CALL(end_file_wait)(locker, (size_t) 0);
+ PSI_FILE_CALL(end_file_open_wait)(locker, result);
return result;
}
#endif
@@ -780,14 +779,14 @@ inline_mysql_file_chsize(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_descriptor_locker)(&state, file,
- PSI_FILE_CHSIZE);
+ locker= PSI_FILE_CALL(get_thread_file_descriptor_locker)
+ (&state, file, PSI_FILE_CHSIZE);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) newlength, src_file,
+ PSI_FILE_CALL(start_file_wait)(locker, (size_t) newlength, src_file,
src_line);
result= my_chsize(file, newlength, filler, flags);
- PSI_CALL(end_file_wait)(locker, (size_t) newlength);
+ PSI_FILE_CALL(end_file_wait)(locker, (size_t) newlength);
return result;
}
#endif
@@ -810,14 +809,14 @@ inline_mysql_file_fopen(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_name_locker)
+ locker= PSI_FILE_CALL(get_thread_file_name_locker)
(&state, key, PSI_FILE_STREAM_OPEN, filename, that);
if (likely(locker != NULL))
{
- that->m_psi= PSI_CALL(start_file_open_wait)(locker, src_file,
- src_line);
+ PSI_FILE_CALL(start_file_open_wait)
+ (locker, src_file, src_line);
that->m_file= my_fopen(filename, flags, myFlags);
- PSI_CALL(end_file_open_wait)(locker);
+ that->m_psi= PSI_FILE_CALL(end_file_open_wait)(locker, that->m_file);
if (unlikely(that->m_file == NULL))
{
my_free(that);
@@ -851,13 +850,13 @@ inline_mysql_file_fclose(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_stream_locker)(&state, file->m_psi,
- PSI_FILE_STREAM_CLOSE);
+ locker= PSI_FILE_CALL(get_thread_file_stream_locker)
+ (&state, file->m_psi, PSI_FILE_STREAM_CLOSE);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
+ PSI_FILE_CALL(start_file_close_wait)(locker, src_file, src_line);
result= my_fclose(file->m_file, flags);
- PSI_CALL(end_file_wait)(locker, (size_t) 0);
+ PSI_FILE_CALL(end_file_close_wait)(locker, result);
my_free(file);
return result;
}
@@ -881,17 +880,17 @@ inline_mysql_file_fread(
struct PSI_file_locker *locker;
PSI_file_locker_state state;
size_t bytes_read;
- locker= PSI_CALL(get_thread_file_stream_locker)(&state, file->m_psi,
- PSI_FILE_READ);
+ locker= PSI_FILE_CALL(get_thread_file_stream_locker)
+ (&state, file->m_psi, PSI_FILE_READ);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, count, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, count, src_file, src_line);
result= my_fread(file->m_file, buffer, count, flags);
if (flags & (MY_NABP | MY_FNABP))
bytes_read= (result == 0) ? count : 0;
else
bytes_read= (result != MY_FILE_ERROR) ? result : 0;
- PSI_CALL(end_file_wait)(locker, bytes_read);
+ PSI_FILE_CALL(end_file_wait)(locker, bytes_read);
return result;
}
#endif
@@ -912,17 +911,17 @@ inline_mysql_file_fwrite(
struct PSI_file_locker *locker;
PSI_file_locker_state state;
size_t bytes_written;
- locker= PSI_CALL(get_thread_file_stream_locker)(&state, file->m_psi,
- PSI_FILE_WRITE);
+ locker= PSI_FILE_CALL(get_thread_file_stream_locker)
+ (&state, file->m_psi, PSI_FILE_WRITE);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, count, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, count, src_file, src_line);
result= my_fwrite(file->m_file, buffer, count, flags);
if (flags & (MY_NABP | MY_FNABP))
bytes_written= (result == 0) ? count : 0;
else
bytes_written= (result != MY_FILE_ERROR) ? result : 0;
- PSI_CALL(end_file_wait)(locker, bytes_written);
+ PSI_FILE_CALL(end_file_wait)(locker, bytes_written);
return result;
}
#endif
@@ -942,13 +941,13 @@ inline_mysql_file_fseek(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_stream_locker)(&state, file->m_psi,
- PSI_FILE_SEEK);
+ locker= PSI_FILE_CALL(get_thread_file_stream_locker)
+ (&state, file->m_psi, PSI_FILE_SEEK);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
result= my_fseek(file->m_file, pos, whence, flags);
- PSI_CALL(end_file_wait)(locker, (size_t) 0);
+ PSI_FILE_CALL(end_file_wait)(locker, (size_t) 0);
return result;
}
#endif
@@ -968,13 +967,13 @@ inline_mysql_file_ftell(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_stream_locker)(&state, file->m_psi,
- PSI_FILE_TELL);
+ locker= PSI_FILE_CALL(get_thread_file_stream_locker)
+ (&state, file->m_psi, PSI_FILE_TELL);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
result= my_ftell(file->m_file, flags);
- PSI_CALL(end_file_wait)(locker, (size_t) 0);
+ PSI_FILE_CALL(end_file_wait)(locker, (size_t) 0);
return result;
}
#endif
@@ -994,13 +993,13 @@ inline_mysql_file_create(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_name_locker)(&state, key, PSI_FILE_CREATE,
- filename, &locker);
+ locker= PSI_FILE_CALL(get_thread_file_name_locker)
+ (&state, key, PSI_FILE_CREATE, filename, &locker);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_open_wait)(locker, src_file, src_line);
+ PSI_FILE_CALL(start_file_open_wait)(locker, src_file, src_line);
file= my_create(filename, create_flags, access_flags, myFlags);
- PSI_CALL(end_file_open_wait_and_bind_to_descriptor)(locker, file);
+ PSI_FILE_CALL(end_file_open_wait_and_bind_to_descriptor)(locker, file);
return file;
}
#endif
@@ -1024,7 +1023,7 @@ inline_mysql_file_create_temp(
*/
file= create_temp_file(to, dir, pfx, mode, myFlags);
#ifdef HAVE_PSI_FILE_INTERFACE
- PSI_CALL(create_file)(key, to, file);
+ PSI_FILE_CALL(create_file)(key, to, file);
#endif
return file;
}
@@ -1040,13 +1039,13 @@ inline_mysql_file_open(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_name_locker)(&state, key, PSI_FILE_OPEN,
- filename, &locker);
+ locker= PSI_FILE_CALL(get_thread_file_name_locker)
+ (&state, key, PSI_FILE_OPEN, filename, &locker);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_open_wait)(locker, src_file, src_line);
+ PSI_FILE_CALL(start_file_open_wait)(locker, src_file, src_line);
file= my_open(filename, flags, myFlags);
- PSI_CALL(end_file_open_wait_and_bind_to_descriptor)(locker, file);
+ PSI_FILE_CALL(end_file_open_wait_and_bind_to_descriptor)(locker, file);
return file;
}
#endif
@@ -1066,13 +1065,13 @@ inline_mysql_file_close(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_descriptor_locker)(&state, file,
- PSI_FILE_CLOSE);
+ locker= PSI_FILE_CALL(get_thread_file_descriptor_locker)
+ (&state, file, PSI_FILE_CLOSE);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
+ PSI_FILE_CALL(start_file_close_wait)(locker, src_file, src_line);
result= my_close(file, flags);
- PSI_CALL(end_file_wait)(locker, (size_t) 0);
+ PSI_FILE_CALL(end_file_close_wait)(locker, result);
return result;
}
#endif
@@ -1093,17 +1092,17 @@ inline_mysql_file_read(
struct PSI_file_locker *locker;
PSI_file_locker_state state;
size_t bytes_read;
- locker= PSI_CALL(get_thread_file_descriptor_locker)(&state, file,
- PSI_FILE_READ);
+ locker= PSI_FILE_CALL(get_thread_file_descriptor_locker)
+ (&state, file, PSI_FILE_READ);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, count, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, count, src_file, src_line);
result= my_read(file, buffer, count, flags);
if (flags & (MY_NABP | MY_FNABP))
bytes_read= (result == 0) ? count : 0;
else
bytes_read= (result != MY_FILE_ERROR) ? result : 0;
- PSI_CALL(end_file_wait)(locker, bytes_read);
+ PSI_FILE_CALL(end_file_wait)(locker, bytes_read);
return result;
}
#endif
@@ -1124,17 +1123,17 @@ inline_mysql_file_write(
struct PSI_file_locker *locker;
PSI_file_locker_state state;
size_t bytes_written;
- locker= PSI_CALL(get_thread_file_descriptor_locker)(&state, file,
- PSI_FILE_WRITE);
+ locker= PSI_FILE_CALL(get_thread_file_descriptor_locker)
+ (&state, file, PSI_FILE_WRITE);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, count, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, count, src_file, src_line);
result= my_write(file, buffer, count, flags);
if (flags & (MY_NABP | MY_FNABP))
bytes_written= (result == 0) ? count : 0;
else
bytes_written= (result != MY_FILE_ERROR) ? result : 0;
- PSI_CALL(end_file_wait)(locker, bytes_written);
+ PSI_FILE_CALL(end_file_wait)(locker, bytes_written);
return result;
}
#endif
@@ -1155,16 +1154,17 @@ inline_mysql_file_pread(
struct PSI_file_locker *locker;
PSI_file_locker_state state;
size_t bytes_read;
- locker= PSI_CALL(get_thread_file_descriptor_locker)(&state, file, PSI_FILE_READ);
+ locker= PSI_FILE_CALL(get_thread_file_descriptor_locker)
+ (&state, file, PSI_FILE_READ);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, count, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, count, src_file, src_line);
result= my_pread(file, buffer, count, offset, flags);
if (flags & (MY_NABP | MY_FNABP))
bytes_read= (result == 0) ? count : 0;
else
bytes_read= (result != MY_FILE_ERROR) ? result : 0;
- PSI_CALL(end_file_wait)(locker, bytes_read);
+ PSI_FILE_CALL(end_file_wait)(locker, bytes_read);
return result;
}
#endif
@@ -1185,17 +1185,17 @@ inline_mysql_file_pwrite(
struct PSI_file_locker *locker;
PSI_file_locker_state state;
size_t bytes_written;
- locker= PSI_CALL(get_thread_file_descriptor_locker)(&state, file,
- PSI_FILE_WRITE);
+ locker= PSI_FILE_CALL(get_thread_file_descriptor_locker)
+ (&state, file, PSI_FILE_WRITE);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, count, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, count, src_file, src_line);
result= my_pwrite(file, buffer, count, offset, flags);
if (flags & (MY_NABP | MY_FNABP))
bytes_written= (result == 0) ? count : 0;
else
bytes_written= (result != MY_FILE_ERROR) ? result : 0;
- PSI_CALL(end_file_wait)(locker, bytes_written);
+ PSI_FILE_CALL(end_file_wait)(locker, bytes_written);
return result;
}
#endif
@@ -1215,12 +1215,13 @@ inline_mysql_file_seek(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_descriptor_locker)(&state, file, PSI_FILE_SEEK);
+ locker= PSI_FILE_CALL(get_thread_file_descriptor_locker)
+ (&state, file, PSI_FILE_SEEK);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
result= my_seek(file, pos, whence, flags);
- PSI_CALL(end_file_wait)(locker, (size_t) 0);
+ PSI_FILE_CALL(end_file_wait)(locker, (size_t) 0);
return result;
}
#endif
@@ -1240,12 +1241,13 @@ inline_mysql_file_tell(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_descriptor_locker)(&state, file, PSI_FILE_TELL);
+ locker= PSI_FILE_CALL(get_thread_file_descriptor_locker)
+ (&state, file, PSI_FILE_TELL);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
result= my_tell(file, flags);
- PSI_CALL(end_file_wait)(locker, (size_t) 0);
+ PSI_FILE_CALL(end_file_wait)(locker, (size_t) 0);
return result;
}
#endif
@@ -1265,13 +1267,13 @@ inline_mysql_file_delete(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_name_locker)(&state, key, PSI_FILE_DELETE,
- name, &locker);
+ locker= PSI_FILE_CALL(get_thread_file_name_locker)
+ (&state, key, PSI_FILE_DELETE, name, &locker);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
+ PSI_FILE_CALL(start_file_close_wait)(locker, src_file, src_line);
result= my_delete(name, flags);
- PSI_CALL(end_file_wait)(locker, (size_t) 0);
+ PSI_FILE_CALL(end_file_close_wait)(locker, result);
return result;
}
#endif
@@ -1291,13 +1293,13 @@ inline_mysql_file_rename(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_name_locker)(&state, key, PSI_FILE_RENAME,
- to, &locker);
+ locker= PSI_FILE_CALL(get_thread_file_name_locker)
+ (&state, key, PSI_FILE_RENAME, to, &locker);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
result= my_rename(from, to, flags);
- PSI_CALL(end_file_wait)(locker, (size_t) 0);
+ PSI_FILE_CALL(end_file_wait)(locker, (size_t) 0);
return result;
}
#endif
@@ -1318,14 +1320,14 @@ inline_mysql_file_create_with_symlink(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_name_locker)(&state, key, PSI_FILE_CREATE,
- filename, &locker);
+ locker= PSI_FILE_CALL(get_thread_file_name_locker)
+ (&state, key, PSI_FILE_CREATE, filename, &locker);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_open_wait)(locker, src_file, src_line);
+ PSI_FILE_CALL(start_file_open_wait)(locker, src_file, src_line);
file= my_create_with_symlink(linkname, filename, create_flags, access_flags,
flags);
- PSI_CALL(end_file_open_wait_and_bind_to_descriptor)(locker, file);
+ PSI_FILE_CALL(end_file_open_wait_and_bind_to_descriptor)(locker, file);
return file;
}
#endif
@@ -1346,13 +1348,13 @@ inline_mysql_file_delete_with_symlink(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_name_locker)(&state, key, PSI_FILE_DELETE,
- name, &locker);
+ locker= PSI_FILE_CALL(get_thread_file_name_locker)
+ (&state, key, PSI_FILE_DELETE, name, &locker);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
+ PSI_FILE_CALL(start_file_close_wait)(locker, src_file, src_line);
result= my_delete_with_symlink(name, flags);
- PSI_CALL(end_file_wait)(locker, (size_t) 0);
+ PSI_FILE_CALL(end_file_close_wait)(locker, result);
return result;
}
#endif
@@ -1372,13 +1374,13 @@ inline_mysql_file_rename_with_symlink(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_name_locker)(&state, key, PSI_FILE_RENAME,
- to, &locker);
+ locker= PSI_FILE_CALL(get_thread_file_name_locker)
+ (&state, key, PSI_FILE_RENAME, to, &locker);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
result= my_rename_with_symlink(from, to, flags);
- PSI_CALL(end_file_wait)(locker, (size_t) 0);
+ PSI_FILE_CALL(end_file_wait)(locker, (size_t) 0);
return result;
}
#endif
@@ -1398,12 +1400,13 @@ inline_mysql_file_sync(
#ifdef HAVE_PSI_FILE_INTERFACE
struct PSI_file_locker *locker;
PSI_file_locker_state state;
- locker= PSI_CALL(get_thread_file_descriptor_locker)(&state, fd, PSI_FILE_SYNC);
+ locker= PSI_FILE_CALL(get_thread_file_descriptor_locker)
+ (&state, fd, PSI_FILE_SYNC);
if (likely(locker != NULL))
{
- PSI_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
+ PSI_FILE_CALL(start_file_wait)(locker, (size_t) 0, src_file, src_line);
result= my_sync(fd, flags);
- PSI_CALL(end_file_wait)(locker, (size_t) 0);
+ PSI_FILE_CALL(end_file_wait)(locker, (size_t) 0);
return result;
}
#endif
diff --git a/include/mysql/psi/mysql_idle.h b/include/mysql/psi/mysql_idle.h
index 7a3fccfdb8c..c53d0ceb8c7 100644
--- a/include/mysql/psi/mysql_idle.h
+++ b/include/mysql/psi/mysql_idle.h
@@ -70,7 +70,7 @@ inline_mysql_start_idle_wait(PSI_idle_locker_state *state,
const char *src_file, int src_line)
{
struct PSI_idle_locker *locker;
- locker= PSI_CALL(start_idle_wait)(state, src_file, src_line);
+ locker= PSI_IDLE_CALL(start_idle_wait)(state, src_file, src_line);
return locker;
}
@@ -82,7 +82,7 @@ static inline void
inline_mysql_end_idle_wait(struct PSI_idle_locker *locker)
{
if (likely(locker != NULL))
- PSI_CALL(end_idle_wait)(locker);
+ PSI_IDLE_CALL(end_idle_wait)(locker);
}
#endif
diff --git a/include/mysql/psi/mysql_socket.h b/include/mysql/psi/mysql_socket.h
index c908032883a..e1d56539f85 100644
--- a/include/mysql/psi/mysql_socket.h
+++ b/include/mysql/psi/mysql_socket.h
@@ -29,6 +29,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
#ifdef __WIN__
#include <ws2def.h>
#include <winsock2.h>
+ #include <MSWSock.h>
#define SOCKBUF_T char
#else
#include <netinet/in.h>
@@ -121,7 +122,7 @@ mysql_socket_set_address(
{
#ifdef HAVE_PSI_SOCKET_INTERFACE
if (socket.m_psi != NULL)
- PSI_CALL(set_socket_info)(socket.m_psi, NULL, addr, addr_len);
+ PSI_SOCKET_CALL(set_socket_info)(socket.m_psi, NULL, addr, addr_len);
#endif
}
@@ -141,7 +142,7 @@ MYSQL_SOCKET socket __attribute__ ((unused))
{
#ifdef HAVE_PSI_SOCKET_INTERFACE
if (socket.m_psi != NULL)
- PSI_CALL(set_socket_thread_owner)(socket.m_psi);
+ PSI_SOCKET_CALL(set_socket_thread_owner)(socket.m_psi);
#endif
}
@@ -247,8 +248,8 @@ inline_mysql_start_socket_wait(PSI_socket_locker_state *state,
struct PSI_socket_locker *locker;
if (mysql_socket.m_psi != NULL)
{
- locker= PSI_CALL(start_socket_wait)(state, mysql_socket.m_psi, op,
- byte_count, src_file, src_line);
+ locker= PSI_SOCKET_CALL(start_socket_wait)
+ (state, mysql_socket.m_psi, op, byte_count, src_file, src_line);
}
else
locker= NULL;
@@ -263,7 +264,7 @@ static inline void
inline_mysql_end_socket_wait(struct PSI_socket_locker *locker, size_t byte_count)
{
if (locker != NULL)
- PSI_CALL(end_socket_wait)(locker, byte_count);
+ PSI_SOCKET_CALL(end_socket_wait)(locker, byte_count);
}
/**
@@ -276,7 +277,7 @@ static inline void
inline_mysql_socket_set_state(MYSQL_SOCKET socket, enum PSI_socket_state state)
{
if (socket.m_psi != NULL)
- PSI_CALL(set_socket_state)(socket.m_psi, state);
+ PSI_SOCKET_CALL(set_socket_state)(socket.m_psi, state);
}
#endif /* HAVE_PSI_SOCKET_INTERFACE */
@@ -537,7 +538,7 @@ static inline void inline_mysql_socket_register(
PSI_socket_info *info,
int count)
{
- PSI_CALL(register_socket)(category, info, count);
+ PSI_SOCKET_CALL(register_socket)(category, info, count);
}
#endif
@@ -551,16 +552,15 @@ inline_mysql_socket_socket
#endif
int domain, int type, int protocol)
{
- MYSQL_SOCKET mysql_socket;
+ MYSQL_SOCKET mysql_socket= MYSQL_INVALID_SOCKET;
mysql_socket.fd= socket(domain, type, protocol);
#ifdef HAVE_PSI_SOCKET_INTERFACE
- mysql_socket.m_psi= PSI_CALL(init_socket)(key, (const my_socket*)&mysql_socket.fd);
-
- if (likely(mysql_socket.fd != INVALID_SOCKET && mysql_socket.m_psi != NULL))
- PSI_CALL(set_socket_info)(mysql_socket.m_psi, &mysql_socket.fd, NULL, 0);
-#else
- mysql_socket.m_psi= NULL;
+ if (likely(mysql_socket.fd != INVALID_SOCKET))
+ {
+ mysql_socket.m_psi= PSI_SOCKET_CALL(init_socket)
+ (key, (const my_socket*)&mysql_socket.fd, NULL, 0);
+ }
#endif
return mysql_socket;
}
@@ -583,17 +583,18 @@ inline_mysql_socket_bind
/* Instrumentation start */
PSI_socket_locker_state state;
PSI_socket_locker *locker;
- locker= PSI_CALL(start_socket_wait)(&state, mysql_socket.m_psi,
- PSI_SOCKET_BIND, (size_t)0, src_file, src_line);
+ locker= PSI_SOCKET_CALL(start_socket_wait)
+ (&state, mysql_socket.m_psi, PSI_SOCKET_BIND, (size_t)0, src_file, src_line);
/* Instrumented code */
result= bind(mysql_socket.fd, addr, len);
/* Instrumentation end */
- PSI_CALL(set_socket_info)(mysql_socket.m_psi, NULL, addr, len);
+ if (result == 0)
+ PSI_SOCKET_CALL(set_socket_info)(mysql_socket.m_psi, NULL, addr, len);
if (locker != NULL)
- PSI_CALL(end_socket_wait)(locker, (size_t)0);
+ PSI_SOCKET_CALL(end_socket_wait)(locker, (size_t)0);
return result;
}
@@ -622,15 +623,15 @@ inline_mysql_socket_getsockname
/* Instrumentation start */
PSI_socket_locker *locker;
PSI_socket_locker_state state;
- locker= PSI_CALL(start_socket_wait)(&state, mysql_socket.m_psi,
- PSI_SOCKET_BIND, (size_t)0, src_file, src_line);
+ locker= PSI_SOCKET_CALL(start_socket_wait)
+ (&state, mysql_socket.m_psi, PSI_SOCKET_BIND, (size_t)0, src_file, src_line);
/* Instrumented code */
result= getsockname(mysql_socket.fd, addr, len);
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_socket_wait)(locker, (size_t)0);
+ PSI_SOCKET_CALL(end_socket_wait)(locker, (size_t)0);
return result;
}
@@ -660,15 +661,15 @@ inline_mysql_socket_connect
/* Instrumentation start */
PSI_socket_locker *locker;
PSI_socket_locker_state state;
- locker= PSI_CALL(start_socket_wait)(&state, mysql_socket.m_psi,
- PSI_SOCKET_CONNECT, (size_t)0, src_file, src_line);
+ locker= PSI_SOCKET_CALL(start_socket_wait)
+ (&state, mysql_socket.m_psi, PSI_SOCKET_CONNECT, (size_t)0, src_file, src_line);
/* Instrumented code */
result= connect(mysql_socket.fd, addr, len);
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_socket_wait)(locker, (size_t)0);
+ PSI_SOCKET_CALL(end_socket_wait)(locker, (size_t)0);
return result;
}
@@ -698,15 +699,15 @@ inline_mysql_socket_getpeername
/* Instrumentation start */
PSI_socket_locker *locker;
PSI_socket_locker_state state;
- locker= PSI_CALL(start_socket_wait)(&state, mysql_socket.m_psi,
- PSI_SOCKET_BIND, (size_t)0, src_file, src_line);
+ locker= PSI_SOCKET_CALL(start_socket_wait)
+ (&state, mysql_socket.m_psi, PSI_SOCKET_BIND, (size_t)0, src_file, src_line);
/* Instrumented code */
result= getpeername(mysql_socket.fd, addr, len);
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_socket_wait)(locker, (size_t)0);
+ PSI_SOCKET_CALL(end_socket_wait)(locker, (size_t)0);
return result;
}
@@ -736,18 +737,18 @@ inline_mysql_socket_send
/* Instrumentation start */
PSI_socket_locker *locker;
PSI_socket_locker_state state;
- locker= PSI_CALL(start_socket_wait)(&state, mysql_socket.m_psi,
- PSI_SOCKET_SEND, n, src_file, src_line);
+ locker= PSI_SOCKET_CALL(start_socket_wait)
+ (&state, mysql_socket.m_psi, PSI_SOCKET_SEND, n, src_file, src_line);
/* Instrumented code */
- result= send(mysql_socket.fd, buf, n, flags);
+ result= send(mysql_socket.fd, buf, IF_WIN((int),) n, flags);
/* Instrumentation end */
if (locker != NULL)
{
size_t bytes_written;
bytes_written= (result > -1) ? result : 0;
- PSI_CALL(end_socket_wait)(locker, bytes_written);
+ PSI_SOCKET_CALL(end_socket_wait)(locker, bytes_written);
}
return result;
@@ -755,7 +756,7 @@ inline_mysql_socket_send
#endif
/* Non instrumented code */
- result= send(mysql_socket.fd, buf, n, flags);
+ result= send(mysql_socket.fd, buf, IF_WIN((int),) n, flags);
return result;
}
@@ -778,18 +779,18 @@ inline_mysql_socket_recv
/* Instrumentation start */
PSI_socket_locker *locker;
PSI_socket_locker_state state;
- locker= PSI_CALL(start_socket_wait)(&state, mysql_socket.m_psi,
- PSI_SOCKET_RECV, (size_t)0, src_file, src_line);
+ locker= PSI_SOCKET_CALL(start_socket_wait)
+ (&state, mysql_socket.m_psi, PSI_SOCKET_RECV, (size_t)0, src_file, src_line);
/* Instrumented code */
- result= recv(mysql_socket.fd, buf, n, flags);
+ result= recv(mysql_socket.fd, buf, IF_WIN((int),) n, flags);
/* Instrumentation end */
if (locker != NULL)
{
size_t bytes_read;
bytes_read= (result > -1) ? result : 0;
- PSI_CALL(end_socket_wait)(locker, bytes_read);
+ PSI_SOCKET_CALL(end_socket_wait)(locker, bytes_read);
}
return result;
@@ -797,7 +798,7 @@ inline_mysql_socket_recv
#endif
/* Non instrumented code */
- result= recv(mysql_socket.fd, buf, n, flags);
+ result= recv(mysql_socket.fd, buf, IF_WIN((int),) n, flags);
return result;
}
@@ -820,18 +821,18 @@ inline_mysql_socket_sendto
/* Instrumentation start */
PSI_socket_locker *locker;
PSI_socket_locker_state state;
- locker= PSI_CALL(start_socket_wait)(&state, mysql_socket.m_psi,
- PSI_SOCKET_SEND, n, src_file, src_line);
+ locker= PSI_SOCKET_CALL(start_socket_wait)
+ (&state, mysql_socket.m_psi, PSI_SOCKET_SEND, n, src_file, src_line);
/* Instrumented code */
- result= sendto(mysql_socket.fd, buf, n, flags, addr, addr_len);
+ result= sendto(mysql_socket.fd, buf, IF_WIN((int),) n, flags, addr, addr_len);
/* Instrumentation end */
if (locker != NULL)
{
size_t bytes_written;
bytes_written = (result > -1) ? result : 0;
- PSI_CALL(end_socket_wait)(locker, bytes_written);
+ PSI_SOCKET_CALL(end_socket_wait)(locker, bytes_written);
}
return result;
@@ -839,7 +840,7 @@ inline_mysql_socket_sendto
#endif
/* Non instrumented code */
- result= sendto(mysql_socket.fd, buf, n, flags, addr, addr_len);
+ result= sendto(mysql_socket.fd, buf, IF_WIN((int),) n, flags, addr, addr_len);
return result;
}
@@ -863,18 +864,18 @@ inline_mysql_socket_recvfrom
/* Instrumentation start */
PSI_socket_locker *locker;
PSI_socket_locker_state state;
- locker= PSI_CALL(start_socket_wait)(&state, mysql_socket.m_psi,
- PSI_SOCKET_RECV, (size_t)0, src_file, src_line);
+ locker= PSI_SOCKET_CALL(start_socket_wait)
+ (&state, mysql_socket.m_psi, PSI_SOCKET_RECV, (size_t)0, src_file, src_line);
/* Instrumented code */
- result= recvfrom(mysql_socket.fd, buf, n, flags, addr, addr_len);
+ result= recvfrom(mysql_socket.fd, buf, IF_WIN((int),) n, flags, addr, addr_len);
/* Instrumentation end */
if (locker != NULL)
{
size_t bytes_read;
bytes_read = (result > -1) ? result : 0;
- PSI_CALL(end_socket_wait)(locker, bytes_read);
+ PSI_SOCKET_CALL(end_socket_wait)(locker, bytes_read);
}
return result;
@@ -882,7 +883,7 @@ inline_mysql_socket_recvfrom
#endif
/* Non instrumented code */
- result= recvfrom(mysql_socket.fd, buf, n, flags, addr, addr_len);
+ result= recvfrom(mysql_socket.fd, buf, IF_WIN((int),) n, flags, addr, addr_len);
return result;
}
@@ -905,15 +906,15 @@ inline_mysql_socket_getsockopt
/* Instrumentation start */
PSI_socket_locker *locker;
PSI_socket_locker_state state;
- locker= PSI_CALL(start_socket_wait)(&state, mysql_socket.m_psi,
- PSI_SOCKET_OPT, (size_t)0, src_file, src_line);
+ locker= PSI_SOCKET_CALL(start_socket_wait)
+ (&state, mysql_socket.m_psi, PSI_SOCKET_OPT, (size_t)0, src_file, src_line);
/* Instrumented code */
result= getsockopt(mysql_socket.fd, level, optname, optval, optlen);
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_socket_wait)(locker, (size_t)0);
+ PSI_SOCKET_CALL(end_socket_wait)(locker, (size_t)0);
return result;
}
@@ -944,15 +945,15 @@ inline_mysql_socket_setsockopt
/* Instrumentation start */
PSI_socket_locker *locker;
PSI_socket_locker_state state;
- locker= PSI_CALL(start_socket_wait)(&state, mysql_socket.m_psi,
- PSI_SOCKET_OPT, (size_t)0, src_file, src_line);
+ locker= PSI_SOCKET_CALL(start_socket_wait)
+ (&state, mysql_socket.m_psi, PSI_SOCKET_OPT, (size_t)0, src_file, src_line);
/* Instrumented code */
result= setsockopt(mysql_socket.fd, level, optname, optval, optlen);
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_socket_wait)(locker, (size_t)0);
+ PSI_SOCKET_CALL(end_socket_wait)(locker, (size_t)0);
return result;
}
@@ -982,15 +983,15 @@ inline_mysql_socket_listen
/* Instrumentation start */
PSI_socket_locker *locker;
PSI_socket_locker_state state;
- locker= PSI_CALL(start_socket_wait)(&state, mysql_socket.m_psi,
- PSI_SOCKET_CONNECT, (size_t)0, src_file, src_line);
+ locker= PSI_SOCKET_CALL(start_socket_wait)
+ (&state, mysql_socket.m_psi, PSI_SOCKET_CONNECT, (size_t)0, src_file, src_line);
/* Instrumented code */
result= listen(mysql_socket.fd, backlog);
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_socket_wait)(locker, (size_t)0);
+ PSI_SOCKET_CALL(end_socket_wait)(locker, (size_t)0);
return result;
}
@@ -1021,15 +1022,15 @@ inline_mysql_socket_accept
/* Instrumentation start */
PSI_socket_locker *locker;
PSI_socket_locker_state state;
- locker= PSI_CALL(start_socket_wait)(&state, socket_listen.m_psi,
- PSI_SOCKET_CONNECT, (size_t)0, src_file, src_line);
+ locker= PSI_SOCKET_CALL(start_socket_wait)
+ (&state, socket_listen.m_psi, PSI_SOCKET_CONNECT, (size_t)0, src_file, src_line);
/* Instrumented code */
socket_accept.fd= accept(socket_listen.fd, addr, &addr_length);
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_socket_wait)(locker, (size_t)0);
+ PSI_SOCKET_CALL(end_socket_wait)(locker, (size_t)0);
}
else
#endif
@@ -1039,14 +1040,12 @@ inline_mysql_socket_accept
}
#ifdef HAVE_PSI_SOCKET_INTERFACE
- /* Initialize the instrument with the new socket descriptor and address */
- socket_accept.m_psi=
- PSI_CALL(init_socket)(key, (const my_socket*)&socket_accept.fd);
-
- /* FIXME: simplify this with just 1 call to init_socket(). */
- if (socket_accept.m_psi != NULL)
- PSI_CALL(set_socket_info)(socket_accept.m_psi, &socket_accept.fd, addr,
- addr_length);
+ if (likely(socket_accept.fd != INVALID_SOCKET))
+ {
+ /* Initialize the instrument with the new socket descriptor and address */
+ socket_accept.m_psi= PSI_SOCKET_CALL(init_socket)
+ (key, (const my_socket*)&socket_accept.fd, addr, addr_length);
+ }
#endif
return socket_accept;
@@ -1070,18 +1069,18 @@ inline_mysql_socket_close
/* Instrumentation start */
PSI_socket_locker *locker;
PSI_socket_locker_state state;
- locker= PSI_CALL(start_socket_wait)(&state, mysql_socket.m_psi,
- PSI_SOCKET_CLOSE, (size_t)0, src_file, src_line);
+ locker= PSI_SOCKET_CALL(start_socket_wait)
+ (&state, mysql_socket.m_psi, PSI_SOCKET_CLOSE, (size_t)0, src_file, src_line);
/* Instrumented code */
result= closesocket(mysql_socket.fd);
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_socket_wait)(locker, (size_t)0);
+ PSI_SOCKET_CALL(end_socket_wait)(locker, (size_t)0);
/* Remove the instrumentation for this socket. */
if (mysql_socket.m_psi != NULL)
- PSI_CALL(destroy_socket)(mysql_socket.m_psi);
+ PSI_SOCKET_CALL(destroy_socket)(mysql_socket.m_psi);
return result;
}
@@ -1105,28 +1104,53 @@ inline_mysql_socket_shutdown
{
int result;
- /* Instrumentation start */
+#ifdef __WIN__
+ static LPFN_DISCONNECTEX DisconnectEx = NULL;
+ if (DisconnectEx == NULL)
+ {
+ DWORD dwBytesReturned;
+ GUID guidDisconnectEx = WSAID_DISCONNECTEX;
+ WSAIoctl(mysql_socket.fd, SIO_GET_EXTENSION_FUNCTION_POINTER,
+ &guidDisconnectEx, sizeof(GUID),
+ &DisconnectEx, sizeof(DisconnectEx),
+ &dwBytesReturned, NULL, NULL);
+ }
+#endif
+
+/* Instrumentation start */
#ifdef HAVE_PSI_SOCKET_INTERFACE
if (mysql_socket.m_psi != NULL)
{
PSI_socket_locker *locker;
PSI_socket_locker_state state;
- locker= PSI_CALL(start_socket_wait)(&state, mysql_socket.m_psi,
- PSI_SOCKET_SHUTDOWN, (size_t)0, src_file, src_line);
+ locker= PSI_SOCKET_CALL(start_socket_wait)
+ (&state, mysql_socket.m_psi, PSI_SOCKET_SHUTDOWN, (size_t)0, src_file, src_line);
/* Instrumented code */
- result= shutdown(mysql_socket.fd, how);
+#ifdef __WIN__
+ if (DisconnectEx)
+ result= (DisconnectEx(mysql_socket.fd, (LPOVERLAPPED) NULL,
+ (DWORD) 0, (DWORD) 0) == TRUE) ? 0 : -1;
+ else
+#endif
+ result= shutdown(mysql_socket.fd, how);
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_socket_wait)(locker, (size_t)0);
+ PSI_SOCKET_CALL(end_socket_wait)(locker, (size_t)0);
return result;
}
#endif
/* Non instrumented code */
- result= shutdown(mysql_socket.fd, how);
+#ifdef __WIN__
+ if (DisconnectEx)
+ result= (DisconnectEx(mysql_socket.fd, (LPOVERLAPPED) NULL,
+ (DWORD) 0, (DWORD) 0) == TRUE) ? 0 : -1;
+ else
+#endif
+ result= shutdown(mysql_socket.fd, how);
return result;
}
diff --git a/include/mysql/psi/mysql_stage.h b/include/mysql/psi/mysql_stage.h
index dc44e9b0bed..61bfdbb7d59 100644
--- a/include/mysql/psi/mysql_stage.h
+++ b/include/mysql/psi/mysql_stage.h
@@ -53,7 +53,7 @@
static inline void inline_mysql_stage_register(
const char *category, PSI_stage_info **info, int count)
{
- PSI_CALL(register_stage)(category, info, count);
+ PSI_STAGE_CALL(register_stage)(category, info, count);
}
#endif
@@ -62,7 +62,7 @@ static inline void
inline_mysql_set_stage(PSI_stage_key key,
const char *src_file, int src_line)
{
- PSI_CALL(start_stage)(key, src_file, src_line);
+ PSI_STAGE_CALL(start_stage)(key, src_file, src_line);
}
#endif
diff --git a/include/mysql/psi/mysql_statement.h b/include/mysql/psi/mysql_statement.h
index 1b065065e57..d7a76ee25e4 100644
--- a/include/mysql/psi/mysql_statement.h
+++ b/include/mysql/psi/mysql_statement.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2010, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -63,10 +63,10 @@
#endif
#ifdef HAVE_PSI_STATEMENT_INTERFACE
- #define MYSQL_START_STATEMENT(STATE, K, DB, DB_LEN) \
- inline_mysql_start_statement(STATE, K, DB, DB_LEN, __FILE__, __LINE__)
+ #define MYSQL_START_STATEMENT(STATE, K, DB, DB_LEN, CS) \
+ inline_mysql_start_statement(STATE, K, DB, DB_LEN, CS, __FILE__, __LINE__)
#else
- #define MYSQL_START_STATEMENT(STATE, K, DB, DB_LEN) \
+ #define MYSQL_START_STATEMENT(STATE, K, DB, DB_LEN, CS) \
NULL
#endif
@@ -122,7 +122,7 @@
static inline void inline_mysql_statement_register(
const char *category, PSI_statement_info *info, int count)
{
- PSI_CALL(register_statement)(category, info, count);
+ PSI_STATEMENT_CALL(register_statement)(category, info, count);
}
#ifdef HAVE_PSI_STATEMENT_DIGEST_INTERFACE
@@ -132,7 +132,7 @@ inline_mysql_digest_start(PSI_statement_locker *locker)
PSI_digest_locker* digest_locker= NULL;
if (likely(locker != NULL))
- digest_locker= PSI_CALL(digest_start)(locker);
+ digest_locker= PSI_STATEMENT_CALL(digest_start)(locker);
return digest_locker;
}
#endif
@@ -143,7 +143,7 @@ inline_mysql_add_token(PSI_digest_locker *locker, uint token,
void *yylval)
{
if (likely(locker != NULL))
- locker= PSI_CALL(digest_add_token)(locker, token,
+ locker= PSI_STATEMENT_CALL(digest_add_token)(locker, token,
(OPAQUE_LEX_YYSTYPE*)yylval);
return locker;
}
@@ -153,12 +153,13 @@ static inline struct PSI_statement_locker *
inline_mysql_start_statement(PSI_statement_locker_state *state,
PSI_statement_key key,
const char *db, uint db_len,
+ const CHARSET_INFO *charset,
const char *src_file, int src_line)
{
PSI_statement_locker *locker;
- locker= PSI_CALL(get_thread_statement_locker)(state, key);
+ locker= PSI_STATEMENT_CALL(get_thread_statement_locker)(state, key, charset);
if (likely(locker != NULL))
- PSI_CALL(start_statement)(locker, db, db_len, src_file, src_line);
+ PSI_STATEMENT_CALL(start_statement)(locker, db, db_len, src_file, src_line);
return locker;
}
@@ -168,7 +169,7 @@ inline_mysql_refine_statement(PSI_statement_locker *locker,
{
if (likely(locker != NULL))
{
- locker= PSI_CALL(refine_statement)(locker, key);
+ locker= PSI_STATEMENT_CALL(refine_statement)(locker, key);
}
return locker;
}
@@ -179,7 +180,7 @@ inline_mysql_set_statement_text(PSI_statement_locker *locker,
{
if (likely(locker != NULL))
{
- PSI_CALL(set_statement_text)(locker, text, text_len);
+ PSI_STATEMENT_CALL(set_statement_text)(locker, text, text_len);
}
}
@@ -189,7 +190,7 @@ inline_mysql_set_statement_lock_time(PSI_statement_locker *locker,
{
if (likely(locker != NULL))
{
- PSI_CALL(set_statement_lock_time)(locker, count);
+ PSI_STATEMENT_CALL(set_statement_lock_time)(locker, count);
}
}
@@ -199,7 +200,7 @@ inline_mysql_set_statement_rows_sent(PSI_statement_locker *locker,
{
if (likely(locker != NULL))
{
- PSI_CALL(set_statement_rows_sent)(locker, count);
+ PSI_STATEMENT_CALL(set_statement_rows_sent)(locker, count);
}
}
@@ -209,7 +210,7 @@ inline_mysql_set_statement_rows_examined(PSI_statement_locker *locker,
{
if (likely(locker != NULL))
{
- PSI_CALL(set_statement_rows_examined)(locker, count);
+ PSI_STATEMENT_CALL(set_statement_rows_examined)(locker, count);
}
}
@@ -217,9 +218,9 @@ static inline void
inline_mysql_end_statement(struct PSI_statement_locker *locker,
Diagnostics_area *stmt_da)
{
- PSI_CALL(end_stage)();
+ PSI_STAGE_CALL(end_stage)();
if (likely(locker != NULL))
- PSI_CALL(end_statement)(locker, stmt_da);
+ PSI_STATEMENT_CALL(end_statement)(locker, stmt_da);
}
#endif
diff --git a/include/mysql/psi/mysql_table.h b/include/mysql/psi/mysql_table.h
index 1796943096e..815313e654b 100644
--- a/include/mysql/psi/mysql_table.h
+++ b/include/mysql/psi/mysql_table.h
@@ -60,22 +60,22 @@
@sa MYSQL_END_TABLE_WAIT.
*/
#ifdef HAVE_PSI_TABLE_INTERFACE
- #define MYSQL_TABLE_IO_WAIT(PSI, OP, INDEX, FLAGS, PAYLOAD) \
- { \
- if (PSI != NULL) \
- { \
- PSI_table_locker *locker; \
- PSI_table_locker_state state; \
- locker= PSI_CALL(start_table_io_wait)(& state, PSI, OP, INDEX, \
- __FILE__, __LINE__); \
- PAYLOAD \
- if (locker != NULL) \
- PSI_CALL(end_table_io_wait)(locker); \
- } \
- else \
- { \
- PAYLOAD \
- } \
+ #define MYSQL_TABLE_IO_WAIT(PSI, OP, INDEX, FLAGS, PAYLOAD) \
+ { \
+ if (PSI != NULL) \
+ { \
+ PSI_table_locker *locker; \
+ PSI_table_locker_state state; \
+ locker= PSI_TABLE_CALL(start_table_io_wait) \
+ (& state, PSI, OP, INDEX, __FILE__, __LINE__); \
+ PAYLOAD \
+ if (locker != NULL) \
+ PSI_TABLE_CALL(end_table_io_wait)(locker); \
+ } \
+ else \
+ { \
+ PAYLOAD \
+ } \
}
#else
#define MYSQL_TABLE_IO_WAIT(PSI, OP, INDEX, FLAGS, PAYLOAD) \
@@ -93,22 +93,22 @@
@sa MYSQL_END_TABLE_WAIT.
*/
#ifdef HAVE_PSI_TABLE_INTERFACE
- #define MYSQL_TABLE_LOCK_WAIT(PSI, OP, FLAGS, PAYLOAD) \
- { \
- if (PSI != NULL) \
- { \
- PSI_table_locker *locker; \
- PSI_table_locker_state state; \
- locker= PSI_CALL(start_table_lock_wait)(& state, PSI, OP, FLAGS, \
- __FILE__, __LINE__); \
- PAYLOAD \
- if (locker != NULL) \
- PSI_CALL(end_table_lock_wait)(locker); \
- } \
- else \
- { \
- PAYLOAD \
- } \
+ #define MYSQL_TABLE_LOCK_WAIT(PSI, OP, FLAGS, PAYLOAD) \
+ { \
+ if (PSI != NULL) \
+ { \
+ PSI_table_locker *locker; \
+ PSI_table_locker_state state; \
+ locker= PSI_TABLE_CALL(start_table_lock_wait) \
+ (& state, PSI, OP, FLAGS, __FILE__, __LINE__); \
+ PAYLOAD \
+ if (locker != NULL) \
+ PSI_TABLE_CALL(end_table_lock_wait)(locker); \
+ } \
+ else \
+ { \
+ PAYLOAD \
+ } \
}
#else
#define MYSQL_TABLE_LOCK_WAIT(PSI, OP, FLAGS, PAYLOAD) \
@@ -164,7 +164,8 @@ inline_mysql_start_table_lock_wait(PSI_table_locker_state *state,
if (psi != NULL)
{
struct PSI_table_locker *locker;
- locker= PSI_CALL(start_table_lock_wait)(state, psi, op, flags, src_file, src_line);
+ locker= PSI_TABLE_CALL(start_table_lock_wait)
+ (state, psi, op, flags, src_file, src_line);
return locker;
}
return NULL;
@@ -178,7 +179,7 @@ static inline void
inline_mysql_end_table_lock_wait(struct PSI_table_locker *locker)
{
if (locker != NULL)
- PSI_CALL(end_table_lock_wait)(locker);
+ PSI_TABLE_CALL(end_table_lock_wait)(locker);
}
#endif
diff --git a/include/mysql/psi/mysql_thread.h b/include/mysql/psi/mysql_thread.h
index 78175196fa2..f0d88ff8ede 100644
--- a/include/mysql/psi/mysql_thread.h
+++ b/include/mysql/psi/mysql_thread.h
@@ -597,7 +597,7 @@ static inline void inline_mysql_mutex_register(
)
{
#ifdef HAVE_PSI_MUTEX_INTERFACE
- PSI_CALL(register_mutex)(category, info, count);
+ PSI_MUTEX_CALL(register_mutex)(category, info, count);
#endif
}
@@ -613,7 +613,7 @@ static inline int inline_mysql_mutex_init(
)
{
#ifdef HAVE_PSI_MUTEX_INTERFACE
- that->m_psi= PSI_CALL(init_mutex)(key, &that->m_mutex);
+ that->m_psi= PSI_MUTEX_CALL(init_mutex)(key, &that->m_mutex);
#else
that->m_psi= NULL;
#endif
@@ -636,7 +636,7 @@ static inline int inline_mysql_mutex_destroy(
#ifdef HAVE_PSI_MUTEX_INTERFACE
if (that->m_psi != NULL)
{
- PSI_CALL(destroy_mutex)(that->m_psi);
+ PSI_MUTEX_CALL(destroy_mutex)(that->m_psi);
that->m_psi= NULL;
}
#endif
@@ -664,7 +664,7 @@ static inline int inline_mysql_mutex_lock(
/* Instrumentation start */
PSI_mutex_locker *locker;
PSI_mutex_locker_state state;
- locker= PSI_CALL(start_mutex_wait)(&state, that->m_psi,
+ locker= PSI_MUTEX_CALL(start_mutex_wait)(&state, that->m_psi,
PSI_MUTEX_LOCK, src_file, src_line);
/* Instrumented code */
@@ -678,7 +678,7 @@ static inline int inline_mysql_mutex_lock(
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_mutex_wait)(locker, result);
+ PSI_MUTEX_CALL(end_mutex_wait)(locker, result);
return result;
}
@@ -711,7 +711,7 @@ static inline int inline_mysql_mutex_trylock(
/* Instrumentation start */
PSI_mutex_locker *locker;
PSI_mutex_locker_state state;
- locker= PSI_CALL(start_mutex_wait)(&state, that->m_psi,
+ locker= PSI_MUTEX_CALL(start_mutex_wait)(&state, that->m_psi,
PSI_MUTEX_TRYLOCK, src_file, src_line);
/* Instrumented code */
@@ -725,7 +725,7 @@ static inline int inline_mysql_mutex_trylock(
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_mutex_wait)(locker, result);
+ PSI_MUTEX_CALL(end_mutex_wait)(locker, result);
return result;
}
@@ -754,7 +754,7 @@ static inline int inline_mysql_mutex_unlock(
#ifdef HAVE_PSI_MUTEX_INTERFACE
if (that->m_psi != NULL)
- PSI_CALL(unlock_mutex)(that->m_psi);
+ PSI_MUTEX_CALL(unlock_mutex)(that->m_psi);
#endif
#ifdef SAFE_MUTEX
@@ -781,7 +781,7 @@ static inline void inline_mysql_rwlock_register(
)
{
#ifdef HAVE_PSI_RWLOCK_INTERFACE
- PSI_CALL(register_rwlock)(category, info, count);
+ PSI_RWLOCK_CALL(register_rwlock)(category, info, count);
#endif
}
@@ -792,7 +792,7 @@ static inline int inline_mysql_rwlock_init(
mysql_rwlock_t *that)
{
#ifdef HAVE_PSI_RWLOCK_INTERFACE
- that->m_psi= PSI_CALL(init_rwlock)(key, &that->m_rwlock);
+ that->m_psi= PSI_RWLOCK_CALL(init_rwlock)(key, &that->m_rwlock);
#else
that->m_psi= NULL;
#endif
@@ -810,7 +810,7 @@ static inline int inline_mysql_prlock_init(
mysql_prlock_t *that)
{
#ifdef HAVE_PSI_RWLOCK_INTERFACE
- that->m_psi= PSI_CALL(init_rwlock)(key, &that->m_prlock);
+ that->m_psi= PSI_RWLOCK_CALL(init_rwlock)(key, &that->m_prlock);
#else
that->m_psi= NULL;
#endif
@@ -824,7 +824,7 @@ static inline int inline_mysql_rwlock_destroy(
#ifdef HAVE_PSI_RWLOCK_INTERFACE
if (that->m_psi != NULL)
{
- PSI_CALL(destroy_rwlock)(that->m_psi);
+ PSI_RWLOCK_CALL(destroy_rwlock)(that->m_psi);
that->m_psi= NULL;
}
#endif
@@ -838,7 +838,7 @@ static inline int inline_mysql_prlock_destroy(
#ifdef HAVE_PSI_RWLOCK_INTERFACE
if (that->m_psi != NULL)
{
- PSI_CALL(destroy_rwlock)(that->m_psi);
+ PSI_RWLOCK_CALL(destroy_rwlock)(that->m_psi);
that->m_psi= NULL;
}
#endif
@@ -861,7 +861,7 @@ static inline int inline_mysql_rwlock_rdlock(
/* Instrumentation start */
PSI_rwlock_locker *locker;
PSI_rwlock_locker_state state;
- locker= PSI_CALL(start_rwlock_rdwait)(&state, that->m_psi,
+ locker= PSI_RWLOCK_CALL(start_rwlock_rdwait)(&state, that->m_psi,
PSI_RWLOCK_READLOCK, src_file, src_line);
/* Instrumented code */
@@ -869,7 +869,7 @@ static inline int inline_mysql_rwlock_rdlock(
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_rwlock_rdwait)(locker, result);
+ PSI_RWLOCK_CALL(end_rwlock_rdwait)(locker, result);
return result;
}
@@ -897,7 +897,7 @@ static inline int inline_mysql_prlock_rdlock(
/* Instrumentation start */
PSI_rwlock_locker *locker;
PSI_rwlock_locker_state state;
- locker= PSI_CALL(start_rwlock_rdwait)(&state, that->m_psi,
+ locker= PSI_RWLOCK_CALL(start_rwlock_rdwait)(&state, that->m_psi,
PSI_RWLOCK_READLOCK, src_file, src_line);
/* Instrumented code */
@@ -905,7 +905,7 @@ static inline int inline_mysql_prlock_rdlock(
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_rwlock_rdwait)(locker, result);
+ PSI_RWLOCK_CALL(end_rwlock_rdwait)(locker, result);
return result;
}
@@ -933,7 +933,7 @@ static inline int inline_mysql_rwlock_wrlock(
/* Instrumentation start */
PSI_rwlock_locker *locker;
PSI_rwlock_locker_state state;
- locker= PSI_CALL(start_rwlock_wrwait)(&state, that->m_psi,
+ locker= PSI_RWLOCK_CALL(start_rwlock_wrwait)(&state, that->m_psi,
PSI_RWLOCK_WRITELOCK, src_file, src_line);
/* Instrumented code */
@@ -941,7 +941,7 @@ static inline int inline_mysql_rwlock_wrlock(
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_rwlock_wrwait)(locker, result);
+ PSI_RWLOCK_CALL(end_rwlock_wrwait)(locker, result);
return result;
}
@@ -969,7 +969,7 @@ static inline int inline_mysql_prlock_wrlock(
/* Instrumentation start */
PSI_rwlock_locker *locker;
PSI_rwlock_locker_state state;
- locker= PSI_CALL(start_rwlock_wrwait)(&state, that->m_psi,
+ locker= PSI_RWLOCK_CALL(start_rwlock_wrwait)(&state, that->m_psi,
PSI_RWLOCK_WRITELOCK, src_file, src_line);
/* Instrumented code */
@@ -977,7 +977,7 @@ static inline int inline_mysql_prlock_wrlock(
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_rwlock_wrwait)(locker, result);
+ PSI_RWLOCK_CALL(end_rwlock_wrwait)(locker, result);
return result;
}
@@ -1005,7 +1005,7 @@ static inline int inline_mysql_rwlock_tryrdlock(
/* Instrumentation start */
PSI_rwlock_locker *locker;
PSI_rwlock_locker_state state;
- locker= PSI_CALL(start_rwlock_rdwait)(&state, that->m_psi,
+ locker= PSI_RWLOCK_CALL(start_rwlock_rdwait)(&state, that->m_psi,
PSI_RWLOCK_TRYREADLOCK, src_file, src_line);
/* Instrumented code */
@@ -1013,7 +1013,7 @@ static inline int inline_mysql_rwlock_tryrdlock(
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_rwlock_rdwait)(locker, result);
+ PSI_RWLOCK_CALL(end_rwlock_rdwait)(locker, result);
return result;
}
@@ -1040,7 +1040,7 @@ static inline int inline_mysql_rwlock_trywrlock(
/* Instrumentation start */
PSI_rwlock_locker *locker;
PSI_rwlock_locker_state state;
- locker= PSI_CALL(start_rwlock_wrwait)(&state, that->m_psi,
+ locker= PSI_RWLOCK_CALL(start_rwlock_wrwait)(&state, that->m_psi,
PSI_RWLOCK_TRYWRITELOCK, src_file, src_line);
/* Instrumented code */
@@ -1048,7 +1048,7 @@ static inline int inline_mysql_rwlock_trywrlock(
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_rwlock_wrwait)(locker, result);
+ PSI_RWLOCK_CALL(end_rwlock_wrwait)(locker, result);
return result;
}
@@ -1066,7 +1066,7 @@ static inline int inline_mysql_rwlock_unlock(
int result;
#ifdef HAVE_PSI_RWLOCK_INTERFACE
if (that->m_psi != NULL)
- PSI_CALL(unlock_rwlock)(that->m_psi);
+ PSI_RWLOCK_CALL(unlock_rwlock)(that->m_psi);
#endif
result= rw_unlock(&that->m_rwlock);
return result;
@@ -1079,7 +1079,7 @@ static inline int inline_mysql_prlock_unlock(
int result;
#ifdef HAVE_PSI_RWLOCK_INTERFACE
if (that->m_psi != NULL)
- PSI_CALL(unlock_rwlock)(that->m_psi);
+ PSI_RWLOCK_CALL(unlock_rwlock)(that->m_psi);
#endif
result= rw_pr_unlock(&that->m_prlock);
return result;
@@ -1099,7 +1099,7 @@ static inline void inline_mysql_cond_register(
)
{
#ifdef HAVE_PSI_COND_INTERFACE
- PSI_CALL(register_cond)(category, info, count);
+ PSI_COND_CALL(register_cond)(category, info, count);
#endif
}
@@ -1111,7 +1111,7 @@ static inline int inline_mysql_cond_init(
const pthread_condattr_t *attr)
{
#ifdef HAVE_PSI_COND_INTERFACE
- that->m_psi= PSI_CALL(init_cond)(key, &that->m_cond);
+ that->m_psi= PSI_COND_CALL(init_cond)(key, &that->m_cond);
#else
that->m_psi= NULL;
#endif
@@ -1124,7 +1124,7 @@ static inline int inline_mysql_cond_destroy(
#ifdef HAVE_PSI_COND_INTERFACE
if (that->m_psi != NULL)
{
- PSI_CALL(destroy_cond)(that->m_psi);
+ PSI_COND_CALL(destroy_cond)(that->m_psi);
that->m_psi= NULL;
}
#endif
@@ -1147,7 +1147,7 @@ static inline int inline_mysql_cond_wait(
/* Instrumentation start */
PSI_cond_locker *locker;
PSI_cond_locker_state state;
- locker= PSI_CALL(start_cond_wait)(&state, that->m_psi, mutex->m_psi,
+ locker= PSI_COND_CALL(start_cond_wait)(&state, that->m_psi, mutex->m_psi,
PSI_COND_WAIT, src_file, src_line);
/* Instrumented code */
@@ -1155,7 +1155,7 @@ static inline int inline_mysql_cond_wait(
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_cond_wait)(locker, result);
+ PSI_COND_CALL(end_cond_wait)(locker, result);
return result;
}
@@ -1184,7 +1184,7 @@ static inline int inline_mysql_cond_timedwait(
/* Instrumentation start */
PSI_cond_locker *locker;
PSI_cond_locker_state state;
- locker= PSI_CALL(start_cond_wait)(&state, that->m_psi, mutex->m_psi,
+ locker= PSI_COND_CALL(start_cond_wait)(&state, that->m_psi, mutex->m_psi,
PSI_COND_TIMEDWAIT, src_file, src_line);
/* Instrumented code */
@@ -1192,7 +1192,7 @@ static inline int inline_mysql_cond_timedwait(
/* Instrumentation end */
if (locker != NULL)
- PSI_CALL(end_cond_wait)(locker, result);
+ PSI_COND_CALL(end_cond_wait)(locker, result);
return result;
}
@@ -1210,7 +1210,7 @@ static inline int inline_mysql_cond_signal(
int result;
#ifdef HAVE_PSI_COND_INTERFACE
if (that->m_psi != NULL)
- PSI_CALL(signal_cond)(that->m_psi);
+ PSI_COND_CALL(signal_cond)(that->m_psi);
#endif
result= pthread_cond_signal(&that->m_cond);
return result;
@@ -1222,7 +1222,7 @@ static inline int inline_mysql_cond_broadcast(
int result;
#ifdef HAVE_PSI_COND_INTERFACE
if (that->m_psi != NULL)
- PSI_CALL(broadcast_cond)(that->m_psi);
+ PSI_COND_CALL(broadcast_cond)(that->m_psi);
#endif
result= pthread_cond_broadcast(&that->m_cond);
return result;
@@ -1241,7 +1241,7 @@ static inline void inline_mysql_thread_register(
)
{
#ifdef HAVE_PSI_THREAD_INTERFACE
- PSI_CALL(register_thread)(category, info, count);
+ PSI_THREAD_CALL(register_thread)(category, info, count);
#endif
}
@@ -1252,14 +1252,14 @@ static inline int inline_mysql_thread_create(
void *(*start_routine)(void*), void *arg)
{
int result;
- result= PSI_CALL(spawn_thread)(key, thread, attr, start_routine, arg);
+ result= PSI_THREAD_CALL(spawn_thread)(key, thread, attr, start_routine, arg);
return result;
}
static inline void inline_mysql_thread_set_psi_id(ulong id)
{
- struct PSI_thread *psi= PSI_CALL(get_thread)();
- PSI_CALL(set_thread_id)(psi, id);
+ struct PSI_thread *psi= PSI_THREAD_CALL(get_thread)();
+ PSI_THREAD_CALL(set_thread_id)(psi, id);
}
#endif
diff --git a/include/mysql/psi/psi.h b/include/mysql/psi/psi.h
index 8d5e6db7307..cc2057c630d 100644
--- a/include/mysql/psi/psi.h
+++ b/include/mysql/psi/psi.h
@@ -899,6 +899,10 @@ struct PSI_file_locker_state_v1
enum PSI_file_operation m_operation;
/** Current file. */
struct PSI_file *m_file;
+ /** Current file name. */
+ const char *m_name;
+ /** Current file class. */
+ void *m_class;
/** Current thread. */
struct PSI_thread *m_thread;
/** Operation number of bytes. */
@@ -958,6 +962,8 @@ struct PSI_digest_storage
{
my_bool m_full;
int m_byte_count;
+ /** Character set number. */
+ uint m_charset_number;
unsigned char m_token_array[PSI_MAX_DIGEST_STORAGE_SIZE];
};
typedef struct PSI_digest_storage PSI_digest_storage;
@@ -969,6 +975,9 @@ struct PSI_digest_locker_state
};
typedef struct PSI_digest_locker_state PSI_digest_locker_state;
+/* Duplicate of NAME_LEN, to avoid dependency on mysql_com.h */
+#define PSI_SCHEMA_NAME_LEN (64 * 3)
+
/**
State data storage for @c get_thread_statement_locker_v1_t,
@c get_thread_statement_locker_v1_t.
@@ -1029,6 +1038,10 @@ struct PSI_statement_locker_state_v1
ulong m_sort_scan;
/** Statement digest. */
PSI_digest_locker_state m_digest_state;
+ /** Current schema name. */
+ char m_schema_name[PSI_SCHEMA_NAME_LEN];
+ /** Length in bytes of @c m_schema_name. */
+ uint m_schema_name_length;
};
/**
@@ -1187,10 +1200,13 @@ typedef void (*destroy_cond_v1_t)(struct PSI_cond *cond);
Socket instrumentation initialisation API.
@param key the registered mutex key
@param socket descriptor
+ @param addr the socket ip address
+ @param addr_len length of socket ip address
@return an instrumented socket
*/
typedef struct PSI_socket* (*init_socket_v1_t)
- (PSI_socket_key key, const my_socket *fd);
+ (PSI_socket_key key, const my_socket *fd,
+ const struct sockaddr *addr, socklen_t addr_len);
/**
socket instrumentation destruction API.
@@ -1290,7 +1306,7 @@ typedef int (*spawn_thread_v1_t)(PSI_thread_key key,
@return an instrumented thread
*/
typedef struct PSI_thread* (*new_thread_v1_t)
- (PSI_thread_key key, const void *identity, ulong thread_id);
+ (PSI_thread_key key, const void *identity, ulonglong thread_id);
/**
Assign an id to an instrumented thread.
@@ -1298,7 +1314,7 @@ typedef struct PSI_thread* (*new_thread_v1_t)
@param id the id to assign
*/
typedef void (*set_thread_id_v1_t)(struct PSI_thread *thread,
- unsigned long id);
+ ulonglong id);
/**
Get the instrumentation for the running thread.
@@ -1570,16 +1586,18 @@ typedef void (*end_table_lock_wait_v1_t)(struct PSI_table_locker *locker);
@param op the operation to perform
@param src_file the source file name
@param src_line the source line number
- @return an instrumented file handle
*/
-typedef struct PSI_file* (*start_file_open_wait_v1_t)
+typedef void (*start_file_open_wait_v1_t)
(struct PSI_file_locker *locker, const char *src_file, uint src_line);
/**
End a file instrumentation open operation, for file streams.
@param locker the file locker.
+ @param result the opened file (NULL indicates failure, non NULL success).
+ @return an instrumented file handle
*/
-typedef void (*end_file_open_wait_v1_t)(struct PSI_file_locker *locker);
+typedef struct PSI_file* (*end_file_open_wait_v1_t)
+ (struct PSI_file_locker *locker, void *result);
/**
End a file instrumentation open operation, for non stream files.
@@ -1617,6 +1635,25 @@ typedef void (*end_file_wait_v1_t)
(struct PSI_file_locker *locker, size_t count);
/**
+ Start a file instrumentation close operation.
+ @param locker the file locker
+ @param op the operation to perform
+ @param src_file the source file name
+ @param src_line the source line number
+*/
+typedef void (*start_file_close_wait_v1_t)
+ (struct PSI_file_locker *locker, const char *src_file, uint src_line);
+
+/**
+ End a file instrumentation close operation.
+ @param locker the file locker.
+ @param rc the close operation return code (0 for success).
+ @note returns nothing (void); unlike the open operation, no instrumented file handle is produced
+*/
+typedef void (*end_file_close_wait_v1_t)
+ (struct PSI_file_locker *locker, int rc);
+
+/**
Start a new stage, and implicitly end the previous stage.
@param key the key of the new stage
@param src_file the source file name
@@ -1632,11 +1669,12 @@ typedef void (*end_stage_v1_t) (void);
Get a statement instrumentation locker.
@param state data storage for the locker
@param key the statement instrumentation key
+ @param charset client character set
@return a statement locker, or NULL
*/
typedef struct PSI_statement_locker* (*get_thread_statement_locker_v1_t)
(struct PSI_statement_locker_state_v1 *state,
- PSI_statement_key key);
+ PSI_statement_key key, const void *charset);
/**
Refine a statement locker to a more specific key.
@@ -1871,6 +1909,19 @@ typedef struct PSI_digest_locker* (*digest_add_token_v1_t)
(struct PSI_digest_locker *locker, uint token, struct OPAQUE_LEX_YYSTYPE *yylval);
/**
+ Stores an array of connection attributes
+ @param buffer char array of length encoded connection attributes
+ in network format
+ @param length length of the data in buffer
+ @param from_cs charset in which @buffer is encoded
+ @return state
+ @retval non-0 attributes truncated
+ @retval 0 stored the attribute
+*/
+typedef int (*set_thread_connect_attrs_v1_t)(const char *buffer, uint length,
+ const void *from_cs);
+
+/**
Performance Schema Interface, version 1.
@since PSI_VERSION_1
*/
@@ -2005,6 +2056,10 @@ struct PSI_v1
start_file_wait_v1_t start_file_wait;
/** @sa end_file_wait_v1_t. */
end_file_wait_v1_t end_file_wait;
+ /** @sa start_file_close_wait_v1_t. */
+ start_file_close_wait_v1_t start_file_close_wait;
+ /** @sa end_file_close_wait_v1_t. */
+ end_file_close_wait_v1_t end_file_close_wait;
/** @sa start_stage_v1_t. */
start_stage_v1_t start_stage;
/** @sa end_stage_v1_t. */
@@ -2065,6 +2120,8 @@ struct PSI_v1
digest_start_v1_t digest_start;
/** @sa digest_add_token_v1_t. */
digest_add_token_v1_t digest_add_token;
+ /** @sa set_thread_connect_attrs_v1_t. */
+ set_thread_connect_attrs_v1_t set_thread_connect_attrs;
};
/** @} (end of group Group_PSI_v1) */
@@ -2318,7 +2375,54 @@ typedef struct PSI_stage_info_none PSI_stage_info;
extern MYSQL_PLUGIN_IMPORT PSI *PSI_server;
-#define PSI_CALL(M) PSI_server->M
+/*
+ Allow to override PSI_XXX_CALL at compile time
+ with more efficient implementations, if available.
+ If nothing better is available,
+ make a dynamic call using the PSI_server function pointer.
+*/
+
+#ifndef PSI_MUTEX_CALL
+#define PSI_MUTEX_CALL(M) PSI_DYNAMIC_CALL(M)
+#endif
+
+#ifndef PSI_RWLOCK_CALL
+#define PSI_RWLOCK_CALL(M) PSI_DYNAMIC_CALL(M)
+#endif
+
+#ifndef PSI_COND_CALL
+#define PSI_COND_CALL(M) PSI_DYNAMIC_CALL(M)
+#endif
+
+#ifndef PSI_THREAD_CALL
+#define PSI_THREAD_CALL(M) PSI_DYNAMIC_CALL(M)
+#endif
+
+#ifndef PSI_FILE_CALL
+#define PSI_FILE_CALL(M) PSI_DYNAMIC_CALL(M)
+#endif
+
+#ifndef PSI_SOCKET_CALL
+#define PSI_SOCKET_CALL(M) PSI_DYNAMIC_CALL(M)
+#endif
+
+#ifndef PSI_STAGE_CALL
+#define PSI_STAGE_CALL(M) PSI_DYNAMIC_CALL(M)
+#endif
+
+#ifndef PSI_STATEMENT_CALL
+#define PSI_STATEMENT_CALL(M) PSI_DYNAMIC_CALL(M)
+#endif
+
+#ifndef PSI_TABLE_CALL
+#define PSI_TABLE_CALL(M) PSI_DYNAMIC_CALL(M)
+#endif
+
+#ifndef PSI_IDLE_CALL
+#define PSI_IDLE_CALL(M) PSI_DYNAMIC_CALL(M)
+#endif
+
+#define PSI_DYNAMIC_CALL(M) PSI_server->M
/** @} */
diff --git a/include/mysql/psi/psi_abi_v1.h.pp b/include/mysql/psi/psi_abi_v1.h.pp
index b0559213998..f2037c5b724 100644
--- a/include/mysql/psi/psi_abi_v1.h.pp
+++ b/include/mysql/psi/psi_abi_v1.h.pp
@@ -221,6 +221,8 @@ struct PSI_file_locker_state_v1
uint m_flags;
enum PSI_file_operation m_operation;
struct PSI_file *m_file;
+ const char *m_name;
+ void *m_class;
struct PSI_thread *m_thread;
size_t m_number_of_bytes;
ulonglong m_timer_start;
@@ -243,6 +245,7 @@ struct PSI_digest_storage
{
my_bool m_full;
int m_byte_count;
+ uint m_charset_number;
unsigned char m_token_array[1024];
};
typedef struct PSI_digest_storage PSI_digest_storage;
@@ -278,6 +281,8 @@ struct PSI_statement_locker_state_v1
ulong m_sort_rows;
ulong m_sort_scan;
PSI_digest_locker_state m_digest_state;
+ char m_schema_name[(64 * 3)];
+ uint m_schema_name_length;
};
struct PSI_socket_locker_state_v1
{
@@ -318,7 +323,8 @@ typedef struct PSI_cond* (*init_cond_v1_t)
(PSI_cond_key key, const void *identity);
typedef void (*destroy_cond_v1_t)(struct PSI_cond *cond);
typedef struct PSI_socket* (*init_socket_v1_t)
- (PSI_socket_key key, const my_socket *fd);
+ (PSI_socket_key key, const my_socket *fd,
+ const struct sockaddr *addr, socklen_t addr_len);
typedef void (*destroy_socket_v1_t)(struct PSI_socket *socket);
typedef struct PSI_table_share* (*get_table_share_v1_t)
(my_bool temporary, struct TABLE_SHARE *share);
@@ -340,9 +346,9 @@ typedef int (*spawn_thread_v1_t)(PSI_thread_key key,
const pthread_attr_t *attr,
void *(*start_routine)(void*), void *arg);
typedef struct PSI_thread* (*new_thread_v1_t)
- (PSI_thread_key key, const void *identity, ulong thread_id);
+ (PSI_thread_key key, const void *identity, ulonglong thread_id);
typedef void (*set_thread_id_v1_t)(struct PSI_thread *thread,
- unsigned long id);
+ ulonglong id);
typedef struct PSI_thread* (*get_thread_v1_t)(void);
typedef void (*set_thread_user_v1_t)(const char *user, int user_len);
typedef void (*set_thread_user_host_v1_t)(const char *user, int user_len,
@@ -420,9 +426,10 @@ typedef struct PSI_table_locker* (*start_table_lock_wait_v1_t)
ulong flags,
const char *src_file, uint src_line);
typedef void (*end_table_lock_wait_v1_t)(struct PSI_table_locker *locker);
-typedef struct PSI_file* (*start_file_open_wait_v1_t)
+typedef void (*start_file_open_wait_v1_t)
(struct PSI_file_locker *locker, const char *src_file, uint src_line);
-typedef void (*end_file_open_wait_v1_t)(struct PSI_file_locker *locker);
+typedef struct PSI_file* (*end_file_open_wait_v1_t)
+ (struct PSI_file_locker *locker, void *result);
typedef void (*end_file_open_wait_and_bind_to_descriptor_v1_t)
(struct PSI_file_locker *locker, File file);
typedef void (*start_file_wait_v1_t)
@@ -430,12 +437,16 @@ typedef void (*start_file_wait_v1_t)
const char *src_file, uint src_line);
typedef void (*end_file_wait_v1_t)
(struct PSI_file_locker *locker, size_t count);
+typedef void (*start_file_close_wait_v1_t)
+ (struct PSI_file_locker *locker, const char *src_file, uint src_line);
+typedef void (*end_file_close_wait_v1_t)
+ (struct PSI_file_locker *locker, int rc);
typedef void (*start_stage_v1_t)
(PSI_stage_key key, const char *src_file, int src_line);
typedef void (*end_stage_v1_t) (void);
typedef struct PSI_statement_locker* (*get_thread_statement_locker_v1_t)
(struct PSI_statement_locker_state_v1 *state,
- PSI_statement_key key);
+ PSI_statement_key key, const void *charset);
typedef struct PSI_statement_locker* (*refine_statement_v1_t)
(struct PSI_statement_locker *locker,
PSI_statement_key key);
@@ -499,6 +510,8 @@ typedef struct PSI_digest_locker * (*digest_start_v1_t)
(struct PSI_statement_locker *locker);
typedef struct PSI_digest_locker* (*digest_add_token_v1_t)
(struct PSI_digest_locker *locker, uint token, struct OPAQUE_LEX_YYSTYPE *yylval);
+typedef int (*set_thread_connect_attrs_v1_t)(const char *buffer, uint length,
+ const void *from_cs);
struct PSI_v1
{
register_mutex_v1_t register_mutex;
@@ -566,6 +579,8 @@ struct PSI_v1
end_file_open_wait_and_bind_to_descriptor;
start_file_wait_v1_t start_file_wait;
end_file_wait_v1_t end_file_wait;
+ start_file_close_wait_v1_t start_file_close_wait;
+ end_file_close_wait_v1_t end_file_close_wait;
start_stage_v1_t start_stage;
end_stage_v1_t end_stage;
get_thread_statement_locker_v1_t get_thread_statement_locker;
@@ -596,6 +611,7 @@ struct PSI_v1
set_socket_thread_owner_v1_t set_socket_thread_owner;
digest_start_v1_t digest_start;
digest_add_token_v1_t digest_add_token;
+ set_thread_connect_attrs_v1_t set_thread_connect_attrs;
};
typedef struct PSI_v1 PSI;
typedef struct PSI_mutex_info_v1 PSI_mutex_info;
diff --git a/include/mysql/service_debug_sync.h b/include/mysql/service_debug_sync.h
index bb1202c5e63..eee8e6bbe96 100644
--- a/include/mysql/service_debug_sync.h
+++ b/include/mysql/service_debug_sync.h
@@ -339,9 +339,16 @@ extern void (*debug_sync_C_callback_ptr)(MYSQL_THD, const char *, size_t);
if (debug_sync_service) \
debug_sync_service(thd, STRING_WITH_LEN(name)); \
} while(0)
+
+#define DEBUG_SYNC_C_IF_THD(thd, name) \
+ do { \
+ if (debug_sync_service && thd) \
+ debug_sync_service((MYSQL_THD) thd, STRING_WITH_LEN(name)); \
+ } while(0)
#else
-#define DEBUG_SYNC(thd,name) do { } while(0)
-#endif
+#define DEBUG_SYNC(thd,name) do { } while(0)
+#define DEBUG_SYNC_C_IF_THD(thd, _sync_point_name_) do { } while(0)
+#endif /* defined(ENABLED_DEBUG_SYNC) */
/* compatibility macro */
#define DEBUG_SYNC_C(name) DEBUG_SYNC(NULL, name)
diff --git a/include/mysql/service_my_plugin_log.h b/include/mysql/service_my_plugin_log.h
new file mode 100644
index 00000000000..0cf7817573c
--- /dev/null
+++ b/include/mysql/service_my_plugin_log.h
@@ -0,0 +1,64 @@
+/* Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; version 2 of the
+ License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+/**
+ @file
+ This service provides functions to report error conditions and log to
+ mysql error log.
+*/
+
+#ifndef MYSQL_SERVICE_MY_PLUGIN_LOG_INCLUDED
+#define MYSQL_SERVICE_MY_PLUGIN_LOG_INCLUDED
+
+#ifndef MYSQL_ABI_CHECK
+#include <stdarg.h>
+#endif
+
+/* keep in sync with the loglevel enum in my_sys.h */
+enum plugin_log_level
+{
+ MY_ERROR_LEVEL,
+ MY_WARNING_LEVEL,
+ MY_INFORMATION_LEVEL
+};
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern struct my_plugin_log_service
+{
+ /** write a message to the log */
+ int (*my_plugin_log_message)(MYSQL_PLUGIN *, enum plugin_log_level, const char *, ...);
+} *my_plugin_log_service;
+
+#ifdef MYSQL_DYNAMIC_PLUGIN
+
+#define my_plugin_log_message my_plugin_log_service->my_plugin_log_message
+
+#else
+
+int my_plugin_log_message(MYSQL_PLUGIN *plugin, enum plugin_log_level level,
+ const char *format, ...);
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/include/mysql_com.h b/include/mysql_com.h
index 63b95bb5295..f38cc5ed52e 100644
--- a/include/mysql_com.h
+++ b/include/mysql_com.h
@@ -45,6 +45,7 @@
#define TABLE_COMMENT_MAXLEN 2048
#define COLUMN_COMMENT_MAXLEN 1024
#define INDEX_COMMENT_MAXLEN 1024
+#define TABLE_PARTITION_COMMENT_MAXLEN 1024
/*
USER_HOST_BUFF_SIZE -- length of string buffer, that is enough to contain
@@ -119,13 +120,19 @@ enum enum_server_command
#define BINCMP_FLAG 131072 /* Intern: Used by sql_yacc */
#define GET_FIXED_FIELDS_FLAG (1 << 18) /* Used to get fields in item tree */
#define FIELD_IN_PART_FUNC_FLAG (1 << 19)/* Field part of partition func */
-#define FIELD_IN_ADD_INDEX (1<< 20) /* Intern: Field used in ADD INDEX */
+
+/**
+ Intern: Field in TABLE object for new version of altered table,
+ which participates in a newly added index.
+*/
+#define FIELD_IN_ADD_INDEX (1 << 20)
#define FIELD_IS_RENAMED (1<< 21) /* Intern: Field is being renamed */
-#define FIELD_FLAGS_STORAGE_MEDIA 22 /* Field storage media, bit 22-23,
- reserved by MySQL Cluster */
-#define FIELD_FLAGS_COLUMN_FORMAT 24 /* Field column format, bit 24-25,
- reserved by MySQL Cluster */
-#define HAS_EXPLICIT_VALUE (1 << 26) /* An INSERT/UPDATE operation supplied
+#define FIELD_FLAGS_STORAGE_MEDIA 22 /* Field storage media, bit 22-23 */
+#define FIELD_FLAGS_STORAGE_MEDIA_MASK (3 << FIELD_FLAGS_STORAGE_MEDIA)
+#define FIELD_FLAGS_COLUMN_FORMAT 24 /* Field column format, bit 24-25 */
+#define FIELD_FLAGS_COLUMN_FORMAT_MASK (3 << FIELD_FLAGS_COLUMN_FORMAT)
+#define FIELD_IS_DROPPED (1<< 26) /* Intern: Field is being dropped */
+#define HAS_EXPLICIT_VALUE (1 << 27) /* An INSERT/UPDATE operation supplied
an explicit default value */
#define REFRESH_GRANT (1UL << 0) /* Refresh grant tables */
@@ -154,12 +161,12 @@ enum enum_server_command
#define REFRESH_QUERY_CACHE_FREE (1UL << 17) /* pack query cache */
#define REFRESH_DES_KEY_FILE (1UL << 18)
#define REFRESH_USER_RESOURCES (1UL << 19)
+#define REFRESH_FOR_EXPORT (1UL << 20) /* FLUSH TABLES ... FOR EXPORT */
-#define REFRESH_TABLE_STATS (1UL << 20) /* Refresh table stats hash table */
-#define REFRESH_INDEX_STATS (1UL << 21) /* Refresh index stats hash table */
-#define REFRESH_USER_STATS (1UL << 22) /* Refresh user stats hash table */
-#define REFRESH_CLIENT_STATS (1UL << 23) /* Refresh client stats hash table */
-
+#define REFRESH_TABLE_STATS (1UL << 27) /* Refresh table stats hash table */
+#define REFRESH_INDEX_STATS (1UL << 28) /* Refresh index stats hash table */
+#define REFRESH_USER_STATS (1UL << 29) /* Refresh user stats hash table */
+#define REFRESH_CLIENT_STATS (1UL << 30) /* Refresh client stats hash table */
#define REFRESH_FAST (1UL << 31) /* Intern flag */
#define CLIENT_LONG_PASSWORD 1 /* new more secure passwords */
@@ -183,8 +190,15 @@ enum enum_server_command
#define CLIENT_PS_MULTI_RESULTS (1UL << 18) /* Multi-results in PS-protocol */
#define CLIENT_PLUGIN_AUTH (1UL << 19) /* Client supports plugin authentication */
-#define CLIENT_PROGRESS (1UL << 29) /* Client support progress indicator */
+#define CLIENT_PLUGIN_AUTH (1UL << 19) /* Client supports plugin authentication */
+#define CLIENT_CONNECT_ATTRS (1UL << 20) /* Client supports connection attributes */
+/* Enable authentication response packet to be larger than 255 bytes. */
+#define CLIENT_PLUGIN_AUTH_LENENC_CLIENT_DATA (1UL << 21)
+/* Don't close the connection for a connection with expired password. */
+#define CLIENT_CAN_HANDLE_EXPIRED_PASSWORDS (1UL << 22)
+
+#define CLIENT_PROGRESS (1UL << 29) /* Client support progress indicator */
#define CLIENT_SSL_VERIFY_SERVER_CERT (1UL << 30)
/*
It used to be that if mysql_real_connect() failed, it would delete any
@@ -229,6 +243,12 @@ enum enum_server_command
CLIENT_PLUGIN_AUTH)
/*
+ To be added later:
+ CLIENT_CONNECT_ATTRS, CLIENT_PLUGIN_AUTH_LENENC_CLIENT_DATA,
+ CLIENT_CAN_HANDLE_EXPIRED_PASSWORDS
+*/
+
+/*
Switch off the flags that are optional and depending on build flags
If any of the optional flags is supported by the build it will be switched
on before sending to the client during the connection handshake.
diff --git a/include/password.h b/include/password.h
index 082f917e7c0..5dfea533546 100644
--- a/include/password.h
+++ b/include/password.h
@@ -24,6 +24,8 @@ void my_make_scrambled_password_323(char *to, const char *password,
size_t pass_len);
void my_make_scrambled_password(char *to, const char *password,
size_t pass_len);
+void my_make_scrambled_password_sha1(char *to, const char *password,
+ size_t pass_len);
void hash_password(ulong *result, const char *password, uint password_len);
diff --git a/include/sha1.h b/include/sha1.h
index c3469333c27..b20cc8f5026 100644
--- a/include/sha1.h
+++ b/include/sha1.h
@@ -18,88 +18,13 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-/*
- This is the header file for code which implements the Secure
- Hashing Algorithm 1 as defined in FIPS PUB 180-1 published
- April 17, 1995.
-
- Many of the variable names in this code, especially the
- single character names, were used because those were the names
- used in the publication.
-
- Please read the file sha1.c for more information.
-
- Modified 2002 by Peter Zaitsev to better follow MySQL standards
-
- Original Source from: http://www.faqs.org/rfcs/rfc3174.html
-
- Copyright (C) The Internet Society (2001). All Rights Reserved.
-
- This document and translations of it may be copied and furnished to
- others, and derivative works that comment on or otherwise explain it
- or assist in its implementation may be prepared, copied, published
- and distributed, in whole or in part, without restriction of any
- kind, provided that the above copyright notice and this paragraph are
- included on all such copies and derivative works. However, this
- document itself may not be modified in any way, such as by removing
- the copyright notice or references to the Internet Society or other
- Internet organizations, except as needed for the purpose of
- developing Internet standards in which case the procedures for
- copyrights defined in the Internet Standards process must be
- followed, or as required to translate it into languages other than
- English.
-
- The limited permissions granted above are perpetual and will not be
- revoked by the Internet Society or its successors or assigns.
-
- This document and the information contained herein is provided on an
- "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING
- TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING
- BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION
- HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF
- MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
-
- Acknowledgement
- Funding for the RFC Editor function is currently provided by the
- Internet Society.
-*/
-
-
-enum sha_result_codes
-{
- SHA_SUCCESS = 0,
- SHA_NULL, /* Null pointer parameter */
- SHA_INPUT_TOO_LONG, /* input data too long */
- SHA_STATE_ERROR /* called Input after Result */
-};
-
#define SHA1_HASH_SIZE 20 /* Hash size in bytes */
-/*
- This structure will hold context information for the SHA-1
- hashing operation
-*/
-
-typedef struct SHA1_CONTEXT
-{
- ulonglong Length; /* Message length in bits */
- uint32 Intermediate_Hash[SHA1_HASH_SIZE/4]; /* Message Digest */
- int Computed; /* Is the digest computed? */
- int Corrupted; /* Is the message digest corrupted? */
- int16 Message_Block_Index; /* Index into message block array */
- uint8 Message_Block[64]; /* 512-bit message blocks */
-} SHA1_CONTEXT;
-
-/*
- Function Prototypes
-*/
-
C_MODE_START
-int mysql_sha1_reset(SHA1_CONTEXT*);
-int mysql_sha1_input(SHA1_CONTEXT*, const uint8 *, unsigned int);
-int mysql_sha1_result(SHA1_CONTEXT* , uint8 Message_Digest[SHA1_HASH_SIZE]);
-
+void compute_sha1_hash(uint8 *digest, const char *buf, int len);
+void compute_sha1_hash_multi(uint8 *digest, const char *buf1, int len1,
+ const char *buf2, int len2);
C_MODE_END
#endif /* SHA__INCLUDED */
diff --git a/include/thread_pool_priv.h b/include/thread_pool_priv.h
index 78526894e21..95f7cd95493 100644
--- a/include/thread_pool_priv.h
+++ b/include/thread_pool_priv.h
@@ -49,7 +49,6 @@ void thd_set_killed(THD *thd);
void thd_clear_errors(THD *thd);
void thd_set_thread_stack(THD *thd, char *stack_start);
void thd_lock_thread_count(THD *thd);
-void thd_unlock_thread_count(THD *thd);
void thd_close_connection(THD *thd);
THD *thd_get_current_thd();
void thd_lock_data(THD *thd);
diff --git a/libevent/CMakeLists.txt b/libevent/CMakeLists.txt
new file mode 100644
index 00000000000..ea50bab2530
--- /dev/null
+++ b/libevent/CMakeLists.txt
@@ -0,0 +1,80 @@
+# Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+# Common defines and includes
+IF(WITH_INNODB_MEMCACHED AND UNIX)
+
+ADD_DEFINITIONS(-DHAVE_CONFIG_H)
+INCLUDE_DIRECTORIES(${LIBEVENT_INCLUDE_DIR}/compat/sys
+ ${LIBEVENT_INCLUDE_DIR})
+
+SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${CMAKE_SHARED_LIBRARY_C_FLAGS} -I${LIBEVENT_INCLUDE_DIR}")
+
+SET(LIBEVENT_CORE_SOURCES
+ event.h
+ event-internal.h
+ evutil.h
+ log.h
+ event.c
+ buffer.c
+ evbuffer.c
+ log.c
+ evutil.c)
+
+SET(LIBEVENT_EXTRA_SOURCES
+ event_tagging.c
+ http.c
+ evhttp.h
+ http-internal.h
+ evdns.h
+ evrpc.c
+ evrpc.h
+ evrpc-internal.h
+ strlcpy.c
+ strlcpy-internal.h)
+
+IF(HAVE_SIGNAL_H)
+ SET(LIBEVENT_SIGNAL_SOURCES signal.c)
+ENDIF()
+
+IF(HAVE_POLL_H)
+ SET(LIBEVENT_POLL_SOURCES poll.c)
+ENDIF()
+
+IF(HAVE_SELECT)
+ SET(LIBEVENT_SELECT_SOURCE select.c)
+ENDIF()
+
+IF(HAVE_SYS_EPOLL_H)
+ SET(LIBEVENT_EPOLL_SOURCES epoll.c epoll_sub.c)
+ENDIF()
+
+IF(HAVE_SYS_DEVPOLL_H)
+ SET(LIBEVENT_DEVPOLL_SOURCES devpoll.c)
+ENDIF()
+
+IF(HAVE_EVENT_PORTS)
+ SET(LIBEVENT_EVPORT_SOURCES evport.c)
+ENDIF()
+
+IF(HAVE_WORKING_KQUEUE)
+ SET(LIBEVENT_KQUEUE_SOURCES kqueue.c)
+ENDIF()
+
+ADD_LIBRARY(event_share SHARED ${LIBEVENT_CORE_SOURCES} ${LIBEVENT_EXTRA_SOURCES} ${LIBEVENT_SIGNAL_SOURCES} ${LIBEVENT_POLL_SOURCES} ${LIBEVENT_SELECT_SOURCE} ${LIBEVENT_EPOLL_SOURCES} ${LIBEVENT_DEVPOLL_SOURCES} ${LIBEVENT_EVPORT_SOURCES} ${LIBEVENT_KQUEUE_SOURCES})
+
+ADD_LIBRARY(event STATIC ${LIBEVENT_CORE_SOURCES} ${LIBEVENT_EXTRA_SOURCES} ${LIBEVENT_SIGNAL_SOURCES} ${LIBEVENT_POLL_SOURCES} ${LIBEVENT_SELECT_SOURCE} ${LIBEVENT_EPOLL_SOURCES} ${LIBEVENT_DEVPOLL_SOURCES} ${LIBEVENT_EVPORT_SOURCES} ${LIBEVENT_KQUEUE_SOURCES})
+ENDIF()
+
diff --git a/libevent/ChangeLog b/libevent/ChangeLog
new file mode 100644
index 00000000000..2435c1f15d8
--- /dev/null
+++ b/libevent/ChangeLog
@@ -0,0 +1,190 @@
+Changes in 1.4.12-stable:
+ o Try to contain degree of failure when running on a win32 version so heavily firewalled that we can't fake a socketpair.
+ o Fix an obscure timing-dependent, allocator-dependent crash in the evdns code.
+ o Use __VA_ARGS__ syntax for varargs macros in event_rpcgen when compiler is not GCC.
+ o Activate fd events in a pseudorandom order with O(N) backends, so that we don't systematically favor low fds (select) or earlier-added fds (poll, win32).
+ o Fix another pair of fencepost bugs in epoll.c. [Patch from Adam Langley.]
+ o Do not break evdns connections to nameservers when our IP changes.
+ o Set truncated flag correctly in evdns server replies.
+ o Disable strict aliasing with GCC: our code is not compliant with it.
+
+Changes in 1.4.11-stable:
+ o Fix a bug when removing a timeout from the heap. [Patch from Marko Kreen]
+ o Remove the limit on size of HTTP headers by removing static buffers.
+ o Fix a nasty dangling pointer bug in epoll.c that could occur after epoll_recalc(). [Patch from Kevin Springborn]
+ o Distribute Win32-Code/event-config.h, not ./event-config.h
+
+Changes in 1.4.10-stable:
+ o clean up buffered http connection data on reset; reported by Brian O'Kelley
+ o bug fix and potential race condition in signal handling; from Alexander Drozdov
+ o rename the Solaris event ports backend to evport
+ o support compilation on Haiku
+ o fix signal processing when a signal callback delivers a signal; from Alexander Drozdov
+ o const-ify some arguments to evdns functions.
+ o off-by-one error in epoll_recalc; reported by Victor Goya
+ o include Doxyfile in tar ball; from Jeff Garzik
+ o correctly parse queries with encoded \r, \n or + characters
+
+Changes in 1.4.9-stable:
+ o event_add would not return error for some backends; from Dean McNamee
+ o Clear the timer cache on entering the event loop; reported by Victor Chang
+ o Only bind the socket on connect when a local address has been provided; reported by Alejo Sanchez
+ o Allow setting of local port for evhttp connections to support millions of connections from a single system; from Richard Jones.
+ o Clear the timer cache when leaving the event loop; reported by Robin Haberkorn
+ o Fix a typo in setting the global event base; reported by lance.
+ o Fix a memory leak when reading multi-line headers
+ o Fix a memory leak by not running explicit close detection for server connections
+
+Changes in 1.4.8-stable:
+ o Match the query in DNS replies to the query in the request; from Vsevolod Stakhov.
+ o Fix a merge problem in which name_from_addr returned pointers to the stack; found by Jiang Hong.
+ o Do not remove Accept-Encoding header
+
+Changes in 1.4.7-stable:
+ o Fix a bug where headers arriving in multiple packets were not parsed; fix from Jiang Hong; test by me.
+
+Changes in 1.4.6-stable:
+ o evutil.h now includes <stdarg.h> directly
+ o switch all uses of [v]snprintf over to evutil
+ o Correct handling of trailing headers in chunked replies; from Scott Lamb.
+ o Support multi-line HTTP headers; based on a patch from Moshe Litvin
+ o Reject negative Content-Length headers; anonymous bug report
+ o Detect CLOCK_MONOTONIC at runtime for evdns; anonymous bug report
+ o Fix a bug where deleting signals with the kqueue backend would cause subsequent adds to fail
+ o Support multiple events listening on the same signal; make signals regular events that go on the same event queue; problem report by Alexander Drozdov.
+ o Deal with evbuffer_read() returning -1 on EINTR|EAGAIN; from Adam Langley.
+ o Fix a bug in which the DNS server would incorrectly set the type of a cname reply to a.
+ o Fix a bug where setting the timeout on a bufferevent would not take effect if the event was already pending.
+ o Fix a memory leak when using signals for some event bases; reported by Alexander Drozdov.
+ o Add libevent.vcproj file to distribution to help with Windows build.
+ o Fix a problem with epoll() and reinit; problem report by Alexander Drozdov.
+ o Fix off-by-one errors in devpoll; from Ian Bell
+ o Make event_add not change any state if it fails; reported by Ian Bell.
+ o Do not warn on accept when errno is either EAGAIN or EINTR
+
+Changes in 1.4.5-stable:
+ o Fix connection keep-alive behavior for HTTP/1.0
+ o Fix use of freed memory in event_reinit; pointed out by Peter Postma
+ o Constify struct timeval * where possible; pointed out by Forest Wilkinson
+ o allow min_heap_erase to be called on removed members; from liusifan.
+ o Rename INPUT and OUTPUT to EVRPC_INPUT and EVRPC_OUTPUT. Retain INPUT/OUTPUT aliases on non-win32 platforms for backwards compatibility.
+ o Do not use SO_REUSEADDR when connecting
+ o Fix Windows build
+ o Fix a bug in event_rpcgen when generated fixed-sized entries
+
+Changes in 1.4.4-stable:
+ o Correct the documentation on buffer printf functions.
+ o Don't warn on unimplemented epoll_create(): this isn't a problem, just a reason to fall back to poll or select.
+ o Correctly handle timeouts larger than 35 minutes on Linux with epoll.c. This is probably a kernel defect, but we'll have to support old kernels anyway even if it gets fixed.
+ o Fix a potential stack corruption bug in tagging on 64-bit CPUs.
+ o expose bufferevent_setwatermark via header files and fix high watermark on read
+ o fix a bug in bufferevent read water marks and add a test for them
+ o introduce bufferevent_setcb and bufferevent_setfd to allow better manipulation of bufferevents
+ o use libevent's internal timercmp on all platforms, to avoid bugs on old platforms where timercmp(a,b,<=) is buggy.
+ o reduce system calls for getting current time by caching it.
+ o fix evhttp_bind_socket() so that multiple sockets can be bound by the same http server.
+ o Build test directory correctly with CPPFLAGS set.
+ o Fix build under Visual C++ 2005.
+ o Expose evhttp_accept_socket() API.
+ o Merge windows gettimeofday() replacement into a new evutil_gettimeofday() function.
+ o Fix autoconf script behavior on IRIX.
+ o Make sure winsock2.h include always comes before windows.h include.
+
+Changes in 1.4.3-stable:
+ o include Content-Length in reply for HTTP/1.0 requests with keep-alive
+ o Patch from Tani Hosokawa: make some functions in http.c threadsafe.
+ o Do not free the kqop file descriptor in other processes, also allow it to be 0; from Andrei Nigmatulin
+ o make event_rpcgen.py generate code include event-config.h; reported by Sam Banks.
+ o make event methods static so that they are not exported; from Andrei Nigmatulin
+ o make RPC replies use application/octet-stream as mime type
+ o do not delete uninitialized timeout event in evdns
+
+Changes in 1.4.2-rc:
+ o remove pending timeouts on event_base_free()
+ o also check EAGAIN for Solaris' event ports; from W.C.A. Wijngaards
+ o devpoll and evport need reinit; tested by W.C.A Wijngaards
+ o event_base_get_method; from Springande Ulv
+ o Send CRLF after each chunk in HTTP output, for compliance with RFC2626. Patch from "propanbutan". Fixes bug 1894184.
+ o Add a int64_t parsing function, with unit tests, so we can apply Scott Lamb's fix to allow large HTTP values.
+ o Use a 64-bit field to hold HTTP content-lengths. Patch from Scott Lamb.
+ o Allow regression code to build even without Python installed
+ o remove NDEBUG ifdefs from evdns.c
+ o update documentation of event_loop and event_base_loop; from Tani Hosokawa.
+ o detect integer types properly on platforms without stdint.h
+ o Remove "AM_MAINTAINER_MODE" declaration in configure.in: now makefiles and configure should get re-generated automatically when Makefile.am or configure.in changes.
+ o do not insert event into list when evsel->add fails
+
+Changes in 1.4.1-beta:
+ o free minheap on event_base_free(); from Christopher Layne
+ o debug cleanups in signal.c; from Christopher Layne
+ o provide event_base_new() that does not set the current_base global
+ o bufferevent_write now uses a const source argument; report from Charles Kerr
+ o better documentation for event_base_loopexit; from Scott Lamb.
+ o Make kqueue have the same behavior as other backends when a signal is caught between event_add() and event_loop(). Previously, it would catch and ignore such signals.
+ o Make kqueue restore signal handlers correctly when event_del() is called.
+ o provide event_reinit() to reinitialize an event_base after fork
+ o small improvements to evhttp documentation
+ o always generate Date and Content-Length headers for HTTP/1.1 replies
+ o set the correct event base for HTTP close events
+ o New function, event_{base_}loopbreak. Like event_loopexit, it makes an event loop stop executing and return. Unlike event_loopexit, it keeps subsequent pending events from getting executed. Patch from Scott Lamb
+ o Removed obsoleted recalc code
+ o pull setters/getters out of RPC structures into a base class to which we just need to store a pointer; this reduces the memory footprint of these structures.
+ o fix a bug with event_rpcgen for integers
+ o move EV_PERSIST handling out of the event backends
+ o support for 32-bit tag numbers in rpc structures; this is wire compatible, but changes the API slightly.
+ o prefix {encode,decode}_tag functions with evtag to avoid collisions
+ o Correctly handle DNS replies with no answers set (Fixes bug 1846282)
+ o The configure script now takes an --enable-gcc-warnigns option that turns on many optional gcc warnings. (Nick has been building with these for a while, but they might be useful to other developers.)
+ o When building with GCC, use the "format" attribute to verify type correctness of calls to printf-like functions.
+ o removed linger from http server socket; reported by Ilya Martynov
+ o allow \r or \n individually to separate HTTP headers instead of the standard "\r\n"; from Charles Kerr.
+ o demote most http warnings to debug messages
+ o Fix Solaris compilation; from Magne Mahre
+ o Add a "Date" header to HTTP responses, as required by HTTP 1.1.
+ o Support specifying the local address of an evhttp_connection using set_local_address
+ o Fix a memory leak in which failed HTTP connections would not free the request object
+ o Make adding of array members in event_rpcgen more efficient, by doubling memory allocation
+ o Fix a memory leak in the DNS server
+ o Fix compilation when DNS_USE_OPENSSL_FOR_ID is enabled
+ o Fix buffer size and string generation in evdns_resolve_reverse_ipv6().
+ o Respond to nonstandard DNS queries with "NOTIMPL" rather than by ignoring them.
+ o In DNS responses, the CD flag should be preserved, not the TC flag.
+ o Fix http.c to compile properly with USE_DEBUG; from Christopher Layne
+ o Handle NULL timeouts correctly on Solaris; from Trond Norbye
+ o Recalculate pending events properly when reallocating event array on Solaris; from Trond Norbye
+ o Add Doxygen documentation to header files; from Mark Heily
+ o Add a evdns_set_transaction_id_fn() function to override the default
+ transaction ID generation code.
+ o Add an evutil module (with header evutil.h) to implement our standard cross-platform hacks, on the theory that somebody else would like to use them too.
+ o Fix signals implementation on windows.
+ o Fix http module on windows to close sockets properly.
+ o Make autogen.sh script run correctly on systems where /bin/sh isn't bash. (Patch from Trond Norbye, rewritten by Hagne Mahre and then Hannah Schroeter.)
+ o Skip calling gettime() in timeout_process if we are not in fact waiting for any events. (Patch from Trond Norbye)
+ o Make test subdirectory compile under mingw.
+ o Fix win32 buffer.c behavior so that it is correct for sockets (which do not like ReadFile and WriteFile).
+ o Make the test.sh script run unit tests for the evpoll method.
+ o Make the entire evdns.h header enclosed in "extern C" as appropriate.
+ o Fix implementation of strsep on platforms that lack it
+ o Fix implementation of getaddrinfo on platforms that lack it; mainly, this will make Windows http.c work better. Original patch by Lubomir Marinov.
+ o Fix evport implementation: port_disassociate called on unassociated events resulting in bogus errors; more efficient memory management; from Trond Norbye and Prakash Sangappa
+ o support for hooks on rpc input and output; can be used to implement rpc independent processing such as compression or authentication.
+ o use a min heap instead of a red-black tree for timeouts; as a result finding the min is a O(1) operation now; from Maxim Yegorushkin
+ o associate an event base with an rpc pool
+ o added two additional libraries: libevent_core and libevent_extra in addition to the regular libevent. libevent_core contains only the event core whereas libevent_extra contains dns, http and rpc support
+ o Begin using libtool's library versioning support correctly. If we don't mess up, this will more or less guarantee binaries linked against old versions of libevent continue working when we make changes to libevent that do not break backward compatibility.
+ o Fix evhttp.h compilation when TAILQ_ENTRY is not defined.
+ o Small code cleanups in epoll_dispatch().
+ o Increase the maximum number of addresses read from a packet in evdns to 32.
+ o Remove support for the rtsig method: it hasn't compiled for a while, and nobody seems to miss it very much. Let us know if there's a good reason to put it back in.
+ o Rename the "class" field in evdns_server_request to dns_question_class, so that it won't break compilation under C++. Use a macro so that old code won't break. Mark the macro as deprecated.
+ o Fix DNS unit tests so that having a DNS server with broken IPv6 support is no longer cause for aborting the unit tests.
+ o Make event_base_free() succeed even if there are pending non-internal events on a base. This may still leak memory and fds, but at least it no longer crashes.
+ o Post-process the config.h file into a new, installed event-config.h file that we can install, and whose macros will be safe to include in header files.
+ o Remove the long-deprecated acconfig.h file.
+ o Do not require #include <sys/types.h> before #include <event.h>.
+ o Add new evutil_timer* functions to wrap (or replace) the regular timeval manipulation functions.
+ o Fix many build issues when using the Microsoft C compiler.
+ o Remove a bash-ism in autogen.sh
+ o When calling event_del on a signal, restore the signal handler's previous value rather than setting it to SIG_DFL. Patch from Christopher Layne.
+ o Make the logic for active events work better with internal events; patch from Christopher Layne.
+ o We do not need to specially remove a timeout before calling event_del; patch from Christopher Layne.
diff --git a/libevent/Doxyfile b/libevent/Doxyfile
new file mode 100644
index 00000000000..77f6de89b46
--- /dev/null
+++ b/libevent/Doxyfile
@@ -0,0 +1,230 @@
+# Doxyfile 1.5.1
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project
+#
+# All text after a hash (#) is considered a comment and will be ignored
+# The format is:
+# TAG = value [value, ...]
+# For lists items can also be appended using:
+# TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (" ")
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded
+# by quotes) that should identify the project.
+
+PROJECT_NAME = libevent
+
+# Place all output under 'doxygen/'
+
+OUTPUT_DIRECTORY = doxygen/
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
+# will interpret the first line (until the first dot) of a JavaDoc-style
+# comment as the brief description. If set to NO, the JavaDoc
+# comments will behave just like the Qt-style comments (thus requiring an
+# explicit @brief command for a brief description.)
+
+JAVADOC_AUTOBRIEF = YES
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
+# sources only. Doxygen will then generate output that is more tailored for C.
+# For instance, some of the names that are used will be different. The list
+# of all members will be omitted, etc.
+
+OPTIMIZE_OUTPUT_FOR_C = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
+# brief documentation of file, namespace and class members alphabetically
+# by member name. If set to NO (the default) the members will appear in
+# declaration order.
+
+SORT_BRIEF_DOCS = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT = event.h evdns.h evhttp.h evrpc.h
+
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
+# generate HTML output.
+
+GENERATE_HTML = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
+# generate Latex output.
+
+GENERATE_LATEX = YES
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `latex' will be used as the default path.
+
+LATEX_OUTPUT = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked. If left blank `latex' will be used as the default command name.
+
+LATEX_CMD_NAME = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
+# generate index for LaTeX. If left blank `makeindex' will be used as the
+# default command name.
+
+MAKEINDEX_CMD_NAME = makeindex
+
+# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
+# LaTeX documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_LATEX = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used
+# by the printer. Possible values are: a4, a4wide, letter, legal and
+# executive. If left blank a4wide will be used.
+
+PAPER_TYPE = a4wide
+
+# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX
+# packages that should be included in the LaTeX output.
+
+EXTRA_PACKAGES =
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
+# the generated latex document. The header should contain everything until
+# the first chapter. If it is left blank doxygen will generate a
+# standard header. Notice: only use this tag if you know what you are doing!
+
+LATEX_HEADER =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
+# is prepared for conversion to pdf (using ps2pdf). The pdf file will
+# contain links (just like the HTML output) instead of page references
+# This makes the output suitable for online browsing using a pdf viewer.
+
+PDF_HYPERLINKS = NO
+
+# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
+# plain latex in the generated Makefile. Set this option to YES to get a
+# higher quality PDF documentation.
+
+USE_PDFLATEX = NO
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode.
+# command to the generated LaTeX files. This will instruct LaTeX to keep
+# running if errors occur, instead of asking the user for help.
+# This option is also used when generating formulas in HTML.
+
+LATEX_BATCHMODE = NO
+
+# If LATEX_HIDE_INDICES is set to YES then doxygen will not
+# include the index chapters (such as File Index, Compound Index, etc.)
+# in the output.
+
+LATEX_HIDE_INDICES = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
+# generate man pages
+
+GENERATE_MAN = YES
+
+# The MAN_EXTENSION tag determines the extension that is added to
+# the generated man pages (default is the subroutine's section .3)
+
+MAN_EXTENSION = .3
+
+# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
+# then it will generate one additional man file for each entity
+# documented in the real man page(s). These additional files
+# only source the real man page, but without them the man command
+# would be unable to find the correct page. The default is NO.
+
+MAN_LINKS = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
+# evaluate all C-preprocessor directives found in the sources and include
+# files.
+
+ENABLE_PREPROCESSING = YES
+
+# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
+# names in the source code. If set to NO (the default) only conditional
+# compilation will be performed. Macro expansion can be done in a controlled
+# way by setting EXPAND_ONLY_PREDEF to YES.
+
+MACRO_EXPANSION = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
+# then the macro expansion is limited to the macros specified with the
+# PREDEFINED and EXPAND_AS_DEFINED tags.
+
+EXPAND_ONLY_PREDEF = NO
+
+# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files
+# in the INCLUDE_PATH (see below) will be search if a #include is found.
+
+SEARCH_INCLUDES = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will
+# be used.
+
+INCLUDE_FILE_PATTERNS =
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED = TAILQ_ENTRY RB_ENTRY _EVENT_DEFINED_TQENTRY
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
+# doxygen's preprocessor will remove all function-like macros that are alone
+# on a line, have an all uppercase name, and do not end with a semicolon. Such
+# function macros are typically used for boiler-plate code, and will confuse
+# the parser if not removed.
+
+SKIP_FUNCTION_MACROS = YES
diff --git a/libevent/Makefile.am b/libevent/Makefile.am
new file mode 100644
index 00000000000..8d9d7520373
--- /dev/null
+++ b/libevent/Makefile.am
@@ -0,0 +1,124 @@
+AUTOMAKE_OPTIONS = foreign no-dependencies
+
+# This is the point release for libevent. It shouldn't include any
+# a/b/c/d/e notations.
+RELEASE = 1.4
+
+# This is the version info for the libevent binary API. It has three
+# numbers:
+# Current -- the number of the binary API that we're implementing
+# Revision -- which iteration of the implementation of the binary
+# API are we supplying?
+# Age -- How many previous binary API versions do we also
+# support?
+#
+# If we release a new version that does not change the binary API,
+# increment Revision.
+#
+# If we release a new version that changes the binary API, but does
+# not break programs compiled against the old binary API, increment
+# Current and Age. Set Revision to 0, since this is the first
+# implementation of the new API.
+#
+# Otherwise, we're changing the binary API and breaking backward
+# compatibility with old binaries. Increment Current. Set Age to 0,
+# since we're backward compatible with no previous APIs. Set Revision
+# to 0 too.
+
+# History:
+# Libevent 1.4.1 was 2:0:0
+# Libevent 1.4.2 should be 3:0:0
+# Libevent 1.4.5 is 3:0:1 (we forgot to increment in the past)
+VERSION_INFO = 3:3:1
+
+bin_SCRIPTS = event_rpcgen.py
+
+EXTRA_DIST = autogen.sh event.h event-internal.h log.h evsignal.h evdns.3 \
+ evrpc.h evrpc-internal.h min_heap.h \
+ event.3 \
+ Doxyfile \
+ kqueue.c epoll_sub.c epoll.c select.c poll.c signal.c \
+ evport.c devpoll.c event_rpcgen.py \
+ sample/Makefile.am sample/Makefile.in sample/event-test.c \
+ sample/signal-test.c sample/time-test.c \
+ test/Makefile.am test/Makefile.in test/bench.c test/regress.c \
+ test/test-eof.c test/test-weof.c test/test-time.c \
+ test/test-init.c test/test.sh \
+ compat/sys/queue.h compat/sys/_time.h \
+ WIN32-Code/config.h \
+ WIN32-Code/event-config.h \
+ WIN32-Code/win32.c \
+ WIN32-Code/tree.h \
+ WIN32-Prj/event_test/event_test.dsp \
+ WIN32-Prj/event_test/test.txt WIN32-Prj/libevent.dsp \
+ WIN32-Prj/libevent.dsw WIN32-Prj/signal_test/signal_test.dsp \
+ WIN32-Prj/time_test/time_test.dsp WIN32-Prj/regress/regress.vcproj \
+ WIN32-Prj/libevent.sln WIN32-Prj/libevent.vcproj
+
+lib_LTLIBRARIES = libevent.la libevent_core.la libevent_extra.la
+
+if BUILD_WIN32
+
+SUBDIRS = . sample
+SYS_LIBS = -lws2_32
+SYS_SRC = WIN32-Code/win32.c
+SYS_INCLUDES = -IWIN32-Code
+
+else
+
+SUBDIRS = . sample test
+SYS_LIBS =
+SYS_SRC =
+SYS_INCLUDES =
+
+endif
+
+BUILT_SOURCES = event-config.h
+
+event-config.h: config.h
+ echo '/* event-config.h' > $@
+ echo ' * Generated by autoconf; post-processed by libevent.' >> $@
+ echo ' * Do not edit this file.' >> $@
+ echo ' * Do not rely on macros in this file existing in later versions.'>> $@
+ echo ' */' >> $@
+ echo '#ifndef _EVENT_CONFIG_H_' >> $@
+ echo '#define _EVENT_CONFIG_H_' >> $@
+
+ sed -e 's/#define /#define _EVENT_/' \
+ -e 's/#undef /#undef _EVENT_/' \
+ -e 's/#ifndef /#ifndef _EVENT_/' < config.h >> $@
+ echo "#endif" >> $@
+
+CORE_SRC = event.c buffer.c evbuffer.c log.c evutil.c $(SYS_SRC)
+EXTRA_SRC = event_tagging.c http.c evhttp.h http-internal.h evdns.c \
+ evdns.h evrpc.c evrpc.h evrpc-internal.h \
+ strlcpy.c strlcpy-internal.h strlcpy-internal.h
+
+libevent_la_SOURCES = $(CORE_SRC) $(EXTRA_SRC)
+libevent_la_LIBADD = @LTLIBOBJS@ $(SYS_LIBS)
+libevent_la_LDFLAGS = -release $(RELEASE) -version-info $(VERSION_INFO)
+
+libevent_core_la_SOURCES = $(CORE_SRC)
+libevent_core_la_LIBADD = @LTLIBOBJS@ $(SYS_LIBS)
+libevent_core_la_LDFLAGS = -release $(RELEASE) -version-info $(VERSION_INFO)
+
+libevent_extra_la_SOURCES = $(EXTRA_SRC)
+libevent_extra_la_LIBADD = @LTLIBOBJS@ $(SYS_LIBS)
+libevent_extra_la_LDFLAGS = -release $(RELEASE) -version-info $(VERSION_INFO)
+
+include_HEADERS = event.h evhttp.h evdns.h evrpc.h evutil.h
+
+nodist_include_HEADERS = event-config.h
+
+INCLUDES = -I$(srcdir)/compat $(SYS_INCLUDES)
+
+man_MANS = event.3 evdns.3
+
+verify: libevent.la
+ cd test && make verify
+
+doxygen: FORCE
+ doxygen $(srcdir)/Doxyfile
+FORCE:
+
+DISTCLEANFILES = *~ event-config.h
diff --git a/libevent/README b/libevent/README
new file mode 100644
index 00000000000..b0650392ed4
--- /dev/null
+++ b/libevent/README
@@ -0,0 +1,57 @@
+To build libevent, type
+
+$ ./configure && make
+
+ (If you got libevent from the subversion repository, you will
+ first need to run the included "autogen.sh" script in order to
+ generate the configure script.)
+
+Install as root via
+
+# make install
+
+You can run the regression tests by
+
+$ make verify
+
+Before reporting any problems, please run the regression tests.
+
+To enable low-level tracing, build the library as:
+
+CFLAGS=-DUSE_DEBUG ./configure [...]
+
+Acknowledgements:
+-----------------
+
+The following people have helped with suggestions, ideas, code or
+fixing bugs:
+
+ Alejo
+ Weston Andros Adamson
+ William Ahern
+ Stas Bekman
+ Andrew Danforth
+ Mike Davis
+ Shie Erlich
+ Alexander von Gernler
+ Artur Grabowski
+ Aaron Hopkins
+ Claudio Jeker
+ Scott Lamb
+ Adam Langley
+ Philip Lewis
+ David Libenzi
+ Nick Mathewson
+ Andrey Matveev
+ Richard Nyberg
+ Jon Oberheide
+ Phil Oleson
+ Dave Pacheco
+ Tassilo von Parseval
+ Pierre Phaneuf
+ Jon Poland
+ Bert JW Regeer
+ Dug Song
+ Taral
+
+If I have forgotten your name, please contact me.
diff --git a/libevent/WIN32-Code/event-config.h b/libevent/WIN32-Code/event-config.h
new file mode 100644
index 00000000000..3059080274b
--- /dev/null
+++ b/libevent/WIN32-Code/event-config.h
@@ -0,0 +1,244 @@
+/* event-config.h
+ * Generated by autoconf; post-processed by libevent.
+ * Do not edit this file.
+ * Do not rely on macros in this file existing in later versions.
+ */
+#ifndef _EVENT_CONFIG_H_
+#define _EVENT_CONFIG_H_
+/* config.h. Generated by configure. */
+/* config.h.in. Generated from configure.in by autoheader. */
+
+/* Define if clock_gettime is available in libc */
+/* #undef _EVENT_DNS_USE_CPU_CLOCK_FOR_ID */
+
+/* Define if no secure id variant is available */
+#define _EVENT_DNS_USE_GETTIMEOFDAY_FOR_ID 1
+
+/* Define to 1 if you have the `clock_gettime' function. */
+/* #undef _EVENT_HAVE_CLOCK_GETTIME */
+
+/* Define if /dev/poll is available */
+/* #undef _EVENT_HAVE_DEVPOLL */
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+/* #undef _EVENT_HAVE_DLFCN_H */
+
+/* Define if your system supports the epoll system calls */
+/* #undef _EVENT_HAVE_EPOLL */
+
+/* Define to 1 if you have the `epoll_ctl' function. */
+/* #undef _EVENT_HAVE_EPOLL_CTL */
+
+/* Define if your system supports event ports */
+/* #undef _EVENT_HAVE_EVENT_PORTS */
+
+/* Define to 1 if you have the `fcntl' function. */
+/* #undef _EVENT_HAVE_FCNTL */
+
+/* Define to 1 if you have the <fcntl.h> header file. */
+#define _EVENT_HAVE_FCNTL_H 1
+
+/* Define to 1 if you have the `getaddrinfo' function. */
+/* #undef _EVENT_HAVE_GETADDRINFO */
+
+/* Define to 1 if you have the `getnameinfo' function. */
+/* #undef _EVENT_HAVE_GETNAMEINFO */
+
+/* Define to 1 if you have the `gettimeofday' function. */
+/* #define _EVENT_HAVE_GETTIMEOFDAY 1 */
+
+/* Define to 1 if you have the `inet_ntop' function. */
+/* #undef _EVENT_HAVE_INET_NTOP */
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+/* #undef _EVENT_HAVE_INTTYPES_H 1 */
+
+/* Define to 1 if you have the `kqueue' function. */
+/* #undef _EVENT_HAVE_KQUEUE */
+
+/* Define to 1 if you have the `nsl' library (-lnsl). */
+/* #undef _EVENT_HAVE_LIBNSL */
+
+/* Define to 1 if you have the `resolv' library (-lresolv). */
+/* #undef _EVENT_HAVE_LIBRESOLV */
+
+/* Define to 1 if you have the `rt' library (-lrt). */
+/* #undef _EVENT_HAVE_LIBRT */
+
+/* Define to 1 if you have the `socket' library (-lsocket). */
+/* #undef _EVENT_HAVE_LIBSOCKET */
+
+/* Define to 1 if you have the <memory.h> header file. */
+#define _EVENT_HAVE_MEMORY_H 1
+
+/* Define to 1 if you have the <netinet/in6.h> header file. */
+/* #undef _EVENT_HAVE_NETINET_IN6_H */
+
+/* Define to 1 if you have the `poll' function. */
+/* #undef _EVENT_HAVE_POLL */
+
+/* Define to 1 if you have the <poll.h> header file. */
+/* #undef _EVENT_HAVE_POLL_H */
+
+/* Define to 1 if you have the `port_create' function. */
+/* #undef _EVENT_HAVE_PORT_CREATE */
+
+/* Define to 1 if you have the <port.h> header file. */
+/* #undef _EVENT_HAVE_PORT_H */
+
+/* Define to 1 if you have the `select' function. */
+/* #undef _EVENT_HAVE_SELECT */
+
+/* Define if F_SETFD is defined in <fcntl.h> */
+/* #undef _EVENT_HAVE_SETFD */
+
+/* Define to 1 if you have the `sigaction' function. */
+/* #undef _EVENT_HAVE_SIGACTION */
+
+/* Define to 1 if you have the `signal' function. */
+#define _EVENT_HAVE_SIGNAL 1
+
+/* Define to 1 if you have the <signal.h> header file. */
+#define _EVENT_HAVE_SIGNAL_H 1
+
+/* Define to 1 if you have the <stdarg.h> header file. */
+#define _EVENT_HAVE_STDARG_H 1
+
+/* Define to 1 if you have the <stdint.h> header file. */
+/* #define _EVENT_HAVE_STDINT_H 1 */
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#define _EVENT_HAVE_STDLIB_H 1
+
+/* Define to 1 if you have the <strings.h> header file. */
+#define _EVENT_HAVE_STRINGS_H 1
+
+/* Define to 1 if you have the <string.h> header file. */
+#define _EVENT_HAVE_STRING_H 1
+
+/* Define to 1 if you have the `strlcpy' function. */
+/* #undef _EVENT_HAVE_STRLCPY */
+
+/* Define to 1 if you have the `strsep' function. */
+/* #undef _EVENT_HAVE_STRSEP */
+
+/* Define to 1 if you have the `strtok_r' function. */
+/* #undef _EVENT_HAVE_STRTOK_R */
+
+/* Define to 1 if the system has the type `struct in6_addr'. */
+#define _EVENT_HAVE_STRUCT_IN6_ADDR 1
+
+/* Define to 1 if you have the <sys/devpoll.h> header file. */
+/* #undef _EVENT_HAVE_SYS_DEVPOLL_H */
+
+/* Define to 1 if you have the <sys/epoll.h> header file. */
+/* #undef _EVENT_HAVE_SYS_EPOLL_H */
+
+/* Define to 1 if you have the <sys/event.h> header file. */
+/* #undef _EVENT_HAVE_SYS_EVENT_H */
+
+/* Define to 1 if you have the <sys/ioctl.h> header file. */
+/* #undef _EVENT_HAVE_SYS_IOCTL_H */
+
+/* Define to 1 if you have the <sys/queue.h> header file. */
+/* #undef _EVENT_HAVE_SYS_QUEUE_H */
+
+/* Define to 1 if you have the <sys/select.h> header file. */
+/* #undef _EVENT_HAVE_SYS_SELECT_H */
+
+/* Define to 1 if you have the <sys/socket.h> header file. */
+/* #undef _EVENT_HAVE_SYS_SOCKET_H */
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#define _EVENT_HAVE_SYS_STAT_H 1
+
+/* Define to 1 if you have the <sys/time.h> header file. */
+/* #define _EVENT_HAVE_SYS_TIME_H 1 */
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+/* #define _EVENT_HAVE_SYS_TYPES_H 1 */
+
+/* Define if TAILQ_FOREACH is defined in <sys/queue.h> */
+/* #undef _EVENT_HAVE_TAILQFOREACH */
+
+/* Define if timeradd is defined in <sys/time.h> */
+/* #undef _EVENT_HAVE_TIMERADD */
+
+/* Define if timerclear is defined in <sys/time.h> */
+/* #define _EVENT_HAVE_TIMERCLEAR 1 */
+
+/* Define if timercmp is defined in <sys/time.h> */
+#define _EVENT_HAVE_TIMERCMP 1
+
+/* Define if timerisset is defined in <sys/time.h> */
+#define _EVENT_HAVE_TIMERISSET 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+/* #define _EVENT_HAVE_UNISTD_H 1 */
+
+/* Define to 1 if you have the `vasprintf' function. */
+/* #undef _EVENT_HAVE_VASPRINTF */
+
+/* Define if kqueue works correctly with pipes */
+/* #undef _EVENT_HAVE_WORKING_KQUEUE */
+
+/* Name of package */
+#define _EVENT_PACKAGE "libevent"
+
+/* Define to the address where bug reports for this package should be sent. */
+#define _EVENT_PACKAGE_BUGREPORT ""
+
+/* Define to the full name of this package. */
+#define _EVENT_PACKAGE_NAME ""
+
+/* Define to the full name and version of this package. */
+#define _EVENT_PACKAGE_STRING ""
+
+/* Define to the one symbol short name of this package. */
+#define _EVENT_PACKAGE_TARNAME ""
+
+/* Define to the version of this package. */
+#define _EVENT_PACKAGE_VERSION ""
+
+/* Define to 1 if you have the ANSI C header files. */
+#define _EVENT_STDC_HEADERS 1
+
+/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
+#define _EVENT_TIME_WITH_SYS_TIME 1
+
+/* Version number of package */
+#define _EVENT_VERSION "1.3.99-trunk"
+
+/* Define to an appropriate substitute if the compiler doesn't have __func__ */
+/* #undef _EVENT___func__ */
+
+/* Define to empty if `const' does not conform to ANSI C. */
+/* #undef _EVENT_const */
+
+/* Define to `__inline__' or `__inline' if that's what the C compiler
+ calls it, or to nothing if 'inline' is not supported under any name. */
+#ifndef _EVENT___cplusplus
+#define _EVENT_inline __inline
+#endif
+
+/* Define to `int' if <sys/types.h> does not define. */
+/* #undef _EVENT_pid_t */
+
+/* Define to `unsigned' if <sys/types.h> does not define. */
+/* #undef _EVENT_size_t */
+
+/* Define to unsigned int if you dont have it */
+#define _EVENT_socklen_t unsigned int
+
+/* Define to `unsigned short' if <sys/types.h> does not define. */
+/* #undef _EVENT_uint16_t */
+
+/* Define to `unsigned int' if <sys/types.h> does not define. */
+/* #undef _EVENT_uint32_t */
+
+/* Define to `unsigned long long' if <sys/types.h> does not define. */
+/* #undef _EVENT_uint64_t */
+
+/* Define to `unsigned char' if <sys/types.h> does not define. */
+/* #undef _EVENT_uint8_t */
+#endif
diff --git a/libevent/WIN32-Code/misc.c b/libevent/WIN32-Code/misc.c
new file mode 100644
index 00000000000..371e192beae
--- /dev/null
+++ b/libevent/WIN32-Code/misc.c
@@ -0,0 +1,93 @@
+#include <stdio.h>
+#include <string.h>
+#include <windows.h>
+#include <sys/timeb.h>
+#include <time.h>
+
+#ifdef __GNUC__
+/*our prototypes for timeval and timezone are in here, just in case the above
+ headers don't have them*/
+#include "misc.h"
+#endif
+
+/****************************************************************************
+ *
+ * Function: gettimeofday(struct timeval *, struct timezone *)
+ *
+ * Purpose: Get current time of day.
+ *
+ * Arguments: tv => Place to store the curent time of day.
+ * tz => Ignored.
+ *
+ * Returns: 0 => Success.
+ *
+ ****************************************************************************/
+
+#ifndef HAVE_GETTIMEOFDAY
+/* Minimal Win32 substitute for gettimeofday(), built on _ftime().
+ * Resolution is milliseconds only; the timezone argument is accepted
+ * but ignored.  Returns 0 on success, -1 if tv is NULL. */
+int gettimeofday(struct timeval *tv, struct timezone *tz) {
+	struct _timeb tb;
+
+	if(tv == NULL)
+		return -1;
+
+	/* _ftime() yields seconds plus milliseconds; convert ms -> us. */
+	_ftime(&tb);
+	tv->tv_sec = (long) tb.time;
+	tv->tv_usec = ((int) tb.millitm) * 1000;
+	return 0;
+}
+#endif
+
+#if 0
+/* NOTE(review): this whole section is compiled out (#if 0) and kept for
+ * reference only.  win_read()/win_write() wrap ReadFile/WriteFile on a
+ * HANDLE smuggled through an int, and socketpair() emulates a socket
+ * pair with a duplex named pipe.  Casting HANDLE to int truncates on
+ * 64-bit Windows (LLP64) -- revisit before ever enabling this code. */
+int
+win_read(int fd, void *buf, unsigned int length)
+{
+	DWORD dwBytesRead;
+	int res = ReadFile((HANDLE) fd, buf, length, &dwBytesRead, NULL);
+	if (res == 0) {
+		DWORD error = GetLastError();
+		/* ERROR_NO_DATA on a non-blocking pipe means "nothing yet". */
+		if (error == ERROR_NO_DATA)
+			return (0);
+		return (-1);
+	} else
+		return (dwBytesRead);
+}
+
+int
+win_write(int fd, void *buf, unsigned int length)
+{
+	DWORD dwBytesWritten;
+	int res = WriteFile((HANDLE) fd, buf, length, &dwBytesWritten, NULL);
+	if (res == 0) {
+		DWORD error = GetLastError();
+		if (error == ERROR_NO_DATA)
+			return (0);
+		return (-1);
+	} else
+		return (dwBytesWritten);
+}
+
+/* Emulates socketpair(): returns the two ends of a duplex, non-blocking
+ * named pipe in sv[0]/sv[1] (HANDLEs cast to int).  d/type/protocol are
+ * ignored.  Returns 0 on success, -1 on failure. */
+int
+socketpair(int d, int type, int protocol, int *sv)
+{
+	static int count;
+	char buf[64];
+	HANDLE fd;
+	DWORD dwMode;
+	sprintf(buf, "\\\\.\\pipe\\levent-%d", count++);
+	/* Create a duplex pipe which will behave like a socket pair */
+	fd = CreateNamedPipe(buf, PIPE_ACCESS_DUPLEX, PIPE_TYPE_BYTE | PIPE_NOWAIT,
+	    PIPE_UNLIMITED_INSTANCES, 4096, 4096, 0, NULL);
+	if (fd == INVALID_HANDLE_VALUE)
+		return (-1);
+	sv[0] = (int)fd;
+
+	fd = CreateFile(buf, GENERIC_READ|GENERIC_WRITE, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
+	if (fd == INVALID_HANDLE_VALUE)
+		return (-1);
+	dwMode = PIPE_NOWAIT;
+	SetNamedPipeHandleState(fd, &dwMode, NULL, NULL);
+	sv[1] = (int)fd;
+
+	return (0);
+}
+#endif
diff --git a/libevent/WIN32-Code/misc.h b/libevent/WIN32-Code/misc.h
new file mode 100644
index 00000000000..aced574687c
--- /dev/null
+++ b/libevent/WIN32-Code/misc.h
@@ -0,0 +1,11 @@
+#ifndef MISC_H
+#define MISC_H
+
+/* Forward declarations only; callers include the real definitions from
+ * the system headers (see misc.c, which includes this under __GNUC__
+ * in case those headers lack the prototypes). */
+struct timezone;
+struct timeval;
+
+/* Win32 replacement implemented in misc.c; declared only when the
+ * platform does not provide a native gettimeofday(). */
+#ifndef HAVE_GETTIMEOFDAY
+int gettimeofday(struct timeval *,struct timezone *);
+#endif
+
+#endif
diff --git a/libevent/WIN32-Code/tree.h b/libevent/WIN32-Code/tree.h
new file mode 100644
index 00000000000..79e8d91f0eb
--- /dev/null
+++ b/libevent/WIN32-Code/tree.h
@@ -0,0 +1,1354 @@
+/* $OpenBSD: tree.h,v 1.7 2002/10/17 21:51:54 art Exp $ */
+/*
+ * Copyright 2002 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SYS_TREE_H_
+#define _SYS_TREE_H_
+
+/*
+ * This file defines data structures for different types of trees:
+ * splay trees and red-black trees.
+ *
+ * A splay tree is a self-organizing data structure. Every operation
+ * on the tree causes a splay to happen. The splay moves the requested
+ * node to the root of the tree and partly rebalances it.
+ *
+ * This has the benefit that request locality causes faster lookups as
+ * the requested nodes move to the top of the tree. On the other hand,
+ * every lookup causes memory writes.
+ *
+ * The Balance Theorem bounds the total access time for m operations
+ * and n inserts on an initially empty tree as O((m + n)lg n). The
+ * amortized cost for a sequence of m accesses to a splay tree is O(lg n);
+ *
+ * A red-black tree is a binary search tree with the node color as an
+ * extra attribute. It fulfills a set of conditions:
+ * - every search path from the root to a leaf consists of the
+ * same number of black nodes,
+ * - each red node (except for the root) has a black parent,
+ * - each leaf node is black.
+ *
+ * Every operation on a red-black tree is bounded as O(lg n).
+ * The maximum height of a red-black tree is 2lg (n+1).
+ */
+
+/*
+ * Splay tree declaration/access macros.  Nodes embed their links via
+ * SPLAY_ENTRY (intrusive); every operation splays the accessed node
+ * toward the root.
+ */
+#define SPLAY_HEAD(name, type) \
+struct name { \
+	struct type *sph_root; /* root of the tree */ \
+}
+
+#define SPLAY_INITIALIZER(root) \
+	{ NULL }
+
+#define SPLAY_INIT(root) do { \
+	(root)->sph_root = NULL; \
+} while (0)
+
+#define SPLAY_ENTRY(type) \
+struct { \
+	struct type *spe_left; /* left element */ \
+	struct type *spe_right; /* right element */ \
+}
+
+#define SPLAY_LEFT(elm, field) (elm)->field.spe_left
+#define SPLAY_RIGHT(elm, field) (elm)->field.spe_right
+#define SPLAY_ROOT(head) (head)->sph_root
+#define SPLAY_EMPTY(head) (SPLAY_ROOT(head) == NULL)
+
+/* SPLAY_ROTATE_{LEFT,RIGHT} expect that tmp hold SPLAY_{RIGHT,LEFT} */
+#define SPLAY_ROTATE_RIGHT(head, tmp, field) do { \
+	SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(tmp, field); \
+	SPLAY_RIGHT(tmp, field) = (head)->sph_root; \
+	(head)->sph_root = tmp; \
+} while (0)
+
+#define SPLAY_ROTATE_LEFT(head, tmp, field) do { \
+	SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(tmp, field); \
+	SPLAY_LEFT(tmp, field) = (head)->sph_root; \
+	(head)->sph_root = tmp; \
+} while (0)
+
+/* Top-down splay helpers: descend one step, stitching the bypassed
+ * subtree onto the growing right (LINKLEFT) / left (LINKRIGHT) tree. */
+#define SPLAY_LINKLEFT(head, tmp, field) do { \
+	SPLAY_LEFT(tmp, field) = (head)->sph_root; \
+	tmp = (head)->sph_root; \
+	(head)->sph_root = SPLAY_LEFT((head)->sph_root, field); \
+} while (0)
+
+#define SPLAY_LINKRIGHT(head, tmp, field) do { \
+	SPLAY_RIGHT(tmp, field) = (head)->sph_root; \
+	tmp = (head)->sph_root; \
+	(head)->sph_root = SPLAY_RIGHT((head)->sph_root, field); \
+} while (0)
+
+/* Reattach the accumulated left/right trees under the new root. */
+#define SPLAY_ASSEMBLE(head, node, left, right, field) do { \
+	SPLAY_RIGHT(left, field) = SPLAY_LEFT((head)->sph_root, field); \
+	SPLAY_LEFT(right, field) = SPLAY_RIGHT((head)->sph_root, field);\
+	SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(node, field); \
+	SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(node, field); \
+} while (0)
+
+/* Generates prototypes and inline functions */
+
+#define SPLAY_PROTOTYPE(name, type, field, cmp) \
+void name##_SPLAY(struct name *, struct type *); \
+void name##_SPLAY_MINMAX(struct name *, int); \
+struct type *name##_SPLAY_INSERT(struct name *, struct type *); \
+struct type *name##_SPLAY_REMOVE(struct name *, struct type *); \
+ \
+/* Finds the node with the same key as elm */ \
+static __inline struct type * \
+name##_SPLAY_FIND(struct name *head, struct type *elm) \
+{ \
+	if (SPLAY_EMPTY(head)) \
+		return(NULL); \
+	name##_SPLAY(head, elm); \
+	if ((cmp)(elm, (head)->sph_root) == 0) \
+		return (head->sph_root); \
+	return (NULL); \
+} \
+ \
+static __inline struct type * \
+name##_SPLAY_NEXT(struct name *head, struct type *elm) \
+{ \
+	name##_SPLAY(head, elm); \
+	if (SPLAY_RIGHT(elm, field) != NULL) { \
+		elm = SPLAY_RIGHT(elm, field); \
+		while (SPLAY_LEFT(elm, field) != NULL) { \
+			elm = SPLAY_LEFT(elm, field); \
+		} \
+	} else \
+		elm = NULL; \
+	return (elm); \
+} \
+ \
+static __inline struct type * \
+name##_SPLAY_MIN_MAX(struct name *head, int val) \
+{ \
+	name##_SPLAY_MINMAX(head, val); \
+	return (SPLAY_ROOT(head)); \
+}
+
+/* Main splay operation.
+ * Moves node close to the key of elm to top
+ */
+#define SPLAY_GENERATE(name, type, field, cmp) \
+struct type * \
+name##_SPLAY_INSERT(struct name *head, struct type *elm) \
+{ \
+	if (SPLAY_EMPTY(head)) { \
+		SPLAY_LEFT(elm, field) = SPLAY_RIGHT(elm, field) = NULL; \
+	} else { \
+		int __comp; \
+		name##_SPLAY(head, elm); \
+		__comp = (cmp)(elm, (head)->sph_root); \
+		if(__comp < 0) { \
+			SPLAY_LEFT(elm, field) = SPLAY_LEFT((head)->sph_root, field);\
+			SPLAY_RIGHT(elm, field) = (head)->sph_root; \
+			SPLAY_LEFT((head)->sph_root, field) = NULL; \
+		} else if (__comp > 0) { \
+			SPLAY_RIGHT(elm, field) = SPLAY_RIGHT((head)->sph_root, field);\
+			SPLAY_LEFT(elm, field) = (head)->sph_root; \
+			SPLAY_RIGHT((head)->sph_root, field) = NULL; \
+		} else \
+			return ((head)->sph_root); \
+	} \
+	(head)->sph_root = (elm); \
+	return (NULL); \
+} \
+ \
+struct type * \
+name##_SPLAY_REMOVE(struct name *head, struct type *elm) \
+{ \
+	struct type *__tmp; \
+	if (SPLAY_EMPTY(head)) \
+		return (NULL); \
+	name##_SPLAY(head, elm); \
+	if ((cmp)(elm, (head)->sph_root) == 0) { \
+		if (SPLAY_LEFT((head)->sph_root, field) == NULL) { \
+			(head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);\
+		} else { \
+			__tmp = SPLAY_RIGHT((head)->sph_root, field); \
+			(head)->sph_root = SPLAY_LEFT((head)->sph_root, field);\
+			name##_SPLAY(head, elm); \
+			SPLAY_RIGHT((head)->sph_root, field) = __tmp; \
+		} \
+		return (elm); \
+	} \
+	return (NULL); \
+} \
+ \
+void \
+name##_SPLAY(struct name *head, struct type *elm) \
+{ \
+	struct type __node, *__left, *__right, *__tmp; \
+	int __comp; \
+\
+	SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\
+	__left = __right = &__node; \
+\
+	while ((__comp = (cmp)(elm, (head)->sph_root))) { \
+		if (__comp < 0) { \
+			__tmp = SPLAY_LEFT((head)->sph_root, field); \
+			if (__tmp == NULL) \
+				break; \
+			if ((cmp)(elm, __tmp) < 0){ \
+				SPLAY_ROTATE_RIGHT(head, __tmp, field); \
+				if (SPLAY_LEFT((head)->sph_root, field) == NULL)\
+					break; \
+			} \
+			SPLAY_LINKLEFT(head, __right, field); \
+		} else if (__comp > 0) { \
+			__tmp = SPLAY_RIGHT((head)->sph_root, field); \
+			if (__tmp == NULL) \
+				break; \
+			if ((cmp)(elm, __tmp) > 0){ \
+				SPLAY_ROTATE_LEFT(head, __tmp, field); \
+				if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\
+					break; \
+			} \
+			SPLAY_LINKRIGHT(head, __left, field); \
+		} \
+	} \
+	SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \
+} \
+ \
+/* Splay with either the minimum or the maximum element \
+ * Used to find minimum or maximum element in tree. \
+ */ \
+void name##_SPLAY_MINMAX(struct name *head, int __comp) \
+{ \
+	struct type __node, *__left, *__right, *__tmp; \
+\
+	SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\
+	__left = __right = &__node; \
+\
+	while (1) { \
+		if (__comp < 0) { \
+			__tmp = SPLAY_LEFT((head)->sph_root, field); \
+			if (__tmp == NULL) \
+				break; \
+			if (__comp < 0){ \
+				SPLAY_ROTATE_RIGHT(head, __tmp, field); \
+				if (SPLAY_LEFT((head)->sph_root, field) == NULL)\
+					break; \
+			} \
+			SPLAY_LINKLEFT(head, __right, field); \
+		} else if (__comp > 0) { \
+			__tmp = SPLAY_RIGHT((head)->sph_root, field); \
+			if (__tmp == NULL) \
+				break; \
+			if (__comp > 0) { \
+				SPLAY_ROTATE_LEFT(head, __tmp, field); \
+				if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\
+					break; \
+			} \
+			SPLAY_LINKRIGHT(head, __left, field); \
+		} \
+	} \
+	SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \
+}
+
+#define SPLAY_NEGINF -1
+#define SPLAY_INF 1
+
+/* Convenience wrappers; "name" must match the SPLAY_GENERATE name. */
+#define SPLAY_INSERT(name, x, y) name##_SPLAY_INSERT(x, y)
+#define SPLAY_REMOVE(name, x, y) name##_SPLAY_REMOVE(x, y)
+#define SPLAY_FIND(name, x, y) name##_SPLAY_FIND(x, y)
+#define SPLAY_NEXT(name, x, y) name##_SPLAY_NEXT(x, y)
+#define SPLAY_MIN(name, x) (SPLAY_EMPTY(x) ? NULL \
+	: name##_SPLAY_MIN_MAX(x, SPLAY_NEGINF))
+#define SPLAY_MAX(name, x) (SPLAY_EMPTY(x) ? NULL \
+	: name##_SPLAY_MIN_MAX(x, SPLAY_INF))
+
+#define SPLAY_FOREACH(x, name, head) \
+	for ((x) = SPLAY_MIN(name, head); \
+	    (x) != NULL; \
+	    (x) = SPLAY_NEXT(name, head, x))
+
+/* Macros that define a red-black tree */
+#define RB_HEAD(name, type) \
+struct name { \
+	struct type *rbh_root; /* root of the tree */ \
+}
+
+#define RB_INITIALIZER(root) \
+	{ NULL }
+
+#define RB_INIT(root) do { \
+	(root)->rbh_root = NULL; \
+} while (0)
+
+#define RB_BLACK 0
+#define RB_RED 1
+/* Intrusive node links plus the node's color. */
+#define RB_ENTRY(type) \
+struct { \
+	struct type *rbe_left; /* left element */ \
+	struct type *rbe_right; /* right element */ \
+	struct type *rbe_parent; /* parent element */ \
+	int rbe_color; /* node color */ \
+}
+
+#define RB_LEFT(elm, field) (elm)->field.rbe_left
+#define RB_RIGHT(elm, field) (elm)->field.rbe_right
+#define RB_PARENT(elm, field) (elm)->field.rbe_parent
+#define RB_COLOR(elm, field) (elm)->field.rbe_color
+#define RB_ROOT(head) (head)->rbh_root
+#define RB_EMPTY(head) (RB_ROOT(head) == NULL)
+
+/* Initialize elm as a red leaf under parent. */
+#define RB_SET(elm, parent, field) do { \
+	RB_PARENT(elm, field) = parent; \
+	RB_LEFT(elm, field) = RB_RIGHT(elm, field) = NULL; \
+	RB_COLOR(elm, field) = RB_RED; \
+} while (0)
+
+#define RB_SET_BLACKRED(black, red, field) do { \
+	RB_COLOR(black, field) = RB_BLACK; \
+	RB_COLOR(red, field) = RB_RED; \
+} while (0)
+
+/* User hook for augmented trees; default is a no-op. */
+#ifndef RB_AUGMENT
+#define RB_AUGMENT(x)
+#endif
+
+#define RB_ROTATE_LEFT(head, elm, tmp, field) do { \
+	(tmp) = RB_RIGHT(elm, field); \
+	if ((RB_RIGHT(elm, field) = RB_LEFT(tmp, field))) { \
+		RB_PARENT(RB_LEFT(tmp, field), field) = (elm); \
+	} \
+	RB_AUGMENT(elm); \
+	if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field))) { \
+		if ((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \
+			RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \
+		else \
+			RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \
+	} else \
+		(head)->rbh_root = (tmp); \
+	RB_LEFT(tmp, field) = (elm); \
+	RB_PARENT(elm, field) = (tmp); \
+	RB_AUGMENT(tmp); \
+	if ((RB_PARENT(tmp, field))) \
+		RB_AUGMENT(RB_PARENT(tmp, field)); \
+} while (0)
+
+#define RB_ROTATE_RIGHT(head, elm, tmp, field) do { \
+	(tmp) = RB_LEFT(elm, field); \
+	if ((RB_LEFT(elm, field) = RB_RIGHT(tmp, field))) { \
+		RB_PARENT(RB_RIGHT(tmp, field), field) = (elm); \
+	} \
+	RB_AUGMENT(elm); \
+	if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field))) { \
+		if ((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \
+			RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \
+		else \
+			RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \
+	} else \
+		(head)->rbh_root = (tmp); \
+	RB_RIGHT(tmp, field) = (elm); \
+	RB_PARENT(elm, field) = (tmp); \
+	RB_AUGMENT(tmp); \
+	if ((RB_PARENT(tmp, field))) \
+		RB_AUGMENT(RB_PARENT(tmp, field)); \
+} while (0)
+
+/* Generates prototypes and inline functions */
+#define RB_PROTOTYPE(name, type, field, cmp) \
+void name##_RB_INSERT_COLOR(struct name *, struct type *); \
+void name##_RB_REMOVE_COLOR(struct name *, struct type *, struct type *);\
+struct type *name##_RB_REMOVE(struct name *, struct type *); \
+struct type *name##_RB_INSERT(struct name *, struct type *); \
+struct type *name##_RB_FIND(struct name *, struct type *); \
+struct type *name##_RB_NEXT(struct type *); \
+struct type *name##_RB_MINMAX(struct name *, int); \
+ \
+
+/* Main rb operation.
+ * Moves node close to the key of elm to top
+ */
+#define RB_GENERATE(name, type, field, cmp) \
+void \
+name##_RB_INSERT_COLOR(struct name *head, struct type *elm) \
+{ \
+	struct type *parent, *gparent, *tmp; \
+	while ((parent = RB_PARENT(elm, field)) && \
+	    RB_COLOR(parent, field) == RB_RED) { \
+		gparent = RB_PARENT(parent, field); \
+		if (parent == RB_LEFT(gparent, field)) { \
+			tmp = RB_RIGHT(gparent, field); \
+			if (tmp && RB_COLOR(tmp, field) == RB_RED) { \
+				RB_COLOR(tmp, field) = RB_BLACK; \
+				RB_SET_BLACKRED(parent, gparent, field);\
+				elm = gparent; \
+				continue; \
+			} \
+			if (RB_RIGHT(parent, field) == elm) { \
+				RB_ROTATE_LEFT(head, parent, tmp, field);\
+				tmp = parent; \
+				parent = elm; \
+				elm = tmp; \
+			} \
+			RB_SET_BLACKRED(parent, gparent, field); \
+			RB_ROTATE_RIGHT(head, gparent, tmp, field); \
+		} else { \
+			tmp = RB_LEFT(gparent, field); \
+			if (tmp && RB_COLOR(tmp, field) == RB_RED) { \
+				RB_COLOR(tmp, field) = RB_BLACK; \
+				RB_SET_BLACKRED(parent, gparent, field);\
+				elm = gparent; \
+				continue; \
+			} \
+			if (RB_LEFT(parent, field) == elm) { \
+				RB_ROTATE_RIGHT(head, parent, tmp, field);\
+				tmp = parent; \
+				parent = elm; \
+				elm = tmp; \
+			} \
+			RB_SET_BLACKRED(parent, gparent, field); \
+			RB_ROTATE_LEFT(head, gparent, tmp, field); \
+		} \
+	} \
+	RB_COLOR(head->rbh_root, field) = RB_BLACK; \
+} \
+ \
+void \
+name##_RB_REMOVE_COLOR(struct name *head, struct type *parent, struct type *elm) \
+{ \
+	struct type *tmp; \
+	while ((elm == NULL || RB_COLOR(elm, field) == RB_BLACK) && \
+	    elm != RB_ROOT(head)) { \
+		if (RB_LEFT(parent, field) == elm) { \
+			tmp = RB_RIGHT(parent, field); \
+			if (RB_COLOR(tmp, field) == RB_RED) { \
+				RB_SET_BLACKRED(tmp, parent, field); \
+				RB_ROTATE_LEFT(head, parent, tmp, field);\
+				tmp = RB_RIGHT(parent, field); \
+			} \
+			if ((RB_LEFT(tmp, field) == NULL || \
+			    RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\
+			    (RB_RIGHT(tmp, field) == NULL || \
+			    RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\
+				RB_COLOR(tmp, field) = RB_RED; \
+				elm = parent; \
+				parent = RB_PARENT(elm, field); \
+			} else { \
+				if (RB_RIGHT(tmp, field) == NULL || \
+				    RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK) {\
+					struct type *oleft; \
+					if ((oleft = RB_LEFT(tmp, field)))\
+						RB_COLOR(oleft, field) = RB_BLACK;\
+					RB_COLOR(tmp, field) = RB_RED; \
+					RB_ROTATE_RIGHT(head, tmp, oleft, field);\
+					tmp = RB_RIGHT(parent, field); \
+				} \
+				RB_COLOR(tmp, field) = RB_COLOR(parent, field);\
+				RB_COLOR(parent, field) = RB_BLACK; \
+				if (RB_RIGHT(tmp, field)) \
+					RB_COLOR(RB_RIGHT(tmp, field), field) = RB_BLACK;\
+				RB_ROTATE_LEFT(head, parent, tmp, field);\
+				elm = RB_ROOT(head); \
+				break; \
+			} \
+		} else { \
+			tmp = RB_LEFT(parent, field); \
+			if (RB_COLOR(tmp, field) == RB_RED) { \
+				RB_SET_BLACKRED(tmp, parent, field); \
+				RB_ROTATE_RIGHT(head, parent, tmp, field);\
+				tmp = RB_LEFT(parent, field); \
+			} \
+			if ((RB_LEFT(tmp, field) == NULL || \
+			    RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\
+			    (RB_RIGHT(tmp, field) == NULL || \
+			    RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\
+				RB_COLOR(tmp, field) = RB_RED; \
+				elm = parent; \
+				parent = RB_PARENT(elm, field); \
+			} else { \
+				if (RB_LEFT(tmp, field) == NULL || \
+				    RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) {\
+					struct type *oright; \
+					if ((oright = RB_RIGHT(tmp, field)))\
+						RB_COLOR(oright, field) = RB_BLACK;\
+					RB_COLOR(tmp, field) = RB_RED; \
+					RB_ROTATE_LEFT(head, tmp, oright, field);\
+					tmp = RB_LEFT(parent, field); \
+				} \
+				RB_COLOR(tmp, field) = RB_COLOR(parent, field);\
+				RB_COLOR(parent, field) = RB_BLACK; \
+				if (RB_LEFT(tmp, field)) \
+					RB_COLOR(RB_LEFT(tmp, field), field) = RB_BLACK;\
+				RB_ROTATE_RIGHT(head, parent, tmp, field);\
+				elm = RB_ROOT(head); \
+				break; \
+			} \
+		} \
+	} \
+	if (elm) \
+		RB_COLOR(elm, field) = RB_BLACK; \
+} \
+ \
+struct type * \
+name##_RB_REMOVE(struct name *head, struct type *elm) \
+{ \
+	struct type *child, *parent, *old = elm; \
+	int color; \
+	if (RB_LEFT(elm, field) == NULL) \
+		child = RB_RIGHT(elm, field); \
+	else if (RB_RIGHT(elm, field) == NULL) \
+		child = RB_LEFT(elm, field); \
+	else { \
+		struct type *left; \
+		elm = RB_RIGHT(elm, field); \
+		while ((left = RB_LEFT(elm, field))) \
+			elm = left; \
+		child = RB_RIGHT(elm, field); \
+		parent = RB_PARENT(elm, field); \
+		color = RB_COLOR(elm, field); \
+		if (child) \
+			RB_PARENT(child, field) = parent; \
+		if (parent) { \
+			if (RB_LEFT(parent, field) == elm) \
+				RB_LEFT(parent, field) = child; \
+			else \
+				RB_RIGHT(parent, field) = child; \
+			RB_AUGMENT(parent); \
+		} else \
+			RB_ROOT(head) = child; \
+		if (RB_PARENT(elm, field) == old) \
+			parent = elm; \
+		(elm)->field = (old)->field; \
+		if (RB_PARENT(old, field)) { \
+			if (RB_LEFT(RB_PARENT(old, field), field) == old)\
+				RB_LEFT(RB_PARENT(old, field), field) = elm;\
+			else \
+				RB_RIGHT(RB_PARENT(old, field), field) = elm;\
+			RB_AUGMENT(RB_PARENT(old, field)); \
+		} else \
+			RB_ROOT(head) = elm; \
+		RB_PARENT(RB_LEFT(old, field), field) = elm; \
+		if (RB_RIGHT(old, field)) \
+			RB_PARENT(RB_RIGHT(old, field), field) = elm; \
+		if (parent) { \
+			left = parent; \
+			do { \
+				RB_AUGMENT(left); \
+			} while ((left = RB_PARENT(left, field))); \
+		} \
+		goto color; \
+	} \
+	parent = RB_PARENT(elm, field); \
+	color = RB_COLOR(elm, field); \
+	if (child) \
+		RB_PARENT(child, field) = parent; \
+	if (parent) { \
+		if (RB_LEFT(parent, field) == elm) \
+			RB_LEFT(parent, field) = child; \
+		else \
+			RB_RIGHT(parent, field) = child; \
+		RB_AUGMENT(parent); \
+	} else \
+		RB_ROOT(head) = child; \
+color: \
+	if (color == RB_BLACK) \
+		name##_RB_REMOVE_COLOR(head, parent, child); \
+	return (old); \
+} \
+ \
+/* Inserts a node into the RB tree */ \
+struct type * \
+name##_RB_INSERT(struct name *head, struct type *elm) \
+{ \
+	struct type *tmp; \
+	struct type *parent = NULL; \
+	int comp = 0; \
+	tmp = RB_ROOT(head); \
+	while (tmp) { \
+		parent = tmp; \
+		comp = (cmp)(elm, parent); \
+		if (comp < 0) \
+			tmp = RB_LEFT(tmp, field); \
+		else if (comp > 0) \
+			tmp = RB_RIGHT(tmp, field); \
+		else \
+			return (tmp); \
+	} \
+	RB_SET(elm, parent, field); \
+	if (parent != NULL) { \
+		if (comp < 0) \
+			RB_LEFT(parent, field) = elm; \
+		else \
+			RB_RIGHT(parent, field) = elm; \
+		RB_AUGMENT(parent); \
+	} else \
+		RB_ROOT(head) = elm; \
+	name##_RB_INSERT_COLOR(head, elm); \
+	return (NULL); \
+} \
+ \
+/* Finds the node with the same key as elm */ \
+struct type * \
+name##_RB_FIND(struct name *head, struct type *elm) \
+{ \
+	struct type *tmp = RB_ROOT(head); \
+	int comp; \
+	while (tmp) { \
+		comp = cmp(elm, tmp); \
+		if (comp < 0) \
+			tmp = RB_LEFT(tmp, field); \
+		else if (comp > 0) \
+			tmp = RB_RIGHT(tmp, field); \
+		else \
+			return (tmp); \
+	} \
+	return (NULL); \
+} \
+ \
+struct type * \
+name##_RB_NEXT(struct type *elm) \
+{ \
+	if (RB_RIGHT(elm, field)) { \
+		elm = RB_RIGHT(elm, field); \
+		while (RB_LEFT(elm, field)) \
+			elm = RB_LEFT(elm, field); \
+	} else { \
+		if (RB_PARENT(elm, field) && \
+		    (elm == RB_LEFT(RB_PARENT(elm, field), field))) \
+			elm = RB_PARENT(elm, field); \
+		else { \
+			while (RB_PARENT(elm, field) && \
+			    (elm == RB_RIGHT(RB_PARENT(elm, field), field)))\
+				elm = RB_PARENT(elm, field); \
+			elm = RB_PARENT(elm, field); \
+		} \
+	} \
+	return (elm); \
+} \
+ \
+struct type * \
+name##_RB_MINMAX(struct name *head, int val) \
+{ \
+	struct type *tmp = RB_ROOT(head); \
+	struct type *parent = NULL; \
+	while (tmp) { \
+		parent = tmp; \
+		if (val < 0) \
+			tmp = RB_LEFT(tmp, field); \
+		else \
+			tmp = RB_RIGHT(tmp, field); \
+	} \
+	return (parent); \
+}
+
+#define RB_NEGINF -1
+#define RB_INF 1
+
+/* Convenience wrappers; "name" must match the RB_GENERATE name. */
+#define RB_INSERT(name, x, y) name##_RB_INSERT(x, y)
+#define RB_REMOVE(name, x, y) name##_RB_REMOVE(x, y)
+#define RB_FIND(name, x, y) name##_RB_FIND(x, y)
+#define RB_NEXT(name, x, y) name##_RB_NEXT(y)
+#define RB_MIN(name, x) name##_RB_MINMAX(x, RB_NEGINF)
+#define RB_MAX(name, x) name##_RB_MINMAX(x, RB_INF)
+
+/* In-order traversal.  The loop increment reads x's links, so the body
+ * must not remove or free x itself. */
+#define RB_FOREACH(x, name, head) \
+	for ((x) = RB_MIN(name, head); \
+	    (x) != NULL; \
+	    (x) = name##_RB_NEXT(x))
+
+#endif /* _SYS_TREE_H_ */
+/* $OpenBSD: tree.h,v 1.7 2002/10/17 21:51:54 art Exp $ */
+/*
+ * Copyright 2002 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SYS_TREE_H_
+#define _SYS_TREE_H_
+
+/*
+ * This file defines data structures for different types of trees:
+ * splay trees and red-black trees.
+ *
+ * A splay tree is a self-organizing data structure. Every operation
+ * on the tree causes a splay to happen. The splay moves the requested
+ * node to the root of the tree and partly rebalances it.
+ *
+ * This has the benefit that request locality causes faster lookups as
+ * the requested nodes move to the top of the tree. On the other hand,
+ * every lookup causes memory writes.
+ *
+ * The Balance Theorem bounds the total access time for m operations
+ * and n inserts on an initially empty tree as O((m + n)lg n). The
+ * amortized cost for a sequence of m accesses to a splay tree is O(lg n);
+ *
+ * A red-black tree is a binary search tree with the node color as an
+ * extra attribute. It fulfills a set of conditions:
+ * - every search path from the root to a leaf consists of the
+ * same number of black nodes,
+ * - each red node (except for the root) has a black parent,
+ * - each leaf node is black.
+ *
+ * Every operation on a red-black tree is bounded as O(lg n).
+ * The maximum height of a red-black tree is 2lg (n+1).
+ */
+
+#define SPLAY_HEAD(name, type) \
+struct name { \
+ struct type *sph_root; /* root of the tree */ \
+}
+
+#define SPLAY_INITIALIZER(root) \
+ { NULL }
+
+#define SPLAY_INIT(root) do { \
+ (root)->sph_root = NULL; \
+} while (0)
+
+#define SPLAY_ENTRY(type) \
+struct { \
+ struct type *spe_left; /* left element */ \
+ struct type *spe_right; /* right element */ \
+}
+
+#define SPLAY_LEFT(elm, field) (elm)->field.spe_left
+#define SPLAY_RIGHT(elm, field) (elm)->field.spe_right
+#define SPLAY_ROOT(head) (head)->sph_root
+#define SPLAY_EMPTY(head) (SPLAY_ROOT(head) == NULL)
+
+/* SPLAY_ROTATE_{LEFT,RIGHT} expect that tmp hold SPLAY_{RIGHT,LEFT} */
+#define SPLAY_ROTATE_RIGHT(head, tmp, field) do { \
+ SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(tmp, field); \
+ SPLAY_RIGHT(tmp, field) = (head)->sph_root; \
+ (head)->sph_root = tmp; \
+} while (0)
+
+#define SPLAY_ROTATE_LEFT(head, tmp, field) do { \
+ SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(tmp, field); \
+ SPLAY_LEFT(tmp, field) = (head)->sph_root; \
+ (head)->sph_root = tmp; \
+} while (0)
+
+#define SPLAY_LINKLEFT(head, tmp, field) do { \
+ SPLAY_LEFT(tmp, field) = (head)->sph_root; \
+ tmp = (head)->sph_root; \
+ (head)->sph_root = SPLAY_LEFT((head)->sph_root, field); \
+} while (0)
+
+#define SPLAY_LINKRIGHT(head, tmp, field) do { \
+ SPLAY_RIGHT(tmp, field) = (head)->sph_root; \
+ tmp = (head)->sph_root; \
+ (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field); \
+} while (0)
+
+#define SPLAY_ASSEMBLE(head, node, left, right, field) do { \
+ SPLAY_RIGHT(left, field) = SPLAY_LEFT((head)->sph_root, field); \
+ SPLAY_LEFT(right, field) = SPLAY_RIGHT((head)->sph_root, field);\
+ SPLAY_LEFT((head)->sph_root, field) = SPLAY_RIGHT(node, field); \
+ SPLAY_RIGHT((head)->sph_root, field) = SPLAY_LEFT(node, field); \
+} while (0)
+
+/* Generates prototypes and inline functions */
+
+#define SPLAY_PROTOTYPE(name, type, field, cmp) \
+void name##_SPLAY(struct name *, struct type *); \
+void name##_SPLAY_MINMAX(struct name *, int); \
+struct type *name##_SPLAY_INSERT(struct name *, struct type *); \
+struct type *name##_SPLAY_REMOVE(struct name *, struct type *); \
+ \
+/* Finds the node with the same key as elm */ \
+static __inline struct type * \
+name##_SPLAY_FIND(struct name *head, struct type *elm) \
+{ \
+ if (SPLAY_EMPTY(head)) \
+ return(NULL); \
+ name##_SPLAY(head, elm); \
+ if ((cmp)(elm, (head)->sph_root) == 0) \
+ return (head->sph_root); \
+ return (NULL); \
+} \
+ \
+static __inline struct type * \
+name##_SPLAY_NEXT(struct name *head, struct type *elm) \
+{ \
+ name##_SPLAY(head, elm); \
+ if (SPLAY_RIGHT(elm, field) != NULL) { \
+ elm = SPLAY_RIGHT(elm, field); \
+ while (SPLAY_LEFT(elm, field) != NULL) { \
+ elm = SPLAY_LEFT(elm, field); \
+ } \
+ } else \
+ elm = NULL; \
+ return (elm); \
+} \
+ \
+static __inline struct type * \
+name##_SPLAY_MIN_MAX(struct name *head, int val) \
+{ \
+ name##_SPLAY_MINMAX(head, val); \
+ return (SPLAY_ROOT(head)); \
+}
+
+/* Main splay operation.
+ * Moves node close to the key of elm to top
+ */
+#define SPLAY_GENERATE(name, type, field, cmp) \
+struct type * \
+name##_SPLAY_INSERT(struct name *head, struct type *elm) \
+{ \
+ if (SPLAY_EMPTY(head)) { \
+ SPLAY_LEFT(elm, field) = SPLAY_RIGHT(elm, field) = NULL; \
+ } else { \
+ int __comp; \
+ name##_SPLAY(head, elm); \
+ __comp = (cmp)(elm, (head)->sph_root); \
+ if(__comp < 0) { \
+ SPLAY_LEFT(elm, field) = SPLAY_LEFT((head)->sph_root, field);\
+ SPLAY_RIGHT(elm, field) = (head)->sph_root; \
+ SPLAY_LEFT((head)->sph_root, field) = NULL; \
+ } else if (__comp > 0) { \
+ SPLAY_RIGHT(elm, field) = SPLAY_RIGHT((head)->sph_root, field);\
+ SPLAY_LEFT(elm, field) = (head)->sph_root; \
+ SPLAY_RIGHT((head)->sph_root, field) = NULL; \
+ } else \
+ return ((head)->sph_root); \
+ } \
+ (head)->sph_root = (elm); \
+ return (NULL); \
+} \
+ \
+struct type * \
+name##_SPLAY_REMOVE(struct name *head, struct type *elm) \
+{ \
+ struct type *__tmp; \
+ if (SPLAY_EMPTY(head)) \
+ return (NULL); \
+ name##_SPLAY(head, elm); \
+ if ((cmp)(elm, (head)->sph_root) == 0) { \
+ if (SPLAY_LEFT((head)->sph_root, field) == NULL) { \
+ (head)->sph_root = SPLAY_RIGHT((head)->sph_root, field);\
+ } else { \
+ __tmp = SPLAY_RIGHT((head)->sph_root, field); \
+ (head)->sph_root = SPLAY_LEFT((head)->sph_root, field);\
+ name##_SPLAY(head, elm); \
+ SPLAY_RIGHT((head)->sph_root, field) = __tmp; \
+ } \
+ return (elm); \
+ } \
+ return (NULL); \
+} \
+ \
+void \
+name##_SPLAY(struct name *head, struct type *elm) \
+{ \
+ struct type __node, *__left, *__right, *__tmp; \
+ int __comp; \
+\
+ SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\
+ __left = __right = &__node; \
+\
+ while ((__comp = (cmp)(elm, (head)->sph_root))) { \
+ if (__comp < 0) { \
+ __tmp = SPLAY_LEFT((head)->sph_root, field); \
+ if (__tmp == NULL) \
+ break; \
+ if ((cmp)(elm, __tmp) < 0){ \
+ SPLAY_ROTATE_RIGHT(head, __tmp, field); \
+ if (SPLAY_LEFT((head)->sph_root, field) == NULL)\
+ break; \
+ } \
+ SPLAY_LINKLEFT(head, __right, field); \
+ } else if (__comp > 0) { \
+ __tmp = SPLAY_RIGHT((head)->sph_root, field); \
+ if (__tmp == NULL) \
+ break; \
+ if ((cmp)(elm, __tmp) > 0){ \
+ SPLAY_ROTATE_LEFT(head, __tmp, field); \
+ if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\
+ break; \
+ } \
+ SPLAY_LINKRIGHT(head, __left, field); \
+ } \
+ } \
+ SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \
+} \
+ \
+/* Splay with either the minimum or the maximum element \
+ * Used to find minimum or maximum element in tree. \
+ */ \
+void name##_SPLAY_MINMAX(struct name *head, int __comp) \
+{ \
+ struct type __node, *__left, *__right, *__tmp; \
+\
+ SPLAY_LEFT(&__node, field) = SPLAY_RIGHT(&__node, field) = NULL;\
+ __left = __right = &__node; \
+\
+ while (1) { \
+ if (__comp < 0) { \
+ __tmp = SPLAY_LEFT((head)->sph_root, field); \
+ if (__tmp == NULL) \
+ break; \
+ if (__comp < 0){ \
+ SPLAY_ROTATE_RIGHT(head, __tmp, field); \
+ if (SPLAY_LEFT((head)->sph_root, field) == NULL)\
+ break; \
+ } \
+ SPLAY_LINKLEFT(head, __right, field); \
+ } else if (__comp > 0) { \
+ __tmp = SPLAY_RIGHT((head)->sph_root, field); \
+ if (__tmp == NULL) \
+ break; \
+ if (__comp > 0) { \
+ SPLAY_ROTATE_LEFT(head, __tmp, field); \
+ if (SPLAY_RIGHT((head)->sph_root, field) == NULL)\
+ break; \
+ } \
+ SPLAY_LINKRIGHT(head, __left, field); \
+ } \
+ } \
+ SPLAY_ASSEMBLE(head, &__node, __left, __right, field); \
+}
+
+#define SPLAY_NEGINF -1
+#define SPLAY_INF 1
+
+#define SPLAY_INSERT(name, x, y) name##_SPLAY_INSERT(x, y)
+#define SPLAY_REMOVE(name, x, y) name##_SPLAY_REMOVE(x, y)
+#define SPLAY_FIND(name, x, y) name##_SPLAY_FIND(x, y)
+#define SPLAY_NEXT(name, x, y) name##_SPLAY_NEXT(x, y)
+#define SPLAY_MIN(name, x) (SPLAY_EMPTY(x) ? NULL \
+ : name##_SPLAY_MIN_MAX(x, SPLAY_NEGINF))
+#define SPLAY_MAX(name, x) (SPLAY_EMPTY(x) ? NULL \
+ : name##_SPLAY_MIN_MAX(x, SPLAY_INF))
+
+#define SPLAY_FOREACH(x, name, head) \
+ for ((x) = SPLAY_MIN(name, head); \
+ (x) != NULL; \
+ (x) = SPLAY_NEXT(name, head, x))
+
+/* Macros that define a red-black tree */
+#define RB_HEAD(name, type) \
+struct name { \
+ struct type *rbh_root; /* root of the tree */ \
+}
+
+#define RB_INITIALIZER(root) \
+ { NULL }
+
+#define RB_INIT(root) do { \
+ (root)->rbh_root = NULL; \
+} while (0)
+
+#define RB_BLACK 0
+#define RB_RED 1
+#define RB_ENTRY(type) \
+struct { \
+ struct type *rbe_left; /* left element */ \
+ struct type *rbe_right; /* right element */ \
+ struct type *rbe_parent; /* parent element */ \
+ int rbe_color; /* node color */ \
+}
+
+#define RB_LEFT(elm, field) (elm)->field.rbe_left
+#define RB_RIGHT(elm, field) (elm)->field.rbe_right
+#define RB_PARENT(elm, field) (elm)->field.rbe_parent
+#define RB_COLOR(elm, field) (elm)->field.rbe_color
+#define RB_ROOT(head) (head)->rbh_root
+#define RB_EMPTY(head) (RB_ROOT(head) == NULL)
+
+#define RB_SET(elm, parent, field) do { \
+ RB_PARENT(elm, field) = parent; \
+ RB_LEFT(elm, field) = RB_RIGHT(elm, field) = NULL; \
+ RB_COLOR(elm, field) = RB_RED; \
+} while (0)
+
+#define RB_SET_BLACKRED(black, red, field) do { \
+ RB_COLOR(black, field) = RB_BLACK; \
+ RB_COLOR(red, field) = RB_RED; \
+} while (0)
+
+#ifndef RB_AUGMENT
+#define RB_AUGMENT(x)
+#endif
+
+#define RB_ROTATE_LEFT(head, elm, tmp, field) do { \
+ (tmp) = RB_RIGHT(elm, field); \
+ if ((RB_RIGHT(elm, field) = RB_LEFT(tmp, field))) { \
+ RB_PARENT(RB_LEFT(tmp, field), field) = (elm); \
+ } \
+ RB_AUGMENT(elm); \
+ if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field))) { \
+ if ((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \
+ RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \
+ else \
+ RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \
+ } else \
+ (head)->rbh_root = (tmp); \
+ RB_LEFT(tmp, field) = (elm); \
+ RB_PARENT(elm, field) = (tmp); \
+ RB_AUGMENT(tmp); \
+ if ((RB_PARENT(tmp, field))) \
+ RB_AUGMENT(RB_PARENT(tmp, field)); \
+} while (0)
+
+#define RB_ROTATE_RIGHT(head, elm, tmp, field) do { \
+ (tmp) = RB_LEFT(elm, field); \
+ if ((RB_LEFT(elm, field) = RB_RIGHT(tmp, field))) { \
+ RB_PARENT(RB_RIGHT(tmp, field), field) = (elm); \
+ } \
+ RB_AUGMENT(elm); \
+ if ((RB_PARENT(tmp, field) = RB_PARENT(elm, field))) { \
+ if ((elm) == RB_LEFT(RB_PARENT(elm, field), field)) \
+ RB_LEFT(RB_PARENT(elm, field), field) = (tmp); \
+ else \
+ RB_RIGHT(RB_PARENT(elm, field), field) = (tmp); \
+ } else \
+ (head)->rbh_root = (tmp); \
+ RB_RIGHT(tmp, field) = (elm); \
+ RB_PARENT(elm, field) = (tmp); \
+ RB_AUGMENT(tmp); \
+ if ((RB_PARENT(tmp, field))) \
+ RB_AUGMENT(RB_PARENT(tmp, field)); \
+} while (0)
+
+/* Generates prototypes and inline functions */
+#define RB_PROTOTYPE(name, type, field, cmp) \
+void name##_RB_INSERT_COLOR(struct name *, struct type *); \
+void name##_RB_REMOVE_COLOR(struct name *, struct type *, struct type *);\
+struct type *name##_RB_REMOVE(struct name *, struct type *); \
+struct type *name##_RB_INSERT(struct name *, struct type *); \
+struct type *name##_RB_FIND(struct name *, struct type *); \
+struct type *name##_RB_NEXT(struct type *); \
+struct type *name##_RB_MINMAX(struct name *, int); \
+ \
+
+/* Main rb operation.
+ * Moves node close to the key of elm to top
+ */
+#define RB_GENERATE(name, type, field, cmp) \
+void \
+name##_RB_INSERT_COLOR(struct name *head, struct type *elm) \
+{ \
+ struct type *parent, *gparent, *tmp; \
+ while ((parent = RB_PARENT(elm, field)) && \
+ RB_COLOR(parent, field) == RB_RED) { \
+ gparent = RB_PARENT(parent, field); \
+ if (parent == RB_LEFT(gparent, field)) { \
+ tmp = RB_RIGHT(gparent, field); \
+ if (tmp && RB_COLOR(tmp, field) == RB_RED) { \
+ RB_COLOR(tmp, field) = RB_BLACK; \
+ RB_SET_BLACKRED(parent, gparent, field);\
+ elm = gparent; \
+ continue; \
+ } \
+ if (RB_RIGHT(parent, field) == elm) { \
+ RB_ROTATE_LEFT(head, parent, tmp, field);\
+ tmp = parent; \
+ parent = elm; \
+ elm = tmp; \
+ } \
+ RB_SET_BLACKRED(parent, gparent, field); \
+ RB_ROTATE_RIGHT(head, gparent, tmp, field); \
+ } else { \
+ tmp = RB_LEFT(gparent, field); \
+ if (tmp && RB_COLOR(tmp, field) == RB_RED) { \
+ RB_COLOR(tmp, field) = RB_BLACK; \
+ RB_SET_BLACKRED(parent, gparent, field);\
+ elm = gparent; \
+ continue; \
+ } \
+ if (RB_LEFT(parent, field) == elm) { \
+ RB_ROTATE_RIGHT(head, parent, tmp, field);\
+ tmp = parent; \
+ parent = elm; \
+ elm = tmp; \
+ } \
+ RB_SET_BLACKRED(parent, gparent, field); \
+ RB_ROTATE_LEFT(head, gparent, tmp, field); \
+ } \
+ } \
+ RB_COLOR(head->rbh_root, field) = RB_BLACK; \
+} \
+ \
+void \
+name##_RB_REMOVE_COLOR(struct name *head, struct type *parent, struct type *elm) \
+{ \
+ struct type *tmp; \
+ while ((elm == NULL || RB_COLOR(elm, field) == RB_BLACK) && \
+ elm != RB_ROOT(head)) { \
+ if (RB_LEFT(parent, field) == elm) { \
+ tmp = RB_RIGHT(parent, field); \
+ if (RB_COLOR(tmp, field) == RB_RED) { \
+ RB_SET_BLACKRED(tmp, parent, field); \
+ RB_ROTATE_LEFT(head, parent, tmp, field);\
+ tmp = RB_RIGHT(parent, field); \
+ } \
+ if ((RB_LEFT(tmp, field) == NULL || \
+ RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\
+ (RB_RIGHT(tmp, field) == NULL || \
+ RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\
+ RB_COLOR(tmp, field) = RB_RED; \
+ elm = parent; \
+ parent = RB_PARENT(elm, field); \
+ } else { \
+ if (RB_RIGHT(tmp, field) == NULL || \
+ RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK) {\
+ struct type *oleft; \
+ if ((oleft = RB_LEFT(tmp, field)))\
+ RB_COLOR(oleft, field) = RB_BLACK;\
+ RB_COLOR(tmp, field) = RB_RED; \
+ RB_ROTATE_RIGHT(head, tmp, oleft, field);\
+ tmp = RB_RIGHT(parent, field); \
+ } \
+ RB_COLOR(tmp, field) = RB_COLOR(parent, field);\
+ RB_COLOR(parent, field) = RB_BLACK; \
+ if (RB_RIGHT(tmp, field)) \
+ RB_COLOR(RB_RIGHT(tmp, field), field) = RB_BLACK;\
+ RB_ROTATE_LEFT(head, parent, tmp, field);\
+ elm = RB_ROOT(head); \
+ break; \
+ } \
+ } else { \
+ tmp = RB_LEFT(parent, field); \
+ if (RB_COLOR(tmp, field) == RB_RED) { \
+ RB_SET_BLACKRED(tmp, parent, field); \
+ RB_ROTATE_RIGHT(head, parent, tmp, field);\
+ tmp = RB_LEFT(parent, field); \
+ } \
+ if ((RB_LEFT(tmp, field) == NULL || \
+ RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) &&\
+ (RB_RIGHT(tmp, field) == NULL || \
+ RB_COLOR(RB_RIGHT(tmp, field), field) == RB_BLACK)) {\
+ RB_COLOR(tmp, field) = RB_RED; \
+ elm = parent; \
+ parent = RB_PARENT(elm, field); \
+ } else { \
+ if (RB_LEFT(tmp, field) == NULL || \
+ RB_COLOR(RB_LEFT(tmp, field), field) == RB_BLACK) {\
+ struct type *oright; \
+ if ((oright = RB_RIGHT(tmp, field)))\
+ RB_COLOR(oright, field) = RB_BLACK;\
+ RB_COLOR(tmp, field) = RB_RED; \
+ RB_ROTATE_LEFT(head, tmp, oright, field);\
+ tmp = RB_LEFT(parent, field); \
+ } \
+ RB_COLOR(tmp, field) = RB_COLOR(parent, field);\
+ RB_COLOR(parent, field) = RB_BLACK; \
+ if (RB_LEFT(tmp, field)) \
+ RB_COLOR(RB_LEFT(tmp, field), field) = RB_BLACK;\
+ RB_ROTATE_RIGHT(head, parent, tmp, field);\
+ elm = RB_ROOT(head); \
+ break; \
+ } \
+ } \
+ } \
+ if (elm) \
+ RB_COLOR(elm, field) = RB_BLACK; \
+} \
+ \
+struct type * \
+name##_RB_REMOVE(struct name *head, struct type *elm) \
+{ \
+ struct type *child, *parent, *old = elm; \
+ int color; \
+ if (RB_LEFT(elm, field) == NULL) \
+ child = RB_RIGHT(elm, field); \
+ else if (RB_RIGHT(elm, field) == NULL) \
+ child = RB_LEFT(elm, field); \
+ else { \
+ struct type *left; \
+ elm = RB_RIGHT(elm, field); \
+ while ((left = RB_LEFT(elm, field))) \
+ elm = left; \
+ child = RB_RIGHT(elm, field); \
+ parent = RB_PARENT(elm, field); \
+ color = RB_COLOR(elm, field); \
+ if (child) \
+ RB_PARENT(child, field) = parent; \
+ if (parent) { \
+ if (RB_LEFT(parent, field) == elm) \
+ RB_LEFT(parent, field) = child; \
+ else \
+ RB_RIGHT(parent, field) = child; \
+ RB_AUGMENT(parent); \
+ } else \
+ RB_ROOT(head) = child; \
+ if (RB_PARENT(elm, field) == old) \
+ parent = elm; \
+ (elm)->field = (old)->field; \
+ if (RB_PARENT(old, field)) { \
+ if (RB_LEFT(RB_PARENT(old, field), field) == old)\
+ RB_LEFT(RB_PARENT(old, field), field) = elm;\
+ else \
+ RB_RIGHT(RB_PARENT(old, field), field) = elm;\
+ RB_AUGMENT(RB_PARENT(old, field)); \
+ } else \
+ RB_ROOT(head) = elm; \
+ RB_PARENT(RB_LEFT(old, field), field) = elm; \
+ if (RB_RIGHT(old, field)) \
+ RB_PARENT(RB_RIGHT(old, field), field) = elm; \
+ if (parent) { \
+ left = parent; \
+ do { \
+ RB_AUGMENT(left); \
+ } while ((left = RB_PARENT(left, field))); \
+ } \
+ goto color; \
+ } \
+ parent = RB_PARENT(elm, field); \
+ color = RB_COLOR(elm, field); \
+ if (child) \
+ RB_PARENT(child, field) = parent; \
+ if (parent) { \
+ if (RB_LEFT(parent, field) == elm) \
+ RB_LEFT(parent, field) = child; \
+ else \
+ RB_RIGHT(parent, field) = child; \
+ RB_AUGMENT(parent); \
+ } else \
+ RB_ROOT(head) = child; \
+color: \
+ if (color == RB_BLACK) \
+ name##_RB_REMOVE_COLOR(head, parent, child); \
+ return (old); \
+} \
+ \
+/* Inserts a node into the RB tree */ \
+struct type * \
+name##_RB_INSERT(struct name *head, struct type *elm) \
+{ \
+ struct type *tmp; \
+ struct type *parent = NULL; \
+ int comp = 0; \
+ tmp = RB_ROOT(head); \
+ while (tmp) { \
+ parent = tmp; \
+ comp = (cmp)(elm, parent); \
+ if (comp < 0) \
+ tmp = RB_LEFT(tmp, field); \
+ else if (comp > 0) \
+ tmp = RB_RIGHT(tmp, field); \
+ else \
+ return (tmp); \
+ } \
+ RB_SET(elm, parent, field); \
+ if (parent != NULL) { \
+ if (comp < 0) \
+ RB_LEFT(parent, field) = elm; \
+ else \
+ RB_RIGHT(parent, field) = elm; \
+ RB_AUGMENT(parent); \
+ } else \
+ RB_ROOT(head) = elm; \
+ name##_RB_INSERT_COLOR(head, elm); \
+ return (NULL); \
+} \
+ \
+/* Finds the node with the same key as elm */ \
+struct type * \
+name##_RB_FIND(struct name *head, struct type *elm) \
+{ \
+ struct type *tmp = RB_ROOT(head); \
+ int comp; \
+ while (tmp) { \
+ comp = cmp(elm, tmp); \
+ if (comp < 0) \
+ tmp = RB_LEFT(tmp, field); \
+ else if (comp > 0) \
+ tmp = RB_RIGHT(tmp, field); \
+ else \
+ return (tmp); \
+ } \
+ return (NULL); \
+} \
+ \
+struct type * \
+name##_RB_NEXT(struct type *elm) \
+{ \
+ if (RB_RIGHT(elm, field)) { \
+ elm = RB_RIGHT(elm, field); \
+ while (RB_LEFT(elm, field)) \
+ elm = RB_LEFT(elm, field); \
+ } else { \
+ if (RB_PARENT(elm, field) && \
+ (elm == RB_LEFT(RB_PARENT(elm, field), field))) \
+ elm = RB_PARENT(elm, field); \
+ else { \
+ while (RB_PARENT(elm, field) && \
+ (elm == RB_RIGHT(RB_PARENT(elm, field), field)))\
+ elm = RB_PARENT(elm, field); \
+ elm = RB_PARENT(elm, field); \
+ } \
+ } \
+ return (elm); \
+} \
+ \
+struct type * \
+name##_RB_MINMAX(struct name *head, int val) \
+{ \
+ struct type *tmp = RB_ROOT(head); \
+ struct type *parent = NULL; \
+ while (tmp) { \
+ parent = tmp; \
+ if (val < 0) \
+ tmp = RB_LEFT(tmp, field); \
+ else \
+ tmp = RB_RIGHT(tmp, field); \
+ } \
+ return (parent); \
+}
+
+#define RB_NEGINF -1
+#define RB_INF 1
+
+#define RB_INSERT(name, x, y) name##_RB_INSERT(x, y)
+#define RB_REMOVE(name, x, y) name##_RB_REMOVE(x, y)
+#define RB_FIND(name, x, y) name##_RB_FIND(x, y)
+#define RB_NEXT(name, x, y) name##_RB_NEXT(y)
+#define RB_MIN(name, x) name##_RB_MINMAX(x, RB_NEGINF)
+#define RB_MAX(name, x) name##_RB_MINMAX(x, RB_INF)
+
+#define RB_FOREACH(x, name, head) \
+ for ((x) = RB_MIN(name, head); \
+ (x) != NULL; \
+ (x) = name##_RB_NEXT(x))
+
+#endif /* _SYS_TREE_H_ */
diff --git a/libevent/WIN32-Code/win32.c b/libevent/WIN32-Code/win32.c
new file mode 100644
index 00000000000..8a603b7eceb
--- /dev/null
+++ b/libevent/WIN32-Code/win32.c
@@ -0,0 +1,486 @@
+/*
+ * Copyright 2000-2002 Niels Provos <provos@citi.umich.edu>
+ * Copyright 2003 Michael A. Davis <mike@datanerds.net>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifdef _MSC_VER
+#include "./config.h"
+#else
+/* Avoid the windows/msvc thing. */
+#include "../config.h"
+#endif
+
+#include <winsock2.h>
+#include <windows.h>
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+
+#define RB_AUGMENT(x) (void)(x)
+#include "./tree.h"
+#include "log.h"
+#include "event.h"
+#include "event-internal.h"
+
+#define XFREE(ptr) do { if (ptr) free(ptr); } while(0)
+
+extern struct event_list timequeue;
+extern struct event_list addqueue;
+#if 0
+extern struct event_list signalqueue;
+#endif
+
+struct win_fd_set {
+ u_int fd_count;
+ SOCKET fd_array[1];
+};
+
+int evsigcaught[NSIG];
+volatile sig_atomic_t signal_caught = 0;
+/* MSDN says this is required to handle SIGFPE */
+volatile double SIGFPE_REQ = 0.0f;
+
+#if 0
+static void signal_handler(int sig);
+
+void signal_process(void);
+int signal_recalc(void);
+#endif
+
+struct event_entry {
+ RB_ENTRY(event_entry) node;
+ SOCKET sock;
+ int read_pos;
+ int write_pos;
+ struct event *read_event;
+ struct event *write_event;
+};
+
+/* Order event entries by socket handle, for the red-black tree. */
+static int
+compare(struct event_entry *a, struct event_entry *b)
+{
+	if (a->sock != b->sock)
+		return (a->sock < b->sock) ? -1 : 1;
+	return 0;
+}
+
+struct win32op {
+ int fd_setsz;
+ struct win_fd_set *readset_in;
+ struct win_fd_set *writeset_in;
+ struct win_fd_set *readset_out;
+ struct win_fd_set *writeset_out;
+ struct win_fd_set *exset_out;
+ RB_HEAD(event_map, event_entry) event_root;
+
+ unsigned signals_are_broken : 1;
+};
+
+RB_PROTOTYPE(event_map, event_entry, node, compare);
+RB_GENERATE(event_map, event_entry, node, compare);
+
+void *win32_init (struct event_base *);
+int win32_insert (void *, struct event *);
+int win32_del (void *, struct event *);
+int win32_dispatch (struct event_base *base, void *, struct timeval *);
+void win32_dealloc (struct event_base *, void *);
+
+struct eventop win32ops = {
+ "win32",
+ win32_init,
+ win32_insert,
+ win32_del,
+ win32_dispatch,
+ win32_dealloc,
+ 0
+};
+
+#define FD_SET_ALLOC_SIZE(n) ((sizeof(struct win_fd_set) + ((n)-1)*sizeof(SOCKET)))
+
+/* Grow all five fd sets to hold new_size sockets.  Returns 0 on
+ * success, -1 if any allocation fails; already-grown sets remain
+ * valid and owned by op in either case. */
+static int
+realloc_fd_sets(struct win32op *op, size_t new_size)
+{
+	size_t size;
+	struct win_fd_set *tmp;
+
+	assert(new_size >= op->readset_in->fd_count &&
+	    new_size >= op->writeset_in->fd_count);
+	assert(new_size >= 1);
+
+	size = FD_SET_ALLOC_SIZE(new_size);
+	/* BUG FIX: assigning realloc()'s result straight back to the
+	 * member leaked the old block when realloc failed; go through a
+	 * temporary so the original pointer survives failure. */
+	if (!(tmp = realloc(op->readset_in, size)))
+		return (-1);
+	op->readset_in = tmp;
+	if (!(tmp = realloc(op->writeset_in, size)))
+		return (-1);
+	op->writeset_in = tmp;
+	if (!(tmp = realloc(op->readset_out, size)))
+		return (-1);
+	op->readset_out = tmp;
+	if (!(tmp = realloc(op->exset_out, size)))
+		return (-1);
+	op->exset_out = tmp;
+	if (!(tmp = realloc(op->writeset_out, size)))
+		return (-1);
+	op->writeset_out = tmp;
+	op->fd_setsz = new_size;
+	return (0);
+}
+
+/* Convert a timeval to whole milliseconds (fraction truncated). */
+static int
+timeval_to_ms(struct timeval *tv)
+{
+	int ms = tv->tv_sec * 1000;
+	ms += tv->tv_usec / 1000;
+	return (ms);
+}
+
+/* Look up the event_entry for socket s in op's tree.  If absent and
+ * create is nonzero, allocate, initialize and insert a fresh entry.
+ * Returns NULL on a lookup miss (create == 0) or allocation failure. */
+static struct event_entry*
+get_event_entry(struct win32op *op, SOCKET s, int create)
+{
+	struct event_entry key, *val;
+	key.sock = s;
+	val = RB_FIND(event_map, &op->event_root, &key);
+	if (val || !create)
+		return val;
+	if (!(val = calloc(1, sizeof(struct event_entry)))) {
+		event_warn("%s: calloc", __func__);
+		return NULL;
+	}
+	val->sock = s;
+	/* -1 marks "not present" in the corresponding input fd set. */
+	val->read_pos = val->write_pos = -1;
+	RB_INSERT(event_map, &op->event_root, val);
+	return val;
+}
+
+/* Append ent's socket to the read or write input fd set, growing the
+ * sets if they are full, and cache the socket's index in ent so
+ * do_fd_clear can remove it in O(1).  Returns the slot used, 0 if the
+ * socket was already present, or -1 on allocation failure. */
+static int
+do_fd_set(struct win32op *op, struct event_entry *ent, int read)
+{
+	SOCKET s = ent->sock;
+	struct win_fd_set *set = read ? op->readset_in : op->writeset_in;
+	if (read) {
+		if (ent->read_pos >= 0)
+			return (0);
+	} else {
+		if (ent->write_pos >= 0)
+			return (0);
+	}
+	if (set->fd_count == op->fd_setsz) {
+		if (realloc_fd_sets(op, op->fd_setsz*2))
+			return (-1);
+		/* set pointer will have changed and needs reiniting! */
+		set = read ? op->readset_in : op->writeset_in;
+	}
+	set->fd_array[set->fd_count] = s;
+	if (read)
+		ent->read_pos = set->fd_count;
+	else
+		ent->write_pos = set->fd_count;
+	return (set->fd_count++);
+}
+
+/* Remove ent's socket from the read or write input fd set.  The array
+ * is kept dense by moving the last socket into the freed slot and
+ * updating that socket's cached position.  Returns 0. */
+static int
+do_fd_clear(struct win32op *op, struct event_entry *ent, int read)
+{
+	int i;
+	struct win_fd_set *set = read ? op->readset_in : op->writeset_in;
+	if (read) {
+		i = ent->read_pos;
+		ent->read_pos = -1;
+	} else {
+		i = ent->write_pos;
+		ent->write_pos = -1;
+	}
+	if (i < 0)
+		return (0);
+	if (--set->fd_count != i) {
+		struct event_entry *ent2;
+		SOCKET s2;
+		s2 = set->fd_array[i] = set->fd_array[set->fd_count];
+		ent2 = get_event_entry(op, s2, 0);
+		/* BUG FIX: the lookup result is ent2; the old code tested
+		 * ent (the entry being cleared, always non-NULL here), so a
+		 * failed lookup fell through to a NULL dereference below. */
+		if (!ent2) /* This indicates a bug. */
+			return (0);
+		if (read)
+			ent2->read_pos = i;
+		else
+			ent2->write_pos = i;
+	}
+	return (0);
+}
+
+#define NEVENT 64
+/* Allocate and initialize the win32 backend state: five fd sets sized
+ * for NEVENT sockets, an empty socket-to-event tree, and the signal
+ * subsystem.  Returns the opaque backend pointer, or NULL on
+ * allocation failure. */
+void *
+win32_init(struct event_base *_base)
+{
+	struct win32op *winop;
+	size_t size;
+	if (!(winop = calloc(1, sizeof(struct win32op))))
+		return NULL;
+	winop->fd_setsz = NEVENT;
+	size = FD_SET_ALLOC_SIZE(NEVENT);
+	if (!(winop->readset_in = malloc(size)))
+		goto err;
+	if (!(winop->writeset_in = malloc(size)))
+		goto err;
+	if (!(winop->readset_out = malloc(size)))
+		goto err;
+	if (!(winop->writeset_out = malloc(size)))
+		goto err;
+	if (!(winop->exset_out = malloc(size)))
+		goto err;
+	RB_INIT(&winop->event_root);
+	winop->readset_in->fd_count = winop->writeset_in->fd_count = 0;
+	winop->readset_out->fd_count = winop->writeset_out->fd_count
+		= winop->exset_out->fd_count = 0;
+
+	/* Remember a failed signal init so win32_insert can refuse
+	 * EV_SIGNAL events later. */
+	if (evsignal_init(_base) < 0)
+		winop->signals_are_broken = 1;
+
+	return (winop);
+ err:
+	/* goto-based cleanup; XFREE skips NULL members from calloc. */
+	XFREE(winop->readset_in);
+	XFREE(winop->writeset_in);
+	XFREE(winop->readset_out);
+	XFREE(winop->writeset_out);
+	XFREE(winop->exset_out);
+	XFREE(winop);
+	return (NULL);
+}
+
+/* Register event ev with the backend.  Signal events are delegated to
+ * evsignal_add; EV_READ/EV_WRITE events get (or create) the socket's
+ * event_entry and are added to the matching fd set.  Returns 0 on
+ * success, -1 on error. */
+int
+win32_insert(void *op, struct event *ev)
+{
+	struct win32op *win32op = op;
+	struct event_entry *ent;
+
+	if (ev->ev_events & EV_SIGNAL) {
+		if (win32op->signals_are_broken)
+			return (-1);
+		return (evsignal_add(ev));
+	}
+	if (!(ev->ev_events & (EV_READ|EV_WRITE)))
+		return (0);
+	ent = get_event_entry(win32op, ev->ev_fd, 1);
+	if (!ent)
+		return (-1); /* out of memory */
+
+	event_debug(("%s: adding event for %d", __func__, (int)ev->ev_fd));
+	if (ev->ev_events & EV_READ) {
+		if (do_fd_set(win32op, ent, 1)<0)
+			return (-1);
+		ent->read_event = ev;
+	}
+	if (ev->ev_events & EV_WRITE) {
+		if (do_fd_set(win32op, ent, 0)<0)
+			return (-1);
+		ent->write_event = ev;
+	}
+	return (0);
+}
+
+/* Unregister event ev.  Signal events go through evsignal_del; socket
+ * events are cleared from their fd set, and the event_entry is freed
+ * once neither a read nor a write event remains attached to it.
+ * Returns 0 on success, -1 if the socket has no entry. */
+int
+win32_del(void *op, struct event *ev)
+{
+	struct win32op *win32op = op;
+	struct event_entry *ent;
+
+	if (ev->ev_events & EV_SIGNAL)
+		return (evsignal_del(ev));
+
+	if (!(ent = get_event_entry(win32op, ev->ev_fd, 0)))
+		return (-1);
+	event_debug(("%s: Removing event for %d", __func__, ev->ev_fd));
+	if (ev == ent->read_event) {
+		do_fd_clear(win32op, ent, 1);
+		ent->read_event = NULL;
+	}
+	if (ev == ent->write_event) {
+		do_fd_clear(win32op, ent, 0);
+		ent->write_event = NULL;
+	}
+	/* Drop the tree node once no event references this socket. */
+	if (!ent->read_event && !ent->write_event) {
+		RB_REMOVE(event_map, &win32op->event_root, ent);
+		free(ent);
+	}
+
+	return 0;
+}
+
+/* Duplicate the populated prefix of one win_fd_set into another. */
+static void
+fd_set_copy(struct win_fd_set *out, const struct win_fd_set *in)
+{
+	size_t nbytes = in->fd_count * sizeof(SOCKET);
+	out->fd_count = in->fd_count;
+	memcpy(out->fd_array, in->fd_array, nbytes);
+}
+
+/*
+ static void dump_fd_set(struct win_fd_set *s)
+ {
+ unsigned int i;
+ printf("[ ");
+ for(i=0;i<s->fd_count;++i)
+ printf("%d ",(int)s->fd_array[i]);
+ printf("]\n");
+ }
+*/
+
+/* Run one select() pass: wait up to tv on all registered sockets, then
+ * activate every ready read/write event and process pending signals.
+ * Returns 0 on success, or select()'s negative result on error. */
+int
+win32_dispatch(struct event_base *base, void *op,
+	       struct timeval *tv)
+{
+	struct win32op *win32op = op;
+	int res = 0;
+	unsigned j, i;
+	int fd_count;
+	SOCKET s;
+	struct event_entry *ent;
+
+	/* select() mutates its fd_set arguments, so operate on scratch
+	 * copies.  The read set doubles as the except set, since Windows
+	 * reports some readiness conditions there. */
+	fd_set_copy(win32op->readset_out, win32op->readset_in);
+	fd_set_copy(win32op->exset_out, win32op->readset_in);
+	fd_set_copy(win32op->writeset_out, win32op->writeset_in);
+
+	fd_count =
+	    (win32op->readset_out->fd_count > win32op->writeset_out->fd_count) ?
+	    win32op->readset_out->fd_count : win32op->writeset_out->fd_count;
+
+	if (!fd_count) {
+		/* Windows doesn't like you to call select() with no sockets */
+		Sleep(timeval_to_ms(tv));
+		evsignal_process(base);
+		return (0);
+	}
+
+	res = select(fd_count,
+		     (struct fd_set*)win32op->readset_out,
+		     (struct fd_set*)win32op->writeset_out,
+		     (struct fd_set*)win32op->exset_out, tv);
+
+	event_debug(("%s: select returned %d", __func__, res));
+
+	if (res <= 0) {
+		evsignal_process(base);
+		return res;
+	} else if (base->sig.evsignal_caught) {
+		evsignal_process(base);
+	}
+
+	/* Activate ready events, starting each scan at a random offset so
+	 * no single socket is persistently favored. */
+	if (win32op->readset_out->fd_count) {
+		i = rand() % win32op->readset_out->fd_count;
+		for (j=0; j<win32op->readset_out->fd_count; ++j) {
+			if (++i >= win32op->readset_out->fd_count)
+				i = 0;
+			s = win32op->readset_out->fd_array[i];
+			if ((ent = get_event_entry(win32op, s, 0)) && ent->read_event)
+				event_active(ent->read_event, EV_READ, 1);
+		}
+	}
+	if (win32op->exset_out->fd_count) {
+		i = rand() % win32op->exset_out->fd_count;
+		for (j=0; j<win32op->exset_out->fd_count; ++j) {
+			if (++i >= win32op->exset_out->fd_count)
+				i = 0;
+			s = win32op->exset_out->fd_array[i];
+			if ((ent = get_event_entry(win32op, s, 0)) && ent->read_event)
+				event_active(ent->read_event, EV_READ, 1);
+		}
+	}
+	if (win32op->writeset_out->fd_count) {
+		i = rand() % win32op->writeset_out->fd_count;
+		for (j=0; j<win32op->writeset_out->fd_count; ++j) {
+			/* BUG FIX: wrap i against writeset_out, not exset_out;
+			 * the old bound could index past the end of the write
+			 * set (or never wrap) when the set sizes differ. */
+			if (++i >= win32op->writeset_out->fd_count)
+				i = 0;
+			s = win32op->writeset_out->fd_array[i];
+			if ((ent = get_event_entry(win32op, s, 0)) && ent->write_event)
+				event_active(ent->write_event, EV_WRITE, 1);
+
+		}
+	}
+
+	return (0);
+}
+
+/* Tear down the backend: release the signal subsystem and every fd
+ * set, then scrub and free the state struct itself. */
+void
+win32_dealloc(struct event_base *_base, void *arg)
+{
+	struct win32op *win32op = arg;
+
+	evsignal_dealloc(_base);
+	/* free(NULL) is a no-op, so the members need no guards. */
+	free(win32op->readset_in);
+	free(win32op->writeset_in);
+	free(win32op->readset_out);
+	free(win32op->writeset_out);
+	free(win32op->exset_out);
+	/* XXXXX free the tree. */
+
+	/* BUG FIX: sizeof(win32op) is the size of the pointer, not of the
+	 * structure; zero the whole struct (now-dangling pointers and all)
+	 * before freeing. */
+	memset(win32op, 0, sizeof(*win32op));
+	free(win32op);
+}
+
+#if 0
+/* (Disabled by #if 0.)  Async signal handler: count the signal and
+ * raise the global flag for later processing by signal_process(). */
+static void
+signal_handler(int sig)
+{
+	evsigcaught[sig]++;
+	signal_caught = 1;
+}
+
+/* (Disabled by #if 0.)  Re-register signal_handler for every queued
+ * signal event.  Returns 0 on success, -1 if any signal() call fails. */
+int
+signal_recalc(void)
+{
+	struct event *ev;
+
+	/* Reinstall our signal handler. */
+	TAILQ_FOREACH(ev, &signalqueue, ev_signal_next) {
+		/* BUG FIX: signal() returns a function pointer; casting it
+		 * to int truncates on 64-bit targets.  Compare against
+		 * SIG_ERR instead. */
+		if (signal(EVENT_SIGNAL(ev), signal_handler) == SIG_ERR)
+			return (-1);
+	}
+	return (0);
+}
+
+/* (Disabled by #if 0.)  Deliver all caught signals: activate each
+ * queued signal event with its pending call count — removing
+ * non-persistent events first — then reset the counters and flag. */
+void
+signal_process(void)
+{
+	struct event *ev;
+	short ncalls;
+
+	TAILQ_FOREACH(ev, &signalqueue, ev_signal_next) {
+		ncalls = evsigcaught[EVENT_SIGNAL(ev)];
+		if (ncalls) {
+			if (!(ev->ev_events & EV_PERSIST))
+				event_del(ev);
+			event_active(ev, EV_SIGNAL, ncalls);
+		}
+	}
+
+	memset(evsigcaught, 0, sizeof(evsigcaught));
+	signal_caught = 0;
+}
+#endif
+
diff --git a/libevent/WIN32-Prj/libevent.dsw b/libevent/WIN32-Prj/libevent.dsw
new file mode 100644
index 00000000000..fb05451ca25
--- /dev/null
+++ b/libevent/WIN32-Prj/libevent.dsw
@@ -0,0 +1,74 @@
+Microsoft Developer Studio Workspace File, Format Version 6.00
+# WARNING: DO NOT EDIT OR DELETE THIS WORKSPACE FILE!
+
+###############################################################################
+
+Project: "event_test"=".\event_test\event_test.dsp" - Package Owner=<4>
+
+Package=<5>
+{{{
+}}}
+
+Package=<4>
+{{{
+ Begin Project Dependency
+ Project_Dep_Name libevent
+ End Project Dependency
+}}}
+
+###############################################################################
+
+Project: "libevent"=".\libevent.dsp" - Package Owner=<4>
+
+Package=<5>
+{{{
+}}}
+
+Package=<4>
+{{{
+}}}
+
+###############################################################################
+
+Project: "signal_test"=".\signal_test\signal_test.dsp" - Package Owner=<4>
+
+Package=<5>
+{{{
+}}}
+
+Package=<4>
+{{{
+ Begin Project Dependency
+ Project_Dep_Name libevent
+ End Project Dependency
+}}}
+
+###############################################################################
+
+Project: "time_test"=".\time_test\time_test.dsp" - Package Owner=<4>
+
+Package=<5>
+{{{
+}}}
+
+Package=<4>
+{{{
+ Begin Project Dependency
+ Project_Dep_Name libevent
+ End Project Dependency
+}}}
+
+###############################################################################
+
+Global:
+
+Package=<5>
+{{{
+}}}
+
+Package=<3>
+{{{
+}}}
+
+###############################################################################
+
diff --git a/libevent/WIN32-Prj/libevent.sln b/libevent/WIN32-Prj/libevent.sln
new file mode 100644
index 00000000000..17e0c98bae6
--- /dev/null
+++ b/libevent/WIN32-Prj/libevent.sln
@@ -0,0 +1,53 @@
+
+Microsoft Visual Studio Solution File, Format Version 9.00
+# Visual Studio 2005
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "event_test", "event_test\event_test.vcproj", "{52099A8B-455B-4BE9-8E61-A3D6E8A4338D}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libevent", "libevent.vcproj", "{B98ABFCE-24D4-4B70-94DE-EF7F1E0662F9}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "signal_test", "signal_test\signal_test.vcproj", "{768DB9DD-2694-4274-89B8-74106E8F7786}"
+ ProjectSection(ProjectDependencies) = postProject
+ {B98ABFCE-24D4-4B70-94DE-EF7F1E0662F9} = {B98ABFCE-24D4-4B70-94DE-EF7F1E0662F9}
+ EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "time_test", "time_test\time_test.vcproj", "{D4BE29FB-E45C-4177-9647-74BBAFDC1257}"
+ ProjectSection(ProjectDependencies) = postProject
+ {B98ABFCE-24D4-4B70-94DE-EF7F1E0662F9} = {B98ABFCE-24D4-4B70-94DE-EF7F1E0662F9}
+ EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "regress", "regress\regress.vcproj", "{F7C26008-6066-4AD3-8543-452EFE58BD2E}"
+ ProjectSection(ProjectDependencies) = postProject
+ {B98ABFCE-24D4-4B70-94DE-EF7F1E0662F9} = {B98ABFCE-24D4-4B70-94DE-EF7F1E0662F9}
+ EndProjectSection
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|Win32 = Debug|Win32
+ Release|Win32 = Release|Win32
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {52099A8B-455B-4BE9-8E61-A3D6E8A4338D}.Debug|Win32.ActiveCfg = Debug|Win32
+ {52099A8B-455B-4BE9-8E61-A3D6E8A4338D}.Debug|Win32.Build.0 = Debug|Win32
+ {52099A8B-455B-4BE9-8E61-A3D6E8A4338D}.Release|Win32.ActiveCfg = Release|Win32
+ {52099A8B-455B-4BE9-8E61-A3D6E8A4338D}.Release|Win32.Build.0 = Release|Win32
+ {B98ABFCE-24D4-4B70-94DE-EF7F1E0662F9}.Debug|Win32.ActiveCfg = Debug|Win32
+ {B98ABFCE-24D4-4B70-94DE-EF7F1E0662F9}.Debug|Win32.Build.0 = Debug|Win32
+ {B98ABFCE-24D4-4B70-94DE-EF7F1E0662F9}.Release|Win32.ActiveCfg = Release|Win32
+ {B98ABFCE-24D4-4B70-94DE-EF7F1E0662F9}.Release|Win32.Build.0 = Release|Win32
+ {768DB9DD-2694-4274-89B8-74106E8F7786}.Debug|Win32.ActiveCfg = Debug|Win32
+ {768DB9DD-2694-4274-89B8-74106E8F7786}.Debug|Win32.Build.0 = Debug|Win32
+ {768DB9DD-2694-4274-89B8-74106E8F7786}.Release|Win32.ActiveCfg = Release|Win32
+ {768DB9DD-2694-4274-89B8-74106E8F7786}.Release|Win32.Build.0 = Release|Win32
+ {D4BE29FB-E45C-4177-9647-74BBAFDC1257}.Debug|Win32.ActiveCfg = Debug|Win32
+ {D4BE29FB-E45C-4177-9647-74BBAFDC1257}.Debug|Win32.Build.0 = Debug|Win32
+ {D4BE29FB-E45C-4177-9647-74BBAFDC1257}.Release|Win32.ActiveCfg = Release|Win32
+ {D4BE29FB-E45C-4177-9647-74BBAFDC1257}.Release|Win32.Build.0 = Release|Win32
+ {F7C26008-6066-4AD3-8543-452EFE58BD2E}.Debug|Win32.ActiveCfg = Debug|Win32
+ {F7C26008-6066-4AD3-8543-452EFE58BD2E}.Debug|Win32.Build.0 = Debug|Win32
+ {F7C26008-6066-4AD3-8543-452EFE58BD2E}.Release|Win32.ActiveCfg = Release|Win32
+ {F7C26008-6066-4AD3-8543-452EFE58BD2E}.Release|Win32.Build.0 = Release|Win32
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+EndGlobal
diff --git a/libevent/autogen.sh b/libevent/autogen.sh
new file mode 100644
index 00000000000..6d4275a6392
--- /dev/null
+++ b/libevent/autogen.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+# Regenerate the autotools build system (configure, Makefile.in, ...).
+# Darwin ships GNU libtoolize as "glibtoolize", so select it there.
+LIBTOOLIZE=libtoolize
+SYSNAME=`uname`
+if [ "x$SYSNAME" = "xDarwin" ] ; then
+  LIBTOOLIZE=glibtoolize
+fi
+# Each tool runs only if the previous one succeeded (&& chain).
+aclocal && \
+	autoheader && \
+	$LIBTOOLIZE && \
+	autoconf && \
+	automake --add-missing --copy
diff --git a/libevent/buffer.c b/libevent/buffer.c
new file mode 100644
index 00000000000..9cb0f0ce323
--- /dev/null
+++ b/libevent/buffer.c
@@ -0,0 +1,451 @@
+/*
+ * Copyright (c) 2002, 2003 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef WIN32
+#include <winsock2.h>
+#include <windows.h>
+#endif
+
+#ifdef HAVE_VASPRINTF
+/* If we have vasprintf, we need to define this before we include stdio.h. */
+#define _GNU_SOURCE
+#endif
+
+#include <sys/types.h>
+
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+
+#ifdef HAVE_SYS_IOCTL_H
+#include <sys/ioctl.h>
+#endif
+
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef HAVE_STDARG_H
+#include <stdarg.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "event.h"
+#include "config.h"
+#include "evutil.h"
+
+/* Allocate a new, empty evbuffer (all fields zeroed by calloc).
+ * Returns NULL on allocation failure. */
+struct evbuffer *
+evbuffer_new(void)
+{
+	struct evbuffer *buffer;
+
+	buffer = calloc(1, sizeof(struct evbuffer));
+
+	return (buffer);
+}
+
+/* Release an evbuffer and its backing storage.  Unlike free(), the
+ * argument must not be NULL — buffer->orig_buffer is read unconditionally. */
+void
+evbuffer_free(struct evbuffer *buffer)
+{
+	if (buffer->orig_buffer != NULL)
+		free(buffer->orig_buffer);
+	free(buffer);
+}
+
+/*
+ * This is a destructive add. The data from one buffer moves into
+ * the other buffer.
+ */
+
+/* Copy (y)'s storage bookkeeping into (x); used below to exchange two
+ * evbuffers wholesale.  Note it deliberately does NOT copy cb/cbarg,
+ * so each buffer keeps its own callback. */
+#define SWAP(x,y) do { \
+	(x)->buffer = (y)->buffer; \
+	(x)->orig_buffer = (y)->orig_buffer; \
+	(x)->misalign = (y)->misalign; \
+	(x)->totallen = (y)->totallen; \
+	(x)->off = (y)->off; \
+} while (0)
+
+/* Move all data from inbuf to the end of outbuf (destructive append:
+ * inbuf is left empty).  Returns 0 on success, -1 on allocation failure. */
+int
+evbuffer_add_buffer(struct evbuffer *outbuf, struct evbuffer *inbuf)
+{
+	int res;
+
+	/* Short cut for better performance */
+	if (outbuf->off == 0) {
+		struct evbuffer tmp;
+		size_t oldoff = inbuf->off;
+
+		/* Swap them directly */
+		SWAP(&tmp, outbuf);
+		SWAP(outbuf, inbuf);
+		SWAP(inbuf, &tmp);
+
+		/*
+		 * Optimization comes with a price; we need to notify the
+		 * buffer if necessary of the changes. oldoff is the amount
+		 * of data that we transfered from inbuf to outbuf
+		 */
+		if (inbuf->off != oldoff && inbuf->cb != NULL)
+			(*inbuf->cb)(inbuf, oldoff, inbuf->off, inbuf->cbarg);
+		if (oldoff && outbuf->cb != NULL)
+			(*outbuf->cb)(outbuf, 0, oldoff, outbuf->cbarg);
+
+		return (0);
+	}
+
+	/* General case: copy the bytes, then drain the source. */
+	res = evbuffer_add(outbuf, inbuf->buffer, inbuf->off);
+	if (res == 0) {
+		/* We drain the input buffer on success */
+		evbuffer_drain(inbuf, inbuf->off);
+	}
+
+	return (res);
+}
+
+/* vprintf-style append to buf.  Returns the number of bytes written
+ * (excluding the NUL), or -1 on format/allocation error.  Grows the
+ * buffer and retries until the formatted output fits. */
+int
+evbuffer_add_vprintf(struct evbuffer *buf, const char *fmt, va_list ap)
+{
+	char *buffer;
+	size_t space;
+	size_t oldoff = buf->off;
+	int sz;
+	va_list aq;
+
+	/* make sure that at least some space is available */
+	evbuffer_expand(buf, 64);
+	for (;;) {
+		size_t used = buf->misalign + buf->off;
+		buffer = (char *)buf->buffer + buf->off;
+		assert(buf->totallen >= used);
+		space = buf->totallen - used;
+
+#ifndef va_copy
+#define va_copy(dst, src)	memcpy(&(dst), &(src), sizeof(va_list))
+#endif
+		/* Copy the arg list: ap must remain reusable for a retry. */
+		va_copy(aq, ap);
+
+		sz = evutil_vsnprintf(buffer, space, fmt, aq);
+
+		va_end(aq);
+
+		if (sz < 0)
+			return (-1);
+		if ((size_t)sz < space) {
+			buf->off += sz;
+			if (buf->cb != NULL)
+				(*buf->cb)(buf, oldoff, buf->off, buf->cbarg);
+			return (sz);
+		}
+		/* Truncated: grow to hold sz bytes plus the NUL and retry. */
+		if (evbuffer_expand(buf, sz + 1) == -1)
+			return (-1);
+
+	}
+	/* NOTREACHED */
+}
+
+/* printf-style append to buf; thin wrapper over evbuffer_add_vprintf().
+ * Returns bytes written or -1 on error. */
+int
+evbuffer_add_printf(struct evbuffer *buf, const char *fmt, ...)
+{
+	int res = -1;
+	va_list ap;
+
+	va_start(ap, fmt);
+	res = evbuffer_add_vprintf(buf, fmt, ap);
+	va_end(ap);
+
+	return (res);
+}
+
+/* Reads data from an event buffer and drains the bytes read */
+
+/* Copy up to datlen bytes into data and drain them from buf.  Returns
+ * the number of bytes copied.  NOTE(review): the size_t count is
+ * returned as int, so a request over INT_MAX would be misreported —
+ * harmless for typical buffer sizes, but worth knowing. */
+int
+evbuffer_remove(struct evbuffer *buf, void *data, size_t datlen)
+{
+	size_t nread = datlen;
+	if (nread >= buf->off)
+		nread = buf->off;
+
+	memcpy(data, buf->buffer, nread);
+	evbuffer_drain(buf, nread);
+
+	return (nread);
+}
+
+/*
+ * Reads a line terminated by either '\r\n', '\n\r' or '\r' or '\n'.
+ * The returned buffer needs to be freed by the caller.
+ */
+
+char *
+evbuffer_readline(struct evbuffer *buffer)
+{
+	u_char *data = EVBUFFER_DATA(buffer);
+	size_t len = EVBUFFER_LENGTH(buffer);
+	char *line;
+	unsigned int i;
+
+	/* Find the first CR or LF; that terminates the line. */
+	for (i = 0; i < len; i++) {
+		if (data[i] == '\r' || data[i] == '\n')
+			break;
+	}
+
+	/* No complete line buffered yet. */
+	if (i == len)
+		return (NULL);
+
+	if ((line = malloc(i + 1)) == NULL) {
+		fprintf(stderr, "%s: out of memory\n", __func__);
+		evbuffer_drain(buffer, i);
+		return (NULL);
+	}
+
+	memcpy(line, data, i);
+	line[i] = '\0';
+
+	/*
+	 * Some protocols terminate a line with '\r\n', so check for
+	 * that, too.
+	 */
+	if ( i < len - 1 ) {
+		char fch = data[i], sch = data[i+1];
+
+		/* Drain one more character if needed */
+		if ( (sch == '\r' || sch == '\n') && sch != fch )
+			i += 1;
+	}
+
+	/* NOTE(review): if the terminator is the last byte currently
+	 * buffered (e.g. "...\r" with its '\n' still in flight), only the
+	 * '\r' is drained here and the late '\n' will later be read as an
+	 * empty line.  Known limitation of this 1.4-era API. */
+	evbuffer_drain(buffer, i + 1);
+
+	return (line);
+}
+
+/* Slide the buffered bytes back to the start of the allocation so the
+ * misalignment becomes 0, reclaiming the leading slack.  (The previous
+ * comment here, "Adds data to an event buffer", described evbuffer_add()
+ * below, not this helper.) */
+
+static void
+evbuffer_align(struct evbuffer *buf)
+{
+	memmove(buf->orig_buffer, buf->buffer, buf->off);
+	buf->buffer = buf->orig_buffer;
+	buf->misalign = 0;
+}
+
+/* Expands the available space in the event buffer to at least datlen */
+
+int
+evbuffer_expand(struct evbuffer *buf, size_t datlen)
+{
+	size_t need = buf->misalign + buf->off + datlen;
+
+	/* If we can fit all the data, then we don't have to do anything */
+	if (buf->totallen >= need)
+		return (0);
+
+	/*
+	 * If the misalignment fulfills our data needs, we just force an
+	 * alignment to happen.  Afterwards, we have enough space.
+	 */
+	if (buf->misalign >= datlen) {
+		evbuffer_align(buf);
+	} else {
+		void *newbuf;
+		size_t length = buf->totallen;
+
+		/* Grow geometrically from a 256-byte floor.
+		 * NOTE(review): "length <<= 1" can wrap to 0 for
+		 * need > SIZE_MAX/2 and spin forever — unreachable for
+		 * realistic sizes, but a guard would be cheap. */
+		if (length < 256)
+			length = 256;
+		while (length < need)
+			length <<= 1;
+
+		/* realloc only preserves a prefix, so re-pack first. */
+		if (buf->orig_buffer != buf->buffer)
+			evbuffer_align(buf);
+		if ((newbuf = realloc(buf->buffer, length)) == NULL)
+			return (-1);
+
+		buf->orig_buffer = buf->buffer = newbuf;
+		buf->totallen = length;
+	}
+
+	return (0);
+}
+
+/* Append datlen bytes from data to buf, growing the buffer if needed.
+ * Returns 0 on success, -1 on allocation failure. */
+int
+evbuffer_add(struct evbuffer *buf, const void *data, size_t datlen)
+{
+	size_t need = buf->misalign + buf->off + datlen;
+	size_t oldoff = buf->off;
+
+	if (buf->totallen < need) {
+		if (evbuffer_expand(buf, datlen) == -1)
+			return (-1);
+	}
+
+	memcpy(buf->buffer + buf->off, data, datlen);
+	buf->off += datlen;
+
+	/* Fire the change callback only if something was actually added. */
+	if (datlen && buf->cb != NULL)
+		(*buf->cb)(buf, oldoff, buf->off, buf->cbarg);
+
+	return (0);
+}
+
+/* Discard the first len bytes of buf (everything, if len >= off).
+ * A full drain also resets the misalignment window. */
+void
+evbuffer_drain(struct evbuffer *buf, size_t len)
+{
+	size_t oldoff = buf->off;
+
+	if (len >= buf->off) {
+		buf->off = 0;
+		buf->buffer = buf->orig_buffer;
+		buf->misalign = 0;
+		goto done;
+	}
+
+	/* Partial drain: advance the window pointer — no copying. */
+	buf->buffer += len;
+	buf->misalign += len;
+
+	buf->off -= len;
+
+ done:
+	/* Tell someone about changes in this buffer */
+	if (buf->off != oldoff && buf->cb != NULL)
+		(*buf->cb)(buf, oldoff, buf->off, buf->cbarg);
+
+}
+
+/*
+ * Reads data from a file descriptor into a buffer.
+ */
+
+#define EVBUFFER_MAX_READ	4096
+
+/* Read up to howmuch bytes from fd into buf; a negative howmuch means
+ * "use a FIONREAD-derived amount".  Returns bytes read, 0 on EOF, or
+ * -1 on error. */
+int
+evbuffer_read(struct evbuffer *buf, int fd, int howmuch)
+{
+	u_char *p;
+	size_t oldoff = buf->off;
+	int n = EVBUFFER_MAX_READ;
+
+#if defined(FIONREAD)
+#ifdef WIN32
+	long lng = n;
+	if (ioctlsocket(fd, FIONREAD, &lng) == -1 || (n=lng) == 0) {
+#else
+	if (ioctl(fd, FIONREAD, &n) == -1 || n == 0) {
+#endif
+		n = EVBUFFER_MAX_READ;
+	} else if (n > EVBUFFER_MAX_READ && n > howmuch) {
+		/*
+		 * It's possible that a lot of data is available for
+		 * reading.  We do not want to exhaust resources
+		 * before the reader has a chance to do something
+		 * about it.  If the reader does not tell us how much
+		 * data we should read, we artifically limit it.
+		 */
+		if ((size_t)n > buf->totallen << 2)
+			n = buf->totallen << 2;
+		if (n < EVBUFFER_MAX_READ)
+			n = EVBUFFER_MAX_READ;
+	}
+#endif
+	if (howmuch < 0 || howmuch > n)
+		howmuch = n;
+
+	/* If we don't have FIONREAD, we might waste some space here */
+	if (evbuffer_expand(buf, howmuch) == -1)
+		return (-1);
+
+	/* We can append new data at this point */
+	p = buf->buffer + buf->off;
+
+#ifndef WIN32
+	n = read(fd, p, howmuch);
+#else
+	/* Winsock descriptors are SOCKETs; read() does not work on them. */
+	n = recv(fd, p, howmuch, 0);
+#endif
+	if (n == -1)
+		return (-1);
+	if (n == 0)
+		return (0);
+
+	buf->off += n;
+
+	/* Tell someone about changes in this buffer */
+	if (buf->off != oldoff && buf->cb != NULL)
+		(*buf->cb)(buf, oldoff, buf->off, buf->cbarg);
+
+	return (n);
+}
+
+/* Write (and drain) as much buffered data as fd accepts in one call.
+ * Returns bytes written, 0 if nothing was written, or -1 on error. */
+int
+evbuffer_write(struct evbuffer *buffer, int fd)
+{
+	int n;
+
+#ifndef WIN32
+	n = write(fd, buffer->buffer, buffer->off);
+#else
+	n = send(fd, buffer->buffer, buffer->off, 0);
+#endif
+	if (n == -1)
+		return (-1);
+	if (n == 0)
+		return (0);
+	/* Only the bytes actually written are removed from the buffer. */
+	evbuffer_drain(buffer, n);
+
+	return (n);
+}
+
+/* Locate the first occurrence of the len-byte needle "what" in the
+ * buffered data.  Returns a pointer into the live buffer (valid until
+ * the buffer is modified), or NULL if not found. */
+u_char *
+evbuffer_find(struct evbuffer *buffer, const u_char *what, size_t len)
+{
+	u_char *search = buffer->buffer, *end = search + buffer->off;
+	u_char *p;
+
+	/* memchr to each candidate first byte, then memcmp the rest. */
+	while (search < end &&
+	    (p = memchr(search, *what, end - search)) != NULL) {
+		if (p + len > end)
+			break;
+		if (memcmp(p, what, len) == 0)
+			return (p);
+		search = p + 1;
+	}
+
+	return (NULL);
+}
+
+/* Register (or clear, with cb == NULL) the change callback invoked
+ * whenever the amount of data in the buffer changes; cbarg is passed
+ * through to the callback verbatim. */
+void evbuffer_setcb(struct evbuffer *buffer,
+    void (*cb)(struct evbuffer *, size_t, size_t, void *),
+    void *cbarg)
+{
+	buffer->cb = cb;
+	buffer->cbarg = cbarg;
+}
diff --git a/libevent/cmake_install.cmake b/libevent/cmake_install.cmake
new file mode 100644
index 00000000000..341d9b9d7b9
--- /dev/null
+++ b/libevent/cmake_install.cmake
@@ -0,0 +1,34 @@
+# Install script for directory: /my/maria-10.0-merge/libevent
+
+# Set the install prefix
+IF(NOT DEFINED CMAKE_INSTALL_PREFIX)
+ SET(CMAKE_INSTALL_PREFIX "/usr/local/mysql")
+ENDIF(NOT DEFINED CMAKE_INSTALL_PREFIX)
+STRING(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}")
+
+# Set the install configuration name.
+IF(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME)
+ IF(BUILD_TYPE)
+ STRING(REGEX REPLACE "^[^A-Za-z0-9_]+" ""
+ CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}")
+ ELSE(BUILD_TYPE)
+ SET(CMAKE_INSTALL_CONFIG_NAME "Debug")
+ ENDIF(BUILD_TYPE)
+ MESSAGE(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"")
+ENDIF(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME)
+
+# Set the component getting installed.
+IF(NOT CMAKE_INSTALL_COMPONENT)
+ IF(COMPONENT)
+ MESSAGE(STATUS "Install component: \"${COMPONENT}\"")
+ SET(CMAKE_INSTALL_COMPONENT "${COMPONENT}")
+ ELSE(COMPONENT)
+ SET(CMAKE_INSTALL_COMPONENT)
+ ENDIF(COMPONENT)
+ENDIF(NOT CMAKE_INSTALL_COMPONENT)
+
+# Install shared libraries without execute permission?
+IF(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE)
+ SET(CMAKE_INSTALL_SO_NO_EXE "0")
+ENDIF(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE)
+
diff --git a/libevent/compat/sys/_time.h b/libevent/compat/sys/_time.h
new file mode 100644
index 00000000000..8cabb0d55e7
--- /dev/null
+++ b/libevent/compat/sys/_time.h
@@ -0,0 +1,163 @@
+/* $OpenBSD: time.h,v 1.11 2000/10/10 13:36:48 itojun Exp $ */
+/* $NetBSD: time.h,v 1.18 1996/04/23 10:29:33 mycroft Exp $ */
+
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)time.h 8.2 (Berkeley) 7/10/94
+ */
+
+#ifndef _SYS_TIME_H_
+#define _SYS_TIME_H_
+
+#include <sys/types.h>
+
+/*
+ * Structure returned by gettimeofday(2) system call,
+ * and used in other calls.
+ */
+struct timeval {
+ long tv_sec; /* seconds */
+ long tv_usec; /* and microseconds */
+};
+
+/*
+ * Structure defined by POSIX.1b to be like a timeval.
+ */
+struct timespec {
+ time_t tv_sec; /* seconds */
+ long tv_nsec; /* and nanoseconds */
+};
+
+#define TIMEVAL_TO_TIMESPEC(tv, ts) { \
+ (ts)->tv_sec = (tv)->tv_sec; \
+ (ts)->tv_nsec = (tv)->tv_usec * 1000; \
+}
+#define TIMESPEC_TO_TIMEVAL(tv, ts) { \
+ (tv)->tv_sec = (ts)->tv_sec; \
+ (tv)->tv_usec = (ts)->tv_nsec / 1000; \
+}
+
+struct timezone {
+ int tz_minuteswest; /* minutes west of Greenwich */
+ int tz_dsttime; /* type of dst correction */
+};
+#define DST_NONE 0 /* not on dst */
+#define DST_USA 1 /* USA style dst */
+#define DST_AUST 2 /* Australian style dst */
+#define DST_WET 3 /* Western European dst */
+#define DST_MET 4 /* Middle European dst */
+#define DST_EET 5 /* Eastern European dst */
+#define DST_CAN 6 /* Canada */
+
+/* Operations on timevals. */
+#define timerclear(tvp) (tvp)->tv_sec = (tvp)->tv_usec = 0
+#define timerisset(tvp) ((tvp)->tv_sec || (tvp)->tv_usec)
+#define timercmp(tvp, uvp, cmp) \
+ (((tvp)->tv_sec == (uvp)->tv_sec) ? \
+ ((tvp)->tv_usec cmp (uvp)->tv_usec) : \
+ ((tvp)->tv_sec cmp (uvp)->tv_sec))
+#define timeradd(tvp, uvp, vvp) \
+ do { \
+ (vvp)->tv_sec = (tvp)->tv_sec + (uvp)->tv_sec; \
+ (vvp)->tv_usec = (tvp)->tv_usec + (uvp)->tv_usec; \
+ if ((vvp)->tv_usec >= 1000000) { \
+ (vvp)->tv_sec++; \
+ (vvp)->tv_usec -= 1000000; \
+ } \
+ } while (0)
+#define timersub(tvp, uvp, vvp) \
+ do { \
+ (vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec; \
+ (vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec; \
+ if ((vvp)->tv_usec < 0) { \
+ (vvp)->tv_sec--; \
+ (vvp)->tv_usec += 1000000; \
+ } \
+ } while (0)
+
+/* Operations on timespecs. */
+#define timespecclear(tsp) (tsp)->tv_sec = (tsp)->tv_nsec = 0
+#define timespecisset(tsp) ((tsp)->tv_sec || (tsp)->tv_nsec)
+#define timespeccmp(tsp, usp, cmp) \
+ (((tsp)->tv_sec == (usp)->tv_sec) ? \
+ ((tsp)->tv_nsec cmp (usp)->tv_nsec) : \
+ ((tsp)->tv_sec cmp (usp)->tv_sec))
+#define timespecadd(tsp, usp, vsp) \
+ do { \
+ (vsp)->tv_sec = (tsp)->tv_sec + (usp)->tv_sec; \
+ (vsp)->tv_nsec = (tsp)->tv_nsec + (usp)->tv_nsec; \
+ if ((vsp)->tv_nsec >= 1000000000L) { \
+ (vsp)->tv_sec++; \
+ (vsp)->tv_nsec -= 1000000000L; \
+ } \
+ } while (0)
+#define timespecsub(tsp, usp, vsp) \
+ do { \
+ (vsp)->tv_sec = (tsp)->tv_sec - (usp)->tv_sec; \
+ (vsp)->tv_nsec = (tsp)->tv_nsec - (usp)->tv_nsec; \
+ if ((vsp)->tv_nsec < 0) { \
+ (vsp)->tv_sec--; \
+ (vsp)->tv_nsec += 1000000000L; \
+ } \
+ } while (0)
+
+/*
+ * Names of the interval timers, and structure
+ * defining a timer setting.
+ */
+#define ITIMER_REAL 0
+#define ITIMER_VIRTUAL 1
+#define ITIMER_PROF 2
+
+struct itimerval {
+ struct timeval it_interval; /* timer interval */
+ struct timeval it_value; /* current value */
+};
+
+/*
+ * Getkerninfo clock information structure
+ */
+struct clockinfo {
+ int hz; /* clock frequency */
+ int tick; /* micro-seconds per hz tick */
+ int tickadj; /* clock skew rate for adjtime() */
+ int stathz; /* statistics clock frequency */
+ int profhz; /* profiling clock frequency */
+};
+
+#define CLOCK_REALTIME 0
+#define CLOCK_VIRTUAL 1
+#define CLOCK_PROF 2
+
+#define TIMER_RELTIME 0x0 /* relative timer */
+#define TIMER_ABSTIME 0x1 /* absolute timer */
+
+/* --- stuff got cut here - niels --- */
+
+#endif /* !_SYS_TIME_H_ */
diff --git a/libevent/compat/sys/queue.h b/libevent/compat/sys/queue.h
new file mode 100644
index 00000000000..c0956ddce43
--- /dev/null
+++ b/libevent/compat/sys/queue.h
@@ -0,0 +1,488 @@
+/* $OpenBSD: queue.h,v 1.16 2000/09/07 19:47:59 art Exp $ */
+/* $NetBSD: queue.h,v 1.11 1996/05/16 05:17:14 mycroft Exp $ */
+
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)queue.h 8.5 (Berkeley) 8/20/94
+ */
+
+#ifndef _SYS_QUEUE_H_
+#define _SYS_QUEUE_H_
+
+/*
+ * This file defines five types of data structures: singly-linked lists,
+ * lists, simple queues, tail queues, and circular queues.
+ *
+ *
+ * A singly-linked list is headed by a single forward pointer. The elements
+ * are singly linked for minimum space and pointer manipulation overhead at
+ * the expense of O(n) removal for arbitrary elements. New elements can be
+ * added to the list after an existing element or at the head of the list.
+ * Elements being removed from the head of the list should use the explicit
+ * macro for this purpose for optimum efficiency. A singly-linked list may
+ * only be traversed in the forward direction. Singly-linked lists are ideal
+ * for applications with large datasets and few or no removals or for
+ * implementing a LIFO queue.
+ *
+ * A list is headed by a single forward pointer (or an array of forward
+ * pointers for a hash table header). The elements are doubly linked
+ * so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before
+ * or after an existing element or at the head of the list. A list
+ * may only be traversed in the forward direction.
+ *
+ * A simple queue is headed by a pair of pointers, one the head of the
+ * list and the other to the tail of the list. The elements are singly
+ * linked to save space, so elements can only be removed from the
+ * head of the list. New elements can be added to the list before or after
+ * an existing element, at the head of the list, or at the end of the
+ * list. A simple queue may only be traversed in the forward direction.
+ *
+ * A tail queue is headed by a pair of pointers, one to the head of the
+ * list and the other to the tail of the list. The elements are doubly
+ * linked so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before or
+ * after an existing element, at the head of the list, or at the end of
+ * the list. A tail queue may be traversed in either direction.
+ *
+ * A circle queue is headed by a pair of pointers, one to the head of the
+ * list and the other to the tail of the list. The elements are doubly
+ * linked so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before or after
+ * an existing element, at the head of the list, or at the end of the list.
+ * A circle queue may be traversed in either direction, but has a more
+ * complex end of list detection.
+ *
+ * For details on the use of these macros, see the queue(3) manual page.
+ */
+
+/*
+ * Singly-linked List definitions.
+ */
+#define SLIST_HEAD(name, type) \
+struct name { \
+ struct type *slh_first; /* first element */ \
+}
+
+#define SLIST_HEAD_INITIALIZER(head) \
+ { NULL }
+
+#ifndef WIN32
+#define SLIST_ENTRY(type) \
+struct { \
+ struct type *sle_next; /* next element */ \
+}
+#endif
+
+/*
+ * Singly-linked List access methods.
+ */
+#define SLIST_FIRST(head) ((head)->slh_first)
+#define SLIST_END(head) NULL
+#define SLIST_EMPTY(head) (SLIST_FIRST(head) == SLIST_END(head))
+#define SLIST_NEXT(elm, field) ((elm)->field.sle_next)
+
+#define SLIST_FOREACH(var, head, field) \
+ for((var) = SLIST_FIRST(head); \
+ (var) != SLIST_END(head); \
+ (var) = SLIST_NEXT(var, field))
+
+/*
+ * Singly-linked List functions.
+ */
+#define SLIST_INIT(head) { \
+ SLIST_FIRST(head) = SLIST_END(head); \
+}
+
+#define SLIST_INSERT_AFTER(slistelm, elm, field) do { \
+ (elm)->field.sle_next = (slistelm)->field.sle_next; \
+ (slistelm)->field.sle_next = (elm); \
+} while (0)
+
+#define SLIST_INSERT_HEAD(head, elm, field) do { \
+ (elm)->field.sle_next = (head)->slh_first; \
+ (head)->slh_first = (elm); \
+} while (0)
+
+#define SLIST_REMOVE_HEAD(head, field) do { \
+ (head)->slh_first = (head)->slh_first->field.sle_next; \
+} while (0)
+
+/*
+ * List definitions.
+ */
+#define LIST_HEAD(name, type) \
+struct name { \
+ struct type *lh_first; /* first element */ \
+}
+
+#define LIST_HEAD_INITIALIZER(head) \
+ { NULL }
+
+#define LIST_ENTRY(type) \
+struct { \
+ struct type *le_next; /* next element */ \
+ struct type **le_prev; /* address of previous next element */ \
+}
+
+/*
+ * List access methods
+ */
+#define LIST_FIRST(head) ((head)->lh_first)
+#define LIST_END(head) NULL
+#define LIST_EMPTY(head) (LIST_FIRST(head) == LIST_END(head))
+#define LIST_NEXT(elm, field) ((elm)->field.le_next)
+
+#define LIST_FOREACH(var, head, field) \
+ for((var) = LIST_FIRST(head); \
+ (var)!= LIST_END(head); \
+ (var) = LIST_NEXT(var, field))
+
+/*
+ * List functions.
+ */
+#define LIST_INIT(head) do { \
+ LIST_FIRST(head) = LIST_END(head); \
+} while (0)
+
+#define LIST_INSERT_AFTER(listelm, elm, field) do { \
+ if (((elm)->field.le_next = (listelm)->field.le_next) != NULL) \
+ (listelm)->field.le_next->field.le_prev = \
+ &(elm)->field.le_next; \
+ (listelm)->field.le_next = (elm); \
+ (elm)->field.le_prev = &(listelm)->field.le_next; \
+} while (0)
+
+#define LIST_INSERT_BEFORE(listelm, elm, field) do { \
+ (elm)->field.le_prev = (listelm)->field.le_prev; \
+ (elm)->field.le_next = (listelm); \
+ *(listelm)->field.le_prev = (elm); \
+ (listelm)->field.le_prev = &(elm)->field.le_next; \
+} while (0)
+
+#define LIST_INSERT_HEAD(head, elm, field) do { \
+ if (((elm)->field.le_next = (head)->lh_first) != NULL) \
+ (head)->lh_first->field.le_prev = &(elm)->field.le_next;\
+ (head)->lh_first = (elm); \
+ (elm)->field.le_prev = &(head)->lh_first; \
+} while (0)
+
+#define LIST_REMOVE(elm, field) do { \
+ if ((elm)->field.le_next != NULL) \
+ (elm)->field.le_next->field.le_prev = \
+ (elm)->field.le_prev; \
+ *(elm)->field.le_prev = (elm)->field.le_next; \
+} while (0)
+
+#define LIST_REPLACE(elm, elm2, field) do { \
+ if (((elm2)->field.le_next = (elm)->field.le_next) != NULL) \
+ (elm2)->field.le_next->field.le_prev = \
+ &(elm2)->field.le_next; \
+ (elm2)->field.le_prev = (elm)->field.le_prev; \
+ *(elm2)->field.le_prev = (elm2); \
+} while (0)
+
+/*
+ * Simple queue definitions.
+ */
+#define SIMPLEQ_HEAD(name, type) \
+struct name { \
+ struct type *sqh_first; /* first element */ \
+ struct type **sqh_last; /* addr of last next element */ \
+}
+
+#define SIMPLEQ_HEAD_INITIALIZER(head) \
+ { NULL, &(head).sqh_first }
+
+#define SIMPLEQ_ENTRY(type) \
+struct { \
+ struct type *sqe_next; /* next element */ \
+}
+
+/*
+ * Simple queue access methods.
+ */
+#define SIMPLEQ_FIRST(head) ((head)->sqh_first)
+#define SIMPLEQ_END(head) NULL
+#define SIMPLEQ_EMPTY(head) (SIMPLEQ_FIRST(head) == SIMPLEQ_END(head))
+#define SIMPLEQ_NEXT(elm, field) ((elm)->field.sqe_next)
+
+#define SIMPLEQ_FOREACH(var, head, field) \
+ for((var) = SIMPLEQ_FIRST(head); \
+ (var) != SIMPLEQ_END(head); \
+ (var) = SIMPLEQ_NEXT(var, field))
+
+/*
+ * Simple queue functions.
+ */
+/* Reset head to the empty state (sqh_last points at sqh_first). */
+#define SIMPLEQ_INIT(head) do { \
+ (head)->sqh_first = NULL; \
+ (head)->sqh_last = &(head)->sqh_first; \
+} while (0)
+
+/* Insert at the front; fix up sqh_last if the queue was empty. */
+#define SIMPLEQ_INSERT_HEAD(head, elm, field) do { \
+ if (((elm)->field.sqe_next = (head)->sqh_first) == NULL) \
+ (head)->sqh_last = &(elm)->field.sqe_next; \
+ (head)->sqh_first = (elm); \
+} while (0)
+
+/* Append at the tail via the cached last-next pointer: O(1). */
+#define SIMPLEQ_INSERT_TAIL(head, elm, field) do { \
+ (elm)->field.sqe_next = NULL; \
+ *(head)->sqh_last = (elm); \
+ (head)->sqh_last = &(elm)->field.sqe_next; \
+} while (0)
+
+/* Insert elm directly after listelm; update sqh_last when listelm
+ * was the tail. */
+#define SIMPLEQ_INSERT_AFTER(head, listelm, elm, field) do { \
+ if (((elm)->field.sqe_next = (listelm)->field.sqe_next) == NULL)\
+ (head)->sqh_last = &(elm)->field.sqe_next; \
+ (listelm)->field.sqe_next = (elm); \
+} while (0)
+
+/* Remove the first element (caller passes it as elm); restore the
+ * empty-queue invariant when the queue drains. */
+#define SIMPLEQ_REMOVE_HEAD(head, elm, field) do { \
+ if (((head)->sqh_first = (elm)->field.sqe_next) == NULL) \
+ (head)->sqh_last = &(head)->sqh_first; \
+} while (0)
+
+/*
+ * Tail queue definitions.
+ */
+/* Head of a tail queue: first element plus the address of the last
+ * "next" pointer, giving O(1) insertion at both ends. */
+#define TAILQ_HEAD(name, type) \
+struct name { \
+ struct type *tqh_first; /* first element */ \
+ struct type **tqh_last; /* addr of last next element */ \
+}
+
+/* Static initializer for an empty tail queue head. */
+#define TAILQ_HEAD_INITIALIZER(head) \
+ { NULL, &(head).tqh_first }
+
+/* Per-element linkage: next pointer plus the address of the previous
+ * element's next pointer (enables O(1) removal without a back link). */
+#define TAILQ_ENTRY(type) \
+struct { \
+ struct type *tqe_next; /* next element */ \
+ struct type **tqe_prev; /* address of previous next element */ \
+}
+
+/*
+ * tail queue access methods
+ */
+#define TAILQ_FIRST(head) ((head)->tqh_first)
+#define TAILQ_END(head) NULL
+#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next)
+/* TAILQ_LAST/TAILQ_PREV reinterpret a tqe_prev/tqh_last pointer as a
+ * pointer to a head struct; this relies on the entry and head structs
+ * sharing the same two-pointer layout. */
+#define TAILQ_LAST(head, headname) \
+ (*(((struct headname *)((head)->tqh_last))->tqh_last))
+/* XXX */
+#define TAILQ_PREV(elm, headname, field) \
+ (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last))
+#define TAILQ_EMPTY(head) \
+ (TAILQ_FIRST(head) == TAILQ_END(head))
+
+/* Forward iteration; do not remove the current element in the body. */
+#define TAILQ_FOREACH(var, head, field) \
+ for((var) = TAILQ_FIRST(head); \
+ (var) != TAILQ_END(head); \
+ (var) = TAILQ_NEXT(var, field))
+
+/* Reverse iteration using the headname cast trick above. */
+#define TAILQ_FOREACH_REVERSE(var, head, field, headname) \
+ for((var) = TAILQ_LAST(head, headname); \
+ (var) != TAILQ_END(head); \
+ (var) = TAILQ_PREV(var, headname, field))
+
+/*
+ * Tail queue functions.
+ */
+/* Reset head to the empty state (tqh_last points at tqh_first). */
+#define TAILQ_INIT(head) do { \
+ (head)->tqh_first = NULL; \
+ (head)->tqh_last = &(head)->tqh_first; \
+} while (0)
+
+/* Insert at the front; the old first element's tqe_prev (or tqh_last
+ * for an empty queue) must be repointed at elm's next field. */
+#define TAILQ_INSERT_HEAD(head, elm, field) do { \
+ if (((elm)->field.tqe_next = (head)->tqh_first) != NULL) \
+ (head)->tqh_first->field.tqe_prev = \
+ &(elm)->field.tqe_next; \
+ else \
+ (head)->tqh_last = &(elm)->field.tqe_next; \
+ (head)->tqh_first = (elm); \
+ (elm)->field.tqe_prev = &(head)->tqh_first; \
+} while (0)
+
+/* Append at the tail via the cached last-next pointer: O(1). */
+#define TAILQ_INSERT_TAIL(head, elm, field) do { \
+ (elm)->field.tqe_next = NULL; \
+ (elm)->field.tqe_prev = (head)->tqh_last; \
+ *(head)->tqh_last = (elm); \
+ (head)->tqh_last = &(elm)->field.tqe_next; \
+} while (0)
+
+/* Insert elm directly after listelm. */
+#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \
+ if (((elm)->field.tqe_next = (listelm)->field.tqe_next) != NULL)\
+ (elm)->field.tqe_next->field.tqe_prev = \
+ &(elm)->field.tqe_next; \
+ else \
+ (head)->tqh_last = &(elm)->field.tqe_next; \
+ (listelm)->field.tqe_next = (elm); \
+ (elm)->field.tqe_prev = &(listelm)->field.tqe_next; \
+} while (0)
+
+/* Insert elm directly before listelm; no head fixup is needed since
+ * listelm (still in the queue) remains a possible tail. */
+#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \
+ (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \
+ (elm)->field.tqe_next = (listelm); \
+ *(listelm)->field.tqe_prev = (elm); \
+ (listelm)->field.tqe_prev = &(elm)->field.tqe_next; \
+} while (0)
+
+/* Unlink elm in O(1); elm's own link fields are left dangling. */
+#define TAILQ_REMOVE(head, elm, field) do { \
+ if (((elm)->field.tqe_next) != NULL) \
+ (elm)->field.tqe_next->field.tqe_prev = \
+ (elm)->field.tqe_prev; \
+ else \
+ (head)->tqh_last = (elm)->field.tqe_prev; \
+ *(elm)->field.tqe_prev = (elm)->field.tqe_next; \
+} while (0)
+
+/* Substitute elm2 for elm in place; elm's links are left untouched. */
+#define TAILQ_REPLACE(head, elm, elm2, field) do { \
+ if (((elm2)->field.tqe_next = (elm)->field.tqe_next) != NULL) \
+ (elm2)->field.tqe_next->field.tqe_prev = \
+ &(elm2)->field.tqe_next; \
+ else \
+ (head)->tqh_last = &(elm2)->field.tqe_next; \
+ (elm2)->field.tqe_prev = (elm)->field.tqe_prev; \
+ *(elm2)->field.tqe_prev = (elm2); \
+} while (0)
+
+/*
+ * Circular queue definitions.
+ */
+/* Head of a circular queue: direct pointers to first and last
+ * elements; the head itself (cast to void *) is the end sentinel. */
+#define CIRCLEQ_HEAD(name, type) \
+struct name { \
+ struct type *cqh_first; /* first element */ \
+ struct type *cqh_last; /* last element */ \
+}
+
+/* Static initializer: both ends point at the head sentinel. */
+#define CIRCLEQ_HEAD_INITIALIZER(head) \
+ { CIRCLEQ_END(&head), CIRCLEQ_END(&head) }
+
+/* Per-element linkage: bidirectional, so removal needs no head scan. */
+#define CIRCLEQ_ENTRY(type) \
+struct { \
+ struct type *cqe_next; /* next element */ \
+ struct type *cqe_prev; /* previous element */ \
+}
+
+/*
+ * Circular queue access methods
+ */
+/* Note: unlike the other queues, the end sentinel is the head pointer
+ * itself, not NULL. */
+#define CIRCLEQ_FIRST(head) ((head)->cqh_first)
+#define CIRCLEQ_LAST(head) ((head)->cqh_last)
+#define CIRCLEQ_END(head) ((void *)(head))
+#define CIRCLEQ_NEXT(elm, field) ((elm)->field.cqe_next)
+#define CIRCLEQ_PREV(elm, field) ((elm)->field.cqe_prev)
+#define CIRCLEQ_EMPTY(head) \
+ (CIRCLEQ_FIRST(head) == CIRCLEQ_END(head))
+
+/* Forward iteration; do not remove the current element in the body. */
+#define CIRCLEQ_FOREACH(var, head, field) \
+ for((var) = CIRCLEQ_FIRST(head); \
+ (var) != CIRCLEQ_END(head); \
+ (var) = CIRCLEQ_NEXT(var, field))
+
+/* Reverse iteration, same caveat as above. */
+#define CIRCLEQ_FOREACH_REVERSE(var, head, field) \
+ for((var) = CIRCLEQ_LAST(head); \
+ (var) != CIRCLEQ_END(head); \
+ (var) = CIRCLEQ_PREV(var, field))
+
+/*
+ * Circular queue functions.
+ */
+/* Reset head to the empty state (both ends point at the sentinel). */
+#define CIRCLEQ_INIT(head) do { \
+ (head)->cqh_first = CIRCLEQ_END(head); \
+ (head)->cqh_last = CIRCLEQ_END(head); \
+} while (0)
+
+/* Insert elm directly after listelm; fix cqh_last when listelm was
+ * the tail. */
+#define CIRCLEQ_INSERT_AFTER(head, listelm, elm, field) do { \
+ (elm)->field.cqe_next = (listelm)->field.cqe_next; \
+ (elm)->field.cqe_prev = (listelm); \
+ if ((listelm)->field.cqe_next == CIRCLEQ_END(head)) \
+ (head)->cqh_last = (elm); \
+ else \
+ (listelm)->field.cqe_next->field.cqe_prev = (elm); \
+ (listelm)->field.cqe_next = (elm); \
+} while (0)
+
+/* Insert elm directly before listelm; fix cqh_first when listelm was
+ * the head element. */
+#define CIRCLEQ_INSERT_BEFORE(head, listelm, elm, field) do { \
+ (elm)->field.cqe_next = (listelm); \
+ (elm)->field.cqe_prev = (listelm)->field.cqe_prev; \
+ if ((listelm)->field.cqe_prev == CIRCLEQ_END(head)) \
+ (head)->cqh_first = (elm); \
+ else \
+ (listelm)->field.cqe_prev->field.cqe_next = (elm); \
+ (listelm)->field.cqe_prev = (elm); \
+} while (0)
+
+/* Insert at the front; also becomes the tail if the queue was empty. */
+#define CIRCLEQ_INSERT_HEAD(head, elm, field) do { \
+ (elm)->field.cqe_next = (head)->cqh_first; \
+ (elm)->field.cqe_prev = CIRCLEQ_END(head); \
+ if ((head)->cqh_last == CIRCLEQ_END(head)) \
+ (head)->cqh_last = (elm); \
+ else \
+ (head)->cqh_first->field.cqe_prev = (elm); \
+ (head)->cqh_first = (elm); \
+} while (0)
+
+/* Insert at the tail; also becomes the first if the queue was empty. */
+#define CIRCLEQ_INSERT_TAIL(head, elm, field) do { \
+ (elm)->field.cqe_next = CIRCLEQ_END(head); \
+ (elm)->field.cqe_prev = (head)->cqh_last; \
+ if ((head)->cqh_first == CIRCLEQ_END(head)) \
+ (head)->cqh_first = (elm); \
+ else \
+ (head)->cqh_last->field.cqe_next = (elm); \
+ (head)->cqh_last = (elm); \
+} while (0)
+
+/* Unlink elm in O(1), patching head/tail or neighbors as needed;
+ * elm's own link fields are left dangling. */
+#define CIRCLEQ_REMOVE(head, elm, field) do { \
+ if ((elm)->field.cqe_next == CIRCLEQ_END(head)) \
+ (head)->cqh_last = (elm)->field.cqe_prev; \
+ else \
+ (elm)->field.cqe_next->field.cqe_prev = \
+ (elm)->field.cqe_prev; \
+ if ((elm)->field.cqe_prev == CIRCLEQ_END(head)) \
+ (head)->cqh_first = (elm)->field.cqe_next; \
+ else \
+ (elm)->field.cqe_prev->field.cqe_next = \
+ (elm)->field.cqe_next; \
+} while (0)
+
+/*
+ * Substitute elm2 for elm in place; elm's links are left untouched.
+ * Fix: every other CIRCLEQ macro in this header takes head as a
+ * pointer and uses (head)->; the original used (head).cqh_last /
+ * (head).cqh_first here, which fails to compile with a pointer
+ * argument (same defect later corrected in upstream BSD queue.h).
+ */
+#define CIRCLEQ_REPLACE(head, elm, elm2, field) do { \
+ if (((elm2)->field.cqe_next = (elm)->field.cqe_next) == \
+ CIRCLEQ_END(head)) \
+ (head)->cqh_last = (elm2); \
+ else \
+ (elm2)->field.cqe_next->field.cqe_prev = (elm2); \
+ if (((elm2)->field.cqe_prev = (elm)->field.cqe_prev) == \
+ CIRCLEQ_END(head)) \
+ (head)->cqh_first = (elm2); \
+ else \
+ (elm2)->field.cqe_prev->field.cqe_next = (elm2); \
+} while (0)
+
+#endif /* !_SYS_QUEUE_H_ */
diff --git a/libevent/configure.in b/libevent/configure.in
new file mode 100644
index 00000000000..bc3eca1f043
--- /dev/null
+++ b/libevent/configure.in
@@ -0,0 +1,387 @@
+dnl configure.in for libevent
+dnl Dug Song <dugsong@monkey.org>
+AC_INIT(event.c)
+
+AM_INIT_AUTOMAKE(libevent,1.4.12-stable)
+AM_CONFIG_HEADER(config.h)
+dnl AM_MAINTAINER_MODE
+
+dnl Initialize prefix.
+if test "$prefix" = "NONE"; then
+ prefix="/usr/local"
+fi
+
+dnl Checks for programs.
+AC_PROG_CC
+AC_PROG_INSTALL
+AC_PROG_LN_S
+
+AC_PROG_GCC_TRADITIONAL
+if test "$GCC" = yes ; then
+ CFLAGS="$CFLAGS -Wall"
+ # And disable the strict-aliasing optimization, since it breaks
+ # our sockaddr-handling code in strange ways.
+ CFLAGS="$CFLAGS -fno-strict-aliasing"
+fi
+
+AC_ARG_ENABLE(gcc-warnings,
+ AS_HELP_STRING(--enable-gcc-warnings, enable verbose warnings with GCC))
+
+AC_PROG_LIBTOOL
+
+dnl Uncomment "AC_DISABLE_SHARED" to make shared libraries not get
+dnl built by default. You can also turn shared libs on and off from
+dnl the command line with --enable-shared and --disable-shared.
+dnl AC_DISABLE_SHARED
+AC_SUBST(LIBTOOL_DEPS)
+
+dnl Checks for libraries.
+AC_CHECK_LIB(socket, socket)
+AC_CHECK_LIB(resolv, inet_aton)
+AC_CHECK_LIB(rt, clock_gettime)
+AC_CHECK_LIB(nsl, inet_ntoa)
+
+dnl Checks for header files.
+AC_HEADER_STDC
+AC_CHECK_HEADERS(fcntl.h stdarg.h inttypes.h stdint.h poll.h signal.h unistd.h sys/epoll.h sys/time.h sys/queue.h sys/event.h sys/param.h sys/ioctl.h sys/select.h sys/devpoll.h port.h netinet/in6.h sys/socket.h)
+if test "x$ac_cv_header_sys_queue_h" = "xyes"; then
+ AC_MSG_CHECKING(for TAILQ_FOREACH in sys/queue.h)
+ AC_EGREP_CPP(yes,
+[
+#include <sys/queue.h>
+#ifdef TAILQ_FOREACH
+ yes
+#endif
+], [AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_TAILQFOREACH, 1,
+ [Define if TAILQ_FOREACH is defined in <sys/queue.h>])],
+ AC_MSG_RESULT(no)
+ )
+fi
+
+if test "x$ac_cv_header_sys_time_h" = "xyes"; then
+ AC_MSG_CHECKING(for timeradd in sys/time.h)
+ AC_EGREP_CPP(yes,
+[
+#include <sys/time.h>
+#ifdef timeradd
+ yes
+#endif
+], [ AC_DEFINE(HAVE_TIMERADD, 1,
+ [Define if timeradd is defined in <sys/time.h>])
+ AC_MSG_RESULT(yes)] ,AC_MSG_RESULT(no)
+)
+fi
+
+if test "x$ac_cv_header_sys_time_h" = "xyes"; then
+ AC_MSG_CHECKING(for timercmp in sys/time.h)
+ AC_EGREP_CPP(yes,
+[
+#include <sys/time.h>
+#ifdef timercmp
+ yes
+#endif
+], [ AC_DEFINE(HAVE_TIMERCMP, 1,
+ [Define if timercmp is defined in <sys/time.h>])
+ AC_MSG_RESULT(yes)] ,AC_MSG_RESULT(no)
+)
+fi
+
+if test "x$ac_cv_header_sys_time_h" = "xyes"; then
+ AC_MSG_CHECKING(for timerclear in sys/time.h)
+ AC_EGREP_CPP(yes,
+[
+#include <sys/time.h>
+#ifdef timerclear
+ yes
+#endif
+], [ AC_DEFINE(HAVE_TIMERCLEAR, 1,
+ [Define if timerclear is defined in <sys/time.h>])
+ AC_MSG_RESULT(yes)] ,AC_MSG_RESULT(no)
+)
+fi
+
+if test "x$ac_cv_header_sys_time_h" = "xyes"; then
+ AC_MSG_CHECKING(for timerisset in sys/time.h)
+ AC_EGREP_CPP(yes,
+[
+#include <sys/time.h>
+#ifdef timerisset
+ yes
+#endif
+], [ AC_DEFINE(HAVE_TIMERISSET, 1,
+ [Define if timerisset is defined in <sys/time.h>])
+ AC_MSG_RESULT(yes)] ,AC_MSG_RESULT(no)
+)
+fi
+
+dnl - check if the macro WIN32 is defined on this compiler.
+dnl - (this is how we check for a windows version of GCC)
+AC_MSG_CHECKING(for WIN32)
+AC_TRY_COMPILE(,
+ [
+#ifndef WIN32
+die horribly
+#endif
+ ],
+ bwin32=true; AC_MSG_RESULT(yes),
+ bwin32=false; AC_MSG_RESULT(no),
+)
+
+AM_CONDITIONAL(BUILD_WIN32, test x$bwin32 = xtrue)
+
+dnl Checks for typedefs, structures, and compiler characteristics.
+AC_C_CONST
+AC_C_INLINE
+AC_HEADER_TIME
+
+dnl Checks for library functions.
+AC_CHECK_FUNCS(gettimeofday vasprintf fcntl clock_gettime strtok_r strsep getaddrinfo getnameinfo strlcpy inet_ntop signal sigaction strtoll)
+
+AC_CHECK_SIZEOF(long)
+
+if test "x$ac_cv_func_clock_gettime" = "xyes"; then
+ AC_DEFINE(DNS_USE_CPU_CLOCK_FOR_ID, 1, [Define if clock_gettime is available in libc])
+else
+ AC_DEFINE(DNS_USE_GETTIMEOFDAY_FOR_ID, 1, [Define if no secure id variant is available])
+fi
+
+AC_MSG_CHECKING(for F_SETFD in fcntl.h)
+AC_EGREP_CPP(yes,
+[
+#define _GNU_SOURCE
+#include <fcntl.h>
+#ifdef F_SETFD
+yes
+#endif
+], [ AC_DEFINE(HAVE_SETFD, 1,
+ [Define if F_SETFD is defined in <fcntl.h>])
+ AC_MSG_RESULT(yes) ], AC_MSG_RESULT(no))
+
+needsignal=no
+haveselect=no
+AC_CHECK_FUNCS(select, [haveselect=yes], )
+if test "x$haveselect" = "xyes" ; then
+ AC_LIBOBJ(select)
+ needsignal=yes
+fi
+
+havepoll=no
+AC_CHECK_FUNCS(poll, [havepoll=yes], )
+if test "x$havepoll" = "xyes" ; then
+ AC_LIBOBJ(poll)
+ needsignal=yes
+fi
+
+haveepoll=no
+AC_CHECK_FUNCS(epoll_ctl, [haveepoll=yes], )
+if test "x$haveepoll" = "xyes" ; then
+ AC_DEFINE(HAVE_EPOLL, 1,
+ [Define if your system supports the epoll system calls])
+ AC_LIBOBJ(epoll)
+ needsignal=yes
+fi
+
+havedevpoll=no
+if test "x$ac_cv_header_sys_devpoll_h" = "xyes"; then
+ AC_DEFINE(HAVE_DEVPOLL, 1,
+ [Define if /dev/poll is available])
+ AC_LIBOBJ(devpoll)
+fi
+
+havekqueue=no
+if test "x$ac_cv_header_sys_event_h" = "xyes"; then
+ AC_CHECK_FUNCS(kqueue, [havekqueue=yes], )
+ if test "x$havekqueue" = "xyes" ; then
+ AC_MSG_CHECKING(for working kqueue)
+ AC_TRY_RUN(
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/event.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+int
+main(int argc, char **argv)
+{
+ int kq;
+ int n;
+ int fd[[2]];
+ struct kevent ev;
+ struct timespec ts;
+ char buf[[8000]];
+
+ if (pipe(fd) == -1)
+ exit(1);
+ if (fcntl(fd[[1]], F_SETFL, O_NONBLOCK) == -1)
+ exit(1);
+
+ while ((n = write(fd[[1]], buf, sizeof(buf))) == sizeof(buf))
+ ;
+
+ if ((kq = kqueue()) == -1)
+ exit(1);
+
+ ev.ident = fd[[1]];
+ ev.filter = EVFILT_WRITE;
+ ev.flags = EV_ADD | EV_ENABLE;
+ n = kevent(kq, &ev, 1, NULL, 0, NULL);
+ if (n == -1)
+ exit(1);
+
+ read(fd[[0]], buf, sizeof(buf));
+
+ ts.tv_sec = 0;
+ ts.tv_nsec = 0;
+ n = kevent(kq, NULL, 0, &ev, 1, &ts);
+ if (n == -1 || n == 0)
+ exit(1);
+
+ exit(0);
+}, [AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_WORKING_KQUEUE, 1,
+ [Define if kqueue works correctly with pipes])
+ AC_LIBOBJ(kqueue)], AC_MSG_RESULT(no), AC_MSG_RESULT(no))
+ fi
+fi
+
+haveepollsyscall=no
+if test "x$ac_cv_header_sys_epoll_h" = "xyes"; then
+ if test "x$haveepoll" = "xno" ; then
+ AC_MSG_CHECKING(for epoll system call)
+ AC_TRY_RUN(
+#include <stdint.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/syscall.h>
+#include <sys/epoll.h>
+#include <unistd.h>
+
+int
+epoll_create(int size)
+{
+ return (syscall(__NR_epoll_create, size));
+}
+
+int
+main(int argc, char **argv)
+{
+ int epfd;
+
+ epfd = epoll_create(256);
+ exit (epfd == -1 ? 1 : 0);
+}, [AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_EPOLL, 1,
+ [Define if your system supports the epoll system calls])
+ needsignal=yes
+ AC_LIBOBJ(epoll_sub)
+ AC_LIBOBJ(epoll)], AC_MSG_RESULT(no), AC_MSG_RESULT(no))
+ fi
+fi
+
+haveeventports=no
+AC_CHECK_FUNCS(port_create, [haveeventports=yes], )
+if test "x$haveeventports" = "xyes" ; then
+ AC_DEFINE(HAVE_EVENT_PORTS, 1,
+ [Define if your system supports event ports])
+ AC_LIBOBJ(evport)
+ needsignal=yes
+fi
+dnl On win32 the generic signal handling code is always required.
+dnl (The original repeated this test twice verbatim; the duplicate
+dnl block has been removed.)
+if test "x$bwin32" = "xtrue"; then
+	needsignal=yes
+fi
+if test "x$needsignal" = "xyes" ; then
+	AC_LIBOBJ(signal)
+fi
+
+AC_TYPE_PID_T
+AC_TYPE_SIZE_T
+AC_CHECK_TYPES([uint64_t, uint32_t, uint16_t, uint8_t], , ,
+[#ifdef HAVE_STDINT_H
+#include <stdint.h>
+#elif defined(HAVE_INTTYPES_H)
+#include <inttypes.h>
+#endif
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif])
+AC_CHECK_SIZEOF(long long)
+AC_CHECK_SIZEOF(int)
+AC_CHECK_SIZEOF(short)
+AC_CHECK_TYPES([struct in6_addr], , ,
+[#ifdef WIN32
+#include <winsock2.h>
+#else
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <sys/socket.h>
+#endif
+#ifdef HAVE_NETINET_IN6_H
+#include <netinet/in6.h>
+#endif])
+
+AC_MSG_CHECKING([for socklen_t])
+AC_TRY_COMPILE([
+ #include <sys/types.h>
+ #include <sys/socket.h>],
+ [socklen_t x;],
+ AC_MSG_RESULT([yes]),
+ [AC_MSG_RESULT([no])
+ AC_DEFINE(socklen_t, unsigned int,
+ [Define to unsigned int if you dont have it])]
+)
+
+AC_MSG_CHECKING([whether our compiler supports __func__])
+AC_TRY_COMPILE([],
+ [ const char *cp = __func__; ],
+ AC_MSG_RESULT([yes]),
+ AC_MSG_RESULT([no])
+ AC_MSG_CHECKING([whether our compiler supports __FUNCTION__])
+ AC_TRY_COMPILE([],
+ [ const char *cp = __FUNCTION__; ],
+ AC_MSG_RESULT([yes])
+ AC_DEFINE(__func__, __FUNCTION__,
+ [Define to appropriate substitute if compiler doesnt have __func__]),
+ AC_MSG_RESULT([no])
+ AC_DEFINE(__func__, __FILE__,
+ [Define to appropriate substitute if compiler doesnt have __func__])))
+
+
+# Add some more warnings which we use in development but not in the
+# released versions. (Some relevant gcc versions can't handle these.)
+if test x$enable_gcc_warnings = xyes; then
+
+ AC_COMPILE_IFELSE(AC_LANG_PROGRAM([], [
+#if !defined(__GNUC__) || (__GNUC__ < 4)
+#error
+#endif]), have_gcc4=yes, have_gcc4=no)
+
+ AC_COMPILE_IFELSE(AC_LANG_PROGRAM([], [
+#if !defined(__GNUC__) || (__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 2)
+#error
+#endif]), have_gcc42=yes, have_gcc42=no)
+
+ CFLAGS="$CFLAGS -W -Wfloat-equal -Wundef -Wpointer-arith -Wstrict-prototypes -Wmissing-prototypes -Wwrite-strings -Wredundant-decls -Wchar-subscripts -Wcomment -Wformat=2 -Wwrite-strings -Wmissing-declarations -Wredundant-decls -Wnested-externs -Wbad-function-cast -Wswitch-enum -Werror"
+ CFLAGS="$CFLAGS -Wno-unused-parameter -Wno-sign-compare -Wstrict-aliasing"
+
+ if test x$have_gcc4 = xyes ; then
+ # These warnings break gcc 3.3.5 and work on gcc 4.0.2
+ CFLAGS="$CFLAGS -Winit-self -Wmissing-field-initializers -Wdeclaration-after-statement"
+ #CFLAGS="$CFLAGS -Wold-style-definition"
+ fi
+
+ if test x$have_gcc42 = xyes ; then
+ # These warnings break gcc 4.0.2 and work on gcc 4.2
+ CFLAGS="$CFLAGS -Waddress -Wnormalized=id -Woverride-init"
+ fi
+
+##This will break the world on some 64-bit architectures
+# CFLAGS="$CFLAGS -Winline"
+
+fi
+
+AC_OUTPUT(Makefile test/Makefile sample/Makefile)
diff --git a/libevent/devpoll.c b/libevent/devpoll.c
new file mode 100644
index 00000000000..cbd27309079
--- /dev/null
+++ b/libevent/devpoll.c
@@ -0,0 +1,417 @@
+/*
+ * Copyright 2000-2004 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#include <sys/resource.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#else
+#include <sys/_time.h>
+#endif
+#include <sys/queue.h>
+#include <sys/devpoll.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <assert.h>
+
+#include "event.h"
+#include "event-internal.h"
+#include "evsignal.h"
+#include "log.h"
+
+/* Due to limitations in the /dev/poll interface, we need to keep track
+ * of all monitored file descriptors ourselves.
+ */
+/* Per-fd bookkeeping: the read and write events registered for one
+ * file descriptor (at most one of each). */
+struct evdevpoll {
+ struct event *evread;
+ struct event *evwrite;
+};
+
+/* Backend state for the /dev/poll event mechanism. */
+struct devpollop {
+ struct evdevpoll *fds; /* per-fd event table, indexed by fd */
+ int nfds; /* capacity of fds[] */
+ struct pollfd *events; /* results buffer for DP_POLL */
+ int nevents; /* capacity of events[] and changes[] */
+ int dpfd; /* fd of the open /dev/poll device */
+ struct pollfd *changes; /* pending changes, flushed via pwrite */
+ int nchanges; /* number of queued entries in changes[] */
+};
+
+/* Backend entry points, exported to event.c via the devpollops table. */
+static void *devpoll_init (struct event_base *);
+static int devpoll_add (void *, struct event *);
+static int devpoll_del (void *, struct event *);
+static int devpoll_dispatch (struct event_base *, void *, struct timeval *);
+static void devpoll_dealloc (struct event_base *, void *);
+
+const struct eventop devpollops = {
+ "devpoll",
+ devpoll_init,
+ devpoll_add,
+ devpoll_del,
+ devpoll_dispatch,
+ devpoll_dealloc,
+ 1 /* need reinit */
+};
+
+/* Default sizing for the event/change buffers when RLIMIT_NOFILE is
+ * unlimited. */
+#define NEVENT 32000
+
+/*
+ * Flush all queued pollfd change records to the /dev/poll device.
+ * Returns 0 on success, -1 if the write failed (the queued changes
+ * are kept in that case).
+ */
+static int
+devpoll_commit(struct devpollop *devpollop)
+{
+	size_t nbytes = sizeof(struct pollfd) * devpollop->nchanges;
+
+	/*
+	 * Due to a bug in Solaris, we have to use pwrite with an offset of 0.
+	 * Write is limited to 2GB of data, until it will fail.
+	 */
+	if (pwrite(devpollop->dpfd, devpollop->changes, nbytes, 0) == -1)
+		return (-1);
+
+	devpollop->nchanges = 0;
+	return (0);
+}
+
+/*
+ * Append one change record for `fd` to the pending change buffer,
+ * committing the buffer to /dev/poll first when it is full.
+ * Returns 0 on success, -1 if a required commit failed.
+ */
+static int
+devpoll_queue(struct devpollop *devpollop, int fd, int events) {
+	struct pollfd *slot;
+
+	/* Change buffer full: push the queued entries to the kernel
+	 * before adding more. */
+	if (devpollop->nchanges >= devpollop->nevents &&
+	    devpoll_commit(devpollop) != 0)
+		return (-1);
+
+	slot = devpollop->changes + devpollop->nchanges;
+	devpollop->nchanges++;
+	slot->fd = fd;
+	slot->events = events;
+	slot->revents = 0;
+
+	return (0);
+}
+
+/*
+ * Allocate and initialize the /dev/poll backend state.
+ * Returns the opaque backend pointer, or NULL on any failure (env
+ * override, open failure, or allocation failure); all partially
+ * acquired resources are released before returning NULL.
+ */
+static void *
+devpoll_init(struct event_base *base)
+{
+ int dpfd, nfiles = NEVENT;
+ struct rlimit rl;
+ struct devpollop *devpollop;
+
+ /* Disable devpoll when this environment variable is set */
+ if (getenv("EVENT_NODEVPOLL"))
+ return (NULL);
+
+ if (!(devpollop = calloc(1, sizeof(struct devpollop))))
+ return (NULL);
+
+ /* Size the buffers to the fd limit when one is in effect. */
+ if (getrlimit(RLIMIT_NOFILE, &rl) == 0 &&
+ rl.rlim_cur != RLIM_INFINITY)
+ nfiles = rl.rlim_cur;
+
+ /* Initialize the kernel queue */
+ if ((dpfd = open("/dev/poll", O_RDWR)) == -1) {
+ event_warn("open: /dev/poll");
+ free(devpollop);
+ return (NULL);
+ }
+
+ devpollop->dpfd = dpfd;
+
+ /* Initialize fields */
+ devpollop->events = calloc(nfiles, sizeof(struct pollfd));
+ if (devpollop->events == NULL) {
+ free(devpollop);
+ close(dpfd);
+ return (NULL);
+ }
+ devpollop->nevents = nfiles;
+
+ devpollop->fds = calloc(nfiles, sizeof(struct evdevpoll));
+ if (devpollop->fds == NULL) {
+ free(devpollop->events);
+ free(devpollop);
+ close(dpfd);
+ return (NULL);
+ }
+ devpollop->nfds = nfiles;
+
+ devpollop->changes = calloc(nfiles, sizeof(struct pollfd));
+ if (devpollop->changes == NULL) {
+ free(devpollop->fds);
+ free(devpollop->events);
+ free(devpollop);
+ close(dpfd);
+ return (NULL);
+ }
+
+ /* Hook up signal-event support for this base. */
+ evsignal_init(base);
+
+ return (devpollop);
+}
+
+/*
+ * Grow the per-fd table so it can index fd `max`.  The capacity is
+ * doubled until it exceeds max; new slots are zeroed.  Returns 0 on
+ * success, -1 on allocation failure (existing table is preserved).
+ */
+static int
+devpoll_recalc(struct event_base *base, void *arg, int max)
+{
+ struct devpollop *devpollop = arg;
+
+ if (max >= devpollop->nfds) {
+ struct evdevpoll *fds;
+ int nfds;
+
+ nfds = devpollop->nfds;
+ while (nfds <= max)
+ nfds <<= 1;
+
+ /* realloc into a temporary so the old table survives failure */
+ fds = realloc(devpollop->fds, nfds * sizeof(struct evdevpoll));
+ if (fds == NULL) {
+ event_warn("realloc");
+ return (-1);
+ }
+ devpollop->fds = fds;
+ memset(fds + devpollop->nfds, 0,
+ (nfds - devpollop->nfds) * sizeof(struct evdevpoll));
+ devpollop->nfds = nfds;
+ }
+
+ return (0);
+}
+
+/*
+ * Wait for events via the DP_POLL ioctl and activate the matching
+ * libevent events.  Returns 0 on success (including EINTR), -1 on a
+ * hard ioctl failure.
+ */
+static int
+devpoll_dispatch(struct event_base *base, void *arg, struct timeval *tv)
+{
+ struct devpollop *devpollop = arg;
+ struct pollfd *events = devpollop->events;
+ struct dvpoll dvp;
+ struct evdevpoll *evdp;
+ int i, res, timeout = -1;
+
+ /* Flush any interest changes queued since the last dispatch. */
+ if (devpollop->nchanges)
+ devpoll_commit(devpollop);
+
+ /* Convert the timeout to milliseconds, rounding up; -1 blocks. */
+ if (tv != NULL)
+ timeout = tv->tv_sec * 1000 + (tv->tv_usec + 999) / 1000;
+
+ dvp.dp_fds = devpollop->events;
+ dvp.dp_nfds = devpollop->nevents;
+ dvp.dp_timeout = timeout;
+
+ res = ioctl(devpollop->dpfd, DP_POLL, &dvp);
+
+ if (res == -1) {
+ if (errno != EINTR) {
+ event_warn("ioctl: DP_POLL");
+ return (-1);
+ }
+
+ /* EINTR: a signal may have arrived; process it and retry later */
+ evsignal_process(base);
+ return (0);
+ } else if (base->sig.evsignal_caught) {
+ evsignal_process(base);
+ }
+
+ event_debug(("%s: devpoll_wait reports %d", __func__, res));
+
+ for (i = 0; i < res; i++) {
+ int which = 0;
+ int what = events[i].revents;
+ struct event *evread = NULL, *evwrite = NULL;
+
+ assert(events[i].fd < devpollop->nfds);
+ evdp = &devpollop->fds[events[i].fd];
+
+ /* Hangup/error conditions wake both readers and writers. */
+ if (what & POLLHUP)
+ what |= POLLIN | POLLOUT;
+ else if (what & POLLERR)
+ what |= POLLIN | POLLOUT;
+
+ if (what & POLLIN) {
+ evread = evdp->evread;
+ which |= EV_READ;
+ }
+
+ if (what & POLLOUT) {
+ evwrite = evdp->evwrite;
+ which |= EV_WRITE;
+ }
+
+ if (!which)
+ continue;
+
+ /* Non-persistent events are deleted before being activated;
+ * guard against deleting the same event twice when it is
+ * registered for both read and write. */
+ if (evread != NULL && !(evread->ev_events & EV_PERSIST))
+ event_del(evread);
+ if (evwrite != NULL && evwrite != evread &&
+ !(evwrite->ev_events & EV_PERSIST))
+ event_del(evwrite);
+
+ if (evread != NULL)
+ event_active(evread, EV_READ, 1);
+ if (evwrite != NULL)
+ event_active(evwrite, EV_WRITE, 1);
+ }
+
+ return (0);
+}
+
+
+/*
+ * Register interest in an event.  Signal events are delegated to the
+ * evsignal layer.  Returns 0 on success, -1 on table-growth failure,
+ * a conflicting registration, or a failed change-queue append.
+ */
+static int
+devpoll_add(void *arg, struct event *ev)
+{
+ struct devpollop *devpollop = arg;
+ struct evdevpoll *evdp;
+ int fd, events;
+
+ if (ev->ev_events & EV_SIGNAL)
+ return (evsignal_add(ev));
+
+ fd = ev->ev_fd;
+ if (fd >= devpollop->nfds) {
+ /* Extend the file descriptor array as necessary */
+ if (devpoll_recalc(ev->ev_base, devpollop, fd) == -1)
+ return (-1);
+ }
+ evdp = &devpollop->fds[fd];
+
+ /*
+ * It's not necessary to OR the existing read/write events that we
+ * are currently interested in with the new event we are adding.
+ * The /dev/poll driver ORs any new events with the existing events
+ * that it has cached for the fd.
+ */
+
+ events = 0;
+ if (ev->ev_events & EV_READ) {
+ if (evdp->evread && evdp->evread != ev) {
+ /* There is already a different read event registered */
+ return(-1);
+ }
+ events |= POLLIN;
+ }
+
+ if (ev->ev_events & EV_WRITE) {
+ if (evdp->evwrite && evdp->evwrite != ev) {
+ /* There is already a different write event registered */
+ return(-1);
+ }
+ events |= POLLOUT;
+ }
+
+ if (devpoll_queue(devpollop, fd, events) != 0)
+ return(-1);
+
+ /* Update events responsible */
+ if (ev->ev_events & EV_READ)
+ evdp->evread = ev;
+ if (ev->ev_events & EV_WRITE)
+ evdp->evwrite = ev;
+
+ return (0);
+}
+
+/*
+ * Remove interest in an event.  Signal events are delegated to the
+ * evsignal layer.  Because /dev/poll can only drop ALL events for an
+ * fd (POLLREMOVE), any remaining interest must be re-queued after the
+ * removal.  Returns 0 on success, -1 if queueing the removal failed.
+ */
+static int
+devpoll_del(void *arg, struct event *ev)
+{
+ struct devpollop *devpollop = arg;
+ struct evdevpoll *evdp;
+ int fd, events;
+ int needwritedelete = 1, needreaddelete = 1;
+
+ if (ev->ev_events & EV_SIGNAL)
+ return (evsignal_del(ev));
+
+ fd = ev->ev_fd;
+ if (fd >= devpollop->nfds)
+ return (0);
+ evdp = &devpollop->fds[fd];
+
+ events = 0;
+ if (ev->ev_events & EV_READ)
+ events |= POLLIN;
+ if (ev->ev_events & EV_WRITE)
+ events |= POLLOUT;
+
+ /*
+ * The only way to remove an fd from the /dev/poll monitored set is
+ * to use POLLREMOVE by itself. This removes ALL events for the fd
+ * provided so if we care about two events and are only removing one
+ * we must re-add the other event after POLLREMOVE.
+ */
+
+ if (devpoll_queue(devpollop, fd, POLLREMOVE) != 0)
+ return(-1);
+
+ if ((events & (POLLIN|POLLOUT)) != (POLLIN|POLLOUT)) {
+ /*
+ * We're not deleting all events, so we must resubmit the
+ * event that we are still interested in if one exists.
+ */
+
+ if ((events & POLLIN) && evdp->evwrite != NULL) {
+ /* Deleting read, still care about write */
+ devpoll_queue(devpollop, fd, POLLOUT);
+ needwritedelete = 0;
+ } else if ((events & POLLOUT) && evdp->evread != NULL) {
+ /* Deleting write, still care about read */
+ devpoll_queue(devpollop, fd, POLLIN);
+ needreaddelete = 0;
+ }
+ }
+
+ /* Clear only the registrations that were actually removed. */
+ if (needreaddelete)
+ evdp->evread = NULL;
+ if (needwritedelete)
+ evdp->evwrite = NULL;
+
+ return (0);
+}
+
+/*
+ * Release all resources owned by the /dev/poll backend.
+ * free(NULL) is a no-op per the C standard, so the redundant
+ * `if (ptr) free(ptr)` guards of the original have been dropped.
+ */
+static void
+devpoll_dealloc(struct event_base *base, void *arg)
+{
+	struct devpollop *devpollop = arg;
+
+	evsignal_dealloc(base);
+	free(devpollop->fds);
+	free(devpollop->events);
+	free(devpollop->changes);
+	if (devpollop->dpfd >= 0)	/* fd 0 would be a valid descriptor */
+		close(devpollop->dpfd);
+
+	/* Poison the struct to make accidental reuse easier to catch. */
+	memset(devpollop, 0, sizeof(struct devpollop));
+	free(devpollop);
+}
diff --git a/libevent/epoll.c b/libevent/epoll.c
new file mode 100644
index 00000000000..b479b9c07e9
--- /dev/null
+++ b/libevent/epoll.c
@@ -0,0 +1,373 @@
+/*
+ * Copyright 2000-2003 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/resource.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#else
+#include <sys/_time.h>
+#endif
+#include <sys/queue.h>
+#include <sys/epoll.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#ifdef HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+
+#include "event.h"
+#include "event-internal.h"
+#include "evsignal.h"
+#include "log.h"
+
+/* due to limitations in the epoll interface, we need to keep track of
+ * all file descriptors ourselves.
+ */
+/* Per-fd record of the events registered for read and for write. */
+struct evepoll {
+ struct event *evread;
+ struct event *evwrite;
+};
+
+/* Per-event_base state for the epoll backend. */
+struct epollop {
+ struct evepoll *fds;
+ int nfds;
+ struct epoll_event *events;
+ int nevents;
+ int epfd;
+};
+
+static void *epoll_init (struct event_base *);
+static int epoll_add (void *, struct event *);
+static int epoll_del (void *, struct event *);
+static int epoll_dispatch (struct event_base *, void *, struct timeval *);
+static void epoll_dealloc (struct event_base *, void *);
+
+/* Backend dispatch table picked up by the libevent core. */
+const struct eventop epollops = {
+ "epoll",
+ epoll_init,
+ epoll_add,
+ epoll_del,
+ epoll_dispatch,
+ epoll_dealloc,
+ 1 /* need reinit */
+};
+
+/* Mark a descriptor close-on-exec; warn (but continue) on failure. */
+#ifdef HAVE_SETFD
+#define FD_CLOSEONEXEC(x) do { \
+ if (fcntl(x, F_SETFD, 1) == -1) \
+ event_warn("fcntl(%d, F_SETFD)", x); \
+} while (0)
+#else
+#define FD_CLOSEONEXEC(x)
+#endif
+
+/* Default sizing hint for epoll_create() and the event/fd arrays. */
+#define NEVENT 32000
+
+/* On Linux kernels at least up to 2.6.24.4, epoll can't handle timeout
+ * values bigger than (LONG_MAX - 999ULL)/HZ. HZ in the wild can be
+ * as big as 1000, and LONG_MAX can be as small as (1<<31)-1, so the
+ * largest number of msec we can support here is 2147482. Let's
+ * round that down by 47 seconds.
+ */
+#define MAX_EPOLL_TIMEOUT_MSEC (35*60*1000)
+
+/*
+ * Allocate and initialize epoll backend state for `base`.  Returns
+ * NULL — making the core fall back to another backend — when the
+ * EVENT_NOEPOLL environment variable is set, when epoll_create() is
+ * unavailable (ENOSYS), or on allocation failure.
+ */
+static void *
+epoll_init(struct event_base *base)
+{
+ int epfd, nfiles = NEVENT;
+ struct rlimit rl;
+ struct epollop *epollop;
+
+ /* Disable the epoll backend when this environment variable is set */
+ if (getenv("EVENT_NOEPOLL"))
+ return (NULL);
+
+ if (getrlimit(RLIMIT_NOFILE, &rl) == 0 &&
+ rl.rlim_cur != RLIM_INFINITY) {
+ /*
+ * Passing rl.rlim_cur directly has caused problems on
+ * some platforms (e.g. Solaris), so size to one less
+ * than the descriptor limit instead.
+ */
+ nfiles = rl.rlim_cur - 1;
+ }
+
+ /* Initialize the kernel queue */
+
+ if ((epfd = epoll_create(nfiles)) == -1) {
+ if (errno != ENOSYS)
+ event_warn("epoll_create");
+ return (NULL);
+ }
+
+ FD_CLOSEONEXEC(epfd);
+
+ if (!(epollop = calloc(1, sizeof(struct epollop))))
+ return (NULL);
+
+ epollop->epfd = epfd;
+
+ /* Initialize fields */
+ epollop->events = malloc(nfiles * sizeof(struct epoll_event));
+ if (epollop->events == NULL) {
+ free(epollop);
+ return (NULL);
+ }
+ epollop->nevents = nfiles;
+
+ epollop->fds = calloc(nfiles, sizeof(struct evepoll));
+ if (epollop->fds == NULL) {
+ free(epollop->events);
+ free(epollop);
+ return (NULL);
+ }
+ epollop->nfds = nfiles;
+
+ evsignal_init(base);
+
+ return (epollop);
+}
+
+/*
+ * Ensure the evepoll fd table can be indexed by `max`: double nfds
+ * until it exceeds max, realloc the table, and zero the new tail.
+ * Returns 0 on success, -1 if realloc fails (the old table remains
+ * valid in that case).
+ */
+static int
+epoll_recalc(struct event_base *base, void *arg, int max)
+{
+ struct epollop *epollop = arg;
+
+ if (max >= epollop->nfds) {
+ struct evepoll *fds;
+ int nfds;
+
+ nfds = epollop->nfds;
+ while (nfds <= max)
+ nfds <<= 1;
+
+ fds = realloc(epollop->fds, nfds * sizeof(struct evepoll));
+ if (fds == NULL) {
+ event_warn("realloc");
+ return (-1);
+ }
+ epollop->fds = fds;
+ memset(fds + epollop->nfds, 0,
+ (nfds - epollop->nfds) * sizeof(struct evepoll));
+ epollop->nfds = nfds;
+ }
+
+ return (0);
+}
+
<test></test>
+/*
+ * Wait for readiness with epoll_wait() and activate the matching
+ * read/write events.  tv == NULL waits indefinitely; finite timeouts
+ * are rounded up to whole milliseconds and clamped to
+ * MAX_EPOLL_TIMEOUT_MSEC.  Returns 0 on success or after an EINTR
+ * (signals are processed instead), -1 on other epoll_wait() errors.
+ */
+static int
+epoll_dispatch(struct event_base *base, void *arg, struct timeval *tv)
+{
+ struct epollop *epollop = arg;
+ struct epoll_event *events = epollop->events;
+ struct evepoll *evep;
+ int i, res, timeout = -1;
+
+ if (tv != NULL)
+ timeout = tv->tv_sec * 1000 + (tv->tv_usec + 999) / 1000;
+
+ if (timeout > MAX_EPOLL_TIMEOUT_MSEC) {
+ /* Linux kernels can wait forever if the timeout is too big;
+ * see comment on MAX_EPOLL_TIMEOUT_MSEC. */
+ timeout = MAX_EPOLL_TIMEOUT_MSEC;
+ }
+
+ res = epoll_wait(epollop->epfd, events, epollop->nevents, timeout);
+
+ if (res == -1) {
+ if (errno != EINTR) {
+ event_warn("epoll_wait");
+ return (-1);
+ }
+
+ evsignal_process(base);
+ return (0);
+ } else if (base->sig.evsignal_caught) {
+ evsignal_process(base);
+ }
+
+ event_debug(("%s: epoll_wait reports %d", __func__, res));
+
+ for (i = 0; i < res; i++) {
+ int what = events[i].events;
+ struct event *evread = NULL, *evwrite = NULL;
+ int fd = events[i].data.fd;
+
+ /* Ignore fds we have no bookkeeping for. */
+ if (fd < 0 || fd >= epollop->nfds)
+ continue;
+ evep = &epollop->fds[fd];
+
+ /* On HUP/ERR wake both directions so their handlers can
+ * observe the error condition. */
+ if (what & (EPOLLHUP|EPOLLERR)) {
+ evread = evep->evread;
+ evwrite = evep->evwrite;
+ } else {
+ if (what & EPOLLIN) {
+ evread = evep->evread;
+ }
+
+ if (what & EPOLLOUT) {
+ evwrite = evep->evwrite;
+ }
+ }
+
+ if (!(evread||evwrite))
+ continue;
+
+ if (evread != NULL)
+ event_active(evread, EV_READ, 1);
+ if (evwrite != NULL)
+ event_active(evwrite, EV_WRITE, 1);
+ }
+
+ return (0);
+}
+
+
+/*
+ * Register an event with epoll.  Signal events are delegated to
+ * evsignal_add().  Uses EPOLL_CTL_MOD instead of EPOLL_CTL_ADD when
+ * the fd already has an event registered, growing the fd table first
+ * if necessary.  Returns 0 on success, -1 on failure.
+ */
+static int
+epoll_add(void *arg, struct event *ev)
+{
+ struct epollop *epollop = arg;
+ struct epoll_event epev = {0, {0}};
+ struct evepoll *evep;
+ int fd, op, events;
+
+ if (ev->ev_events & EV_SIGNAL)
+ return (evsignal_add(ev));
+
+ fd = ev->ev_fd;
+ if (fd >= epollop->nfds) {
+ /* Extend the file descriptor array as necessary */
+ if (epoll_recalc(ev->ev_base, epollop, fd) == -1)
+ return (-1);
+ }
+ evep = &epollop->fds[fd];
+ op = EPOLL_CTL_ADD;
+ events = 0;
+ if (evep->evread != NULL) {
+ events |= EPOLLIN;
+ op = EPOLL_CTL_MOD;
+ }
+ if (evep->evwrite != NULL) {
+ events |= EPOLLOUT;
+ op = EPOLL_CTL_MOD;
+ }
+
+ if (ev->ev_events & EV_READ)
+ events |= EPOLLIN;
+ if (ev->ev_events & EV_WRITE)
+ events |= EPOLLOUT;
+
+ epev.data.fd = fd;
+ epev.events = events;
+ if (epoll_ctl(epollop->epfd, op, ev->ev_fd, &epev) == -1)
+ return (-1);
+
+ /* Update events responsible */
+ if (ev->ev_events & EV_READ)
+ evep->evread = ev;
+ if (ev->ev_events & EV_WRITE)
+ evep->evwrite = ev;
+
+ return (0);
+}
+
+/*
+ * Unregister an event.  Signal events are delegated to evsignal_del().
+ * If the fd still has the other direction registered, downgrade to
+ * EPOLL_CTL_MOD with the remaining event mask instead of
+ * EPOLL_CTL_DEL.  Returns 0 on success (including fds that were never
+ * registered), -1 on epoll_ctl() failure.
+ */
+static int
+epoll_del(void *arg, struct event *ev)
+{
+ struct epollop *epollop = arg;
+ struct epoll_event epev = {0, {0}};
+ struct evepoll *evep;
+ int fd, events, op;
+ int needwritedelete = 1, needreaddelete = 1;
+
+ if (ev->ev_events & EV_SIGNAL)
+ return (evsignal_del(ev));
+
+ fd = ev->ev_fd;
+ if (fd >= epollop->nfds)
+ return (0);
+ evep = &epollop->fds[fd];
+
+ op = EPOLL_CTL_DEL;
+ events = 0;
+
+ if (ev->ev_events & EV_READ)
+ events |= EPOLLIN;
+ if (ev->ev_events & EV_WRITE)
+ events |= EPOLLOUT;
+
+ /* Deleting only one of two registered directions: keep the other
+ * direction alive via EPOLL_CTL_MOD. */
+ if ((events & (EPOLLIN|EPOLLOUT)) != (EPOLLIN|EPOLLOUT)) {
+ if ((events & EPOLLIN) && evep->evwrite != NULL) {
+ needwritedelete = 0;
+ events = EPOLLOUT;
+ op = EPOLL_CTL_MOD;
+ } else if ((events & EPOLLOUT) && evep->evread != NULL) {
+ needreaddelete = 0;
+ events = EPOLLIN;
+ op = EPOLL_CTL_MOD;
+ }
+ }
+
+ epev.events = events;
+ epev.data.fd = fd;
+
+ if (needreaddelete)
+ evep->evread = NULL;
+ if (needwritedelete)
+ evep->evwrite = NULL;
+
+ if (epoll_ctl(epollop->epfd, op, fd, &epev) == -1)
+ return (-1);
+
+ return (0);
+}
+
+/*
+ * Release all epoll backend state: signal bookkeeping, the fd and
+ * event tables, and the epoll descriptor itself.
+ */
+static void
+epoll_dealloc(struct event_base *base, void *arg)
+{
+ struct epollop *epollop = arg;
+
+ evsignal_dealloc(base);
+ if (epollop->fds)
+ free(epollop->fds);
+ if (epollop->events)
+ free(epollop->events);
+ if (epollop->epfd >= 0)
+ close(epollop->epfd);
+
+ memset(epollop, 0, sizeof(struct epollop));
+ free(epollop);
+}
diff --git a/libevent/epoll_sub.c b/libevent/epoll_sub.c
new file mode 100644
index 00000000000..431970c73a6
--- /dev/null
+++ b/libevent/epoll_sub.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2003 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdint.h>
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/syscall.h>
+#include <sys/epoll.h>
+#include <unistd.h>
+
+/* Invoke the epoll_create system call directly via syscall(2). */
+int
+epoll_create(int size)
+{
+ return (syscall(__NR_epoll_create, size));
+}
+
+/* Invoke the epoll_ctl system call directly via syscall(2). */
+int
+epoll_ctl(int epfd, int op, int fd, struct epoll_event *event)
+{
+
+ return (syscall(__NR_epoll_ctl, epfd, op, fd, event));
+}
+
+/* Invoke the epoll_wait system call directly via syscall(2). */
+int
+epoll_wait(int epfd, struct epoll_event *events, int maxevents, int timeout)
+{
+ return (syscall(__NR_epoll_wait, epfd, events, maxevents, timeout));
+}
diff --git a/libevent/evbuffer.c b/libevent/evbuffer.c
new file mode 100644
index 00000000000..f2179a5044f
--- /dev/null
+++ b/libevent/evbuffer.c
@@ -0,0 +1,455 @@
+/*
+ * Copyright (c) 2002-2004 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef HAVE_STDARG_H
+#include <stdarg.h>
+#endif
+
+#ifdef WIN32
+#include <winsock2.h>
+#endif
+
+#include "evutil.h"
+#include "event.h"
+
+/* prototypes */
+
+void bufferevent_read_pressure_cb(struct evbuffer *, size_t, size_t, void *);
+
+/*
+ * (Re)schedule `ev`, applying `timeout` seconds as its timeout; a
+ * timeout of 0 means no timeout at all.  Returns event_add()'s result
+ * (0 on success, -1 on failure).
+ */
+static int
+bufferevent_add(struct event *ev, int timeout)
+{
+ struct timeval tv, *ptv = NULL;
+
+ if (timeout) {
+ evutil_timerclear(&tv);
+ tv.tv_sec = timeout;
+ ptv = &tv;
+ }
+
+ return (event_add(ev, ptv));
+}
+
+/*
+ * This callback is executed when the size of the input buffer changes.
+ * We use it to apply back pressure on the reading side.
+ */
+
+/*
+ * Input-buffer size callback, installed by bufferevent_readcb() when
+ * the read high-watermark is reached.  Once the buffer falls back
+ * below the watermark, uninstall ourselves and resume reading if it
+ * is still enabled.  `arg` is the owning bufferevent.
+ */
+void
+bufferevent_read_pressure_cb(struct evbuffer *buf, size_t old, size_t now,
+ void *arg) {
+ struct bufferevent *bufev = arg;
+ /*
+ * If we are below the watermark then reschedule reading if it's
+ * still enabled.
+ */
+ if (bufev->wm_read.high == 0 || now < bufev->wm_read.high) {
+ evbuffer_setcb(buf, NULL, NULL);
+
+ if (bufev->enabled & EV_READ)
+ bufferevent_add(&bufev->ev_read, bufev->timeout_read);
+ }
+}
+
+/*
+ * Read event handler: pulls data from fd into bufev->input, enforces
+ * the read high-watermark, reschedules itself, and finally invokes
+ * the user read callback — or the error callback on timeout, error,
+ * or EOF.
+ */
+static void
+bufferevent_readcb(int fd, short event, void *arg)
+{
+ struct bufferevent *bufev = arg;
+ int res = 0;
+ short what = EVBUFFER_READ;
+ size_t len;
+ int howmuch = -1;	/* -1: no explicit read limit */
+
+ if (event == EV_TIMEOUT) {
+ what |= EVBUFFER_TIMEOUT;
+ goto error;
+ }
+
+ /*
+ * If we have a high watermark configured then we don't want to
+ * read more data than would make us reach the watermark.
+ */
+ if (bufev->wm_read.high != 0) {
+ howmuch = bufev->wm_read.high - EVBUFFER_LENGTH(bufev->input);
+ /* we might have lowered the watermark, stop reading */
+ if (howmuch <= 0) {
+ struct evbuffer *buf = bufev->input;
+ event_del(&bufev->ev_read);
+ evbuffer_setcb(buf,
+ bufferevent_read_pressure_cb, bufev);
+ return;
+ }
+ }
+
+ res = evbuffer_read(bufev->input, fd, howmuch);
+ if (res == -1) {
+ if (errno == EAGAIN || errno == EINTR)
+ goto reschedule;
+ /* error case */
+ what |= EVBUFFER_ERROR;
+ } else if (res == 0) {
+ /* eof case */
+ what |= EVBUFFER_EOF;
+ }
+
+ if (res <= 0)
+ goto error;
+
+ bufferevent_add(&bufev->ev_read, bufev->timeout_read);
+
+ /* See if this callback meets the water marks */
+ len = EVBUFFER_LENGTH(bufev->input);
+ if (bufev->wm_read.low != 0 && len < bufev->wm_read.low)
+ return;
+ if (bufev->wm_read.high != 0 && len >= bufev->wm_read.high) {
+ struct evbuffer *buf = bufev->input;
+ event_del(&bufev->ev_read);
+
+ /* Now schedule a callback for us when the buffer changes */
+ evbuffer_setcb(buf, bufferevent_read_pressure_cb, bufev);
+ }
+
+ /* Invoke the user callback - must always be called last */
+ if (bufev->readcb != NULL)
+ (*bufev->readcb)(bufev, bufev->cbarg);
+ return;
+
+ reschedule:
+ bufferevent_add(&bufev->ev_read, bufev->timeout_read);
+ return;
+
+ error:
+ (*bufev->errorcb)(bufev, what, bufev->cbarg);
+}
+
+/*
+ * Write event handler: flushes bufev->output to fd, reschedules while
+ * data remains, and calls the user write callback once the buffer is
+ * drained to at or below the write low-watermark.  The error callback
+ * is invoked on timeout, error, or EOF.
+ */
+static void
+bufferevent_writecb(int fd, short event, void *arg)
+{
+ struct bufferevent *bufev = arg;
+ int res = 0;
+ short what = EVBUFFER_WRITE;
+
+ if (event == EV_TIMEOUT) {
+ what |= EVBUFFER_TIMEOUT;
+ goto error;
+ }
+
+ if (EVBUFFER_LENGTH(bufev->output)) {
+ res = evbuffer_write(bufev->output, fd);
+ if (res == -1) {
+#ifndef WIN32
+/*todo. evbuffer uses WriteFile when WIN32 is set. WIN32 system calls do not
+ *set errno. thus this error checking is not portable*/
+ if (errno == EAGAIN ||
+ errno == EINTR ||
+ errno == EINPROGRESS)
+ goto reschedule;
+ /* error case */
+ what |= EVBUFFER_ERROR;
+
+#else
+ goto reschedule;
+#endif
+
+ } else if (res == 0) {
+ /* eof case */
+ what |= EVBUFFER_EOF;
+ }
+ if (res <= 0)
+ goto error;
+ }
+
+ if (EVBUFFER_LENGTH(bufev->output) != 0)
+ bufferevent_add(&bufev->ev_write, bufev->timeout_write);
+
+ /*
+ * Invoke the user callback if our buffer is drained or below the
+ * low watermark.
+ */
+ if (bufev->writecb != NULL &&
+ EVBUFFER_LENGTH(bufev->output) <= bufev->wm_write.low)
+ (*bufev->writecb)(bufev, bufev->cbarg);
+
+ return;
+
+ reschedule:
+ if (EVBUFFER_LENGTH(bufev->output) != 0)
+ bufferevent_add(&bufev->ev_write, bufev->timeout_write);
+ return;
+
+ error:
+ (*bufev->errorcb)(bufev, what, bufev->cbarg);
+}
+
+/*
+ * Create a new buffered event object.
+ *
+ * The read callback is invoked whenever we read new data.
+ * The write callback is invoked whenever the output buffer is drained.
+ * The error callback is invoked on a write/read error or on EOF.
+ *
+ * Both read and write callbacks maybe NULL. The error callback is not
+ * allowed to be NULL and have to be provided always.
+ */
+
+/*
+ * Allocate a bufferevent for `fd`.  The error callback must be
+ * non-NULL (the error paths invoke it without a NULL check);
+ * read/write callbacks may be NULL.  Writing starts enabled, reading
+ * must be enabled explicitly.  Returns NULL on allocation failure.
+ */
+struct bufferevent *
+bufferevent_new(int fd, evbuffercb readcb, evbuffercb writecb,
+ everrorcb errorcb, void *cbarg)
+{
+ struct bufferevent *bufev;
+
+ if ((bufev = calloc(1, sizeof(struct bufferevent))) == NULL)
+ return (NULL);
+
+ if ((bufev->input = evbuffer_new()) == NULL) {
+ free(bufev);
+ return (NULL);
+ }
+
+ if ((bufev->output = evbuffer_new()) == NULL) {
+ evbuffer_free(bufev->input);
+ free(bufev);
+ return (NULL);
+ }
+
+ event_set(&bufev->ev_read, fd, EV_READ, bufferevent_readcb, bufev);
+ event_set(&bufev->ev_write, fd, EV_WRITE, bufferevent_writecb, bufev);
+
+ bufferevent_setcb(bufev, readcb, writecb, errorcb, cbarg);
+
+ /*
+ * Set to EV_WRITE so that using bufferevent_write is going to
+ * trigger a callback. Reading needs to be explicitly enabled
+ * because otherwise no data will be available.
+ */
+ bufev->enabled = EV_WRITE;
+
+ return (bufev);
+}
+
+/* Replace the three user callbacks and the opaque callback argument. */
+void
+bufferevent_setcb(struct bufferevent *bufev,
+ evbuffercb readcb, evbuffercb writecb, everrorcb errorcb, void *cbarg)
+{
+ bufev->readcb = readcb;
+ bufev->writecb = writecb;
+ bufev->errorcb = errorcb;
+
+ bufev->cbarg = cbarg;
+}
+
+/*
+ * Re-point the bufferevent at a new fd.  The old events are deleted
+ * and new ones set up on the same base, but NOT re-added — as the
+ * trailing note says, the caller may need to re-trigger registration
+ * (e.g. via bufferevent_enable).
+ */
+void
+bufferevent_setfd(struct bufferevent *bufev, int fd)
+{
+ event_del(&bufev->ev_read);
+ event_del(&bufev->ev_write);
+
+ event_set(&bufev->ev_read, fd, EV_READ, bufferevent_readcb, bufev);
+ event_set(&bufev->ev_write, fd, EV_WRITE, bufferevent_writecb, bufev);
+ if (bufev->ev_base != NULL) {
+ event_base_set(bufev->ev_base, &bufev->ev_read);
+ event_base_set(bufev->ev_base, &bufev->ev_write);
+ }
+
+ /* might have to manually trigger event registration */
+}
+
+/*
+ * Set the priority of both underlying events.  Returns 0 on success,
+ * -1 if either event_priority_set() call fails.
+ */
+int
+bufferevent_priority_set(struct bufferevent *bufev, int priority)
+{
+ if (event_priority_set(&bufev->ev_read, priority) == -1)
+ return (-1);
+ if (event_priority_set(&bufev->ev_write, priority) == -1)
+ return (-1);
+
+ return (0);
+}
+
+/* Closing the file descriptor is the responsibility of the caller */
+
+/* Tear down both events, free the input/output buffers, and free the
+ * bufferevent itself.  The fd is NOT closed (see comment above). */
+void
+bufferevent_free(struct bufferevent *bufev)
+{
+ event_del(&bufev->ev_read);
+ event_del(&bufev->ev_write);
+
+ evbuffer_free(bufev->input);
+ evbuffer_free(bufev->output);
+
+ free(bufev);
+}
+
+/*
+ * Returns 0 on success;
+ * -1 on failure.
+ */
+
+/*
+ * Append `size` bytes of `data` to the output buffer and, if writing
+ * is enabled, schedule the write event.  Returns 0 on success, -1 on
+ * failure (from evbuffer_add).
+ */
+int
+bufferevent_write(struct bufferevent *bufev, const void *data, size_t size)
+{
+ int res;
+
+ res = evbuffer_add(bufev->output, data, size);
+
+ if (res == -1)
+ return (res);
+
+ /* If everything is okay, we need to schedule a write */
+ if (size > 0 && (bufev->enabled & EV_WRITE))
+ bufferevent_add(&bufev->ev_write, bufev->timeout_write);
+
+ return (res);
+}
+
+/*
+ * Append the contents of `buf` to the output buffer, draining `buf`
+ * on success.  Returns bufferevent_write()'s result.
+ */
+int
+bufferevent_write_buffer(struct bufferevent *bufev, struct evbuffer *buf)
+{
+ int res;
+
+ res = bufferevent_write(bufev, buf->buffer, buf->off);
+ if (res != -1)
+ evbuffer_drain(buf, buf->off);
+
+ return (res);
+}
+
+/*
+ * Copy up to `size` bytes from the input buffer into `data`, draining
+ * what was copied.  Returns the number of bytes actually copied.
+ */
+size_t
+bufferevent_read(struct bufferevent *bufev, void *data, size_t size)
+{
+ struct evbuffer *buf = bufev->input;
+
+ if (buf->off < size)
+ size = buf->off;
+
+ /* Copy the available data to the user buffer */
+ memcpy(data, buf->buffer, size);
+
+ if (size)
+ evbuffer_drain(buf, size);
+
+ return (size);
+}
+
+/*
+ * Enable reading and/or writing: schedule the corresponding event(s)
+ * and record them in bufev->enabled.  Returns 0 on success, -1 if
+ * scheduling fails.
+ */
+int
+bufferevent_enable(struct bufferevent *bufev, short event)
+{
+ if (event & EV_READ) {
+ if (bufferevent_add(&bufev->ev_read, bufev->timeout_read) == -1)
+ return (-1);
+ }
+ if (event & EV_WRITE) {
+ if (bufferevent_add(&bufev->ev_write, bufev->timeout_write) == -1)
+ return (-1);
+ }
+
+ bufev->enabled |= event;
+ return (0);
+}
+
+/*
+ * Disable reading and/or writing: delete the corresponding event(s)
+ * and clear them from bufev->enabled.  Returns 0 on success, -1 if
+ * event_del() fails.
+ */
+int
+bufferevent_disable(struct bufferevent *bufev, short event)
+{
+ if (event & EV_READ) {
+ if (event_del(&bufev->ev_read) == -1)
+ return (-1);
+ }
+ if (event & EV_WRITE) {
+ if (event_del(&bufev->ev_write) == -1)
+ return (-1);
+ }
+
+ bufev->enabled &= ~event;
+ return (0);
+}
+
+/*
+ * Sets the read and write timeout for a buffered event.
+ */
+
+/* Store the new read/write timeouts (seconds; 0 disables the timeout,
+ * see bufferevent_add) and re-arm any pending events with them. */
+void
+bufferevent_settimeout(struct bufferevent *bufev,
+ int timeout_read, int timeout_write) {
+ bufev->timeout_read = timeout_read;
+ bufev->timeout_write = timeout_write;
+
+ if (event_pending(&bufev->ev_read, EV_READ, NULL))
+ bufferevent_add(&bufev->ev_read, timeout_read);
+ if (event_pending(&bufev->ev_write, EV_WRITE, NULL))
+ bufferevent_add(&bufev->ev_write, timeout_write);
+}
+
+/*
+ * Sets the water marks
+ */
+
+/*
+ * Set the low/high watermarks for the directions named in `events`,
+ * then re-evaluate read pressure so reading can resume (or stop)
+ * under the new limits.
+ */
+void
+bufferevent_setwatermark(struct bufferevent *bufev, short events,
+ size_t lowmark, size_t highmark)
+{
+ if (events & EV_READ) {
+ bufev->wm_read.low = lowmark;
+ bufev->wm_read.high = highmark;
+ }
+
+ if (events & EV_WRITE) {
+ bufev->wm_write.low = lowmark;
+ bufev->wm_write.high = highmark;
+ }
+
+ /* If the watermarks changed then see if we should call read again */
+ bufferevent_read_pressure_cb(bufev->input,
+ 0, EVBUFFER_LENGTH(bufev->input), bufev);
+}
+
+/*
+ * Attach the bufferevent's read and write events to a specific
+ * event_base.  Returns 0 on success, -1 if either event_base_set()
+ * call fails.
+ */
+int
+bufferevent_base_set(struct event_base *base, struct bufferevent *bufev)
+{
+ int res;
+
+ bufev->ev_base = base;
+
+ res = event_base_set(base, &bufev->ev_read);
+ if (res == -1)
+ return (res);
+
+ res = event_base_set(base, &bufev->ev_write);
+ return (res);
+}
diff --git a/libevent/evdns.3 b/libevent/evdns.3
new file mode 100644
index 00000000000..10414fa2efb
--- /dev/null
+++ b/libevent/evdns.3
@@ -0,0 +1,322 @@
+.\"
+.\" Copyright (c) 2006 Niels Provos <provos@citi.umich.edu>
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\"
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. The name of the author may not be used to endorse or promote products
+.\" derived from this software without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+.\" INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+.\" AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+.\" THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+.\" EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+.\" PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+.\" OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+.\" OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+.\" ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd October 7, 2006
+.Dt EVDNS 3
+.Os
+.Sh NAME
+.Nm evdns_init
+.Nm evdns_shutdown
+.Nm evdns_err_to_string
+.Nm evdns_nameserver_add
+.Nm evdns_count_nameservers
+.Nm evdns_clear_nameservers_and_suspend
+.Nm evdns_resume
+.Nm evdns_nameserver_ip_add
+.Nm evdns_resolve_ipv4
+.Nm evdns_resolve_reverse
+.Nm evdns_resolv_conf_parse
+.Nm evdns_config_windows_nameservers
+.Nm evdns_search_clear
+.Nm evdns_search_add
+.Nm evdns_search_ndots_set
+.Nm evdns_set_log_fn
+.Nd asynchronous functions for DNS resolution.
+.Sh SYNOPSIS
+.Fd #include <sys/time.h>
+.Fd #include <event.h>
+.Fd #include <evdns.h>
+.Ft int
+.Fn evdns_init
+.Ft void
+.Fn evdns_shutdown "int fail_requests"
+.Ft "const char *"
+.Fn evdns_err_to_string "int err"
+.Ft int
+.Fn evdns_nameserver_add "unsigned long int address"
+.Ft int
+.Fn evdns_count_nameservers
+.Ft int
+.Fn evdns_clear_nameservers_and_suspend
+.Ft int
+.Fn evdns_resume
+.Ft int
+.Fn evdns_nameserver_ip_add "const char *ip_as_string"
+.Ft int
+.Fn evdns_resolve_ipv4 "const char *name" "int flags" "evdns_callback_type callback" "void *ptr"
+.Ft int
+.Fn evdns_resolve_reverse "struct in_addr *in" "int flags" "evdns_callback_type callback" "void *ptr"
+.Ft int
+.Fn evdns_resolv_conf_parse "int flags" "const char *"
+.Ft void
+.Fn evdns_search_clear
+.Ft void
+.Fn evdns_search_add "const char *domain"
+.Ft void
+.Fn evdns_search_ndots_set "const int ndots"
+.Ft void
+.Fn evdns_set_log_fn "evdns_debug_log_fn_type fn"
+.Ft int
+.Fn evdns_config_windows_nameservers
+.Sh DESCRIPTION
+Welcome, gentle reader
+.Pp
+Async DNS lookups are really a whole lot harder than they should be,
+mostly stemming from the fact that the libc resolver has never been
+very good at them. Before you use this library you should see if libc
+can do the job for you with the modern async call getaddrinfo_a
+(see http://www.imperialviolet.org/page25.html#e498). Otherwise,
+please continue.
+.Pp
+This code is based on libevent and you must call event_init before
+any of the APIs in this file. You must also seed the OpenSSL random
+source if you are using OpenSSL for ids (see below).
+.Pp
+This library is designed to be included and shipped with your source
+code. You statically link with it. You should also test for the
+existence of strtok_r and define HAVE_STRTOK_R if you have it.
+.Pp
+The DNS protocol requires a good source of id numbers and these
+numbers should be unpredictable for spoofing reasons. There are
+three methods for generating them here and you must define exactly
+one of them. In increasing order of preference:
+.Pp
+.Bl -tag -width "DNS_USE_GETTIMEOFDAY_FOR_ID" -compact -offset indent
+.It DNS_USE_GETTIMEOFDAY_FOR_ID
+Using the bottom 16 bits of the usec result from gettimeofday. This
+is a pretty poor solution but should work anywhere.
+.It DNS_USE_CPU_CLOCK_FOR_ID
+Using the bottom 16 bits of the nsec result from the CPU's time
+counter. This is better, but may not work everywhere. Requires
+POSIX realtime support and you'll need to link against -lrt on
+glibc systems at least.
+.It DNS_USE_OPENSSL_FOR_ID
+Uses the OpenSSL RAND_bytes call to generate the data. You must
+have seeded the pool before making any calls to this library.
+.El
+.Pp
+The library keeps track of the state of nameservers and will avoid
+them when they go down. Otherwise it will round robin between them.
+.Pp
+Quick start guide:
+ #include "evdns.h"
+ void callback(int result, char type, int count, int ttl,
+ void *addresses, void *arg);
+ evdns_resolv_conf_parse(DNS_OPTIONS_ALL, "/etc/resolv.conf");
+ evdns_resolve("www.hostname.com", 0, callback, NULL);
+.Pp
+When the lookup is complete the callback function is called. The
+first argument will be one of the DNS_ERR_* defines in evdns.h.
+Hopefully it will be DNS_ERR_NONE, in which case type will be
+DNS_IPv4_A, count will be the number of IP addresses, ttl is the time
+which the data can be cached for (in seconds), addresses will point
+to an array of uint32_t's and arg will be whatever you passed to
+evdns_resolve.
+.Pp
+Searching:
+.Pp
+In order for this library to be a good replacement for glibc's resolver it
+supports searching. This involves setting a list of default domains, in
+which names will be queried for. The number of dots in the query name
+determines the order in which this list is used.
+.Pp
+Searching appears to be a single lookup from the point of view of the API,
+although many DNS queries may be generated from a single call to
+evdns_resolve. Searching can also drastically slow down the resolution
+of names.
+.Pp
+To disable searching:
+.Bl -enum -compact -offset indent
+.It
+Never set it up. If you never call
+.Fn evdns_resolv_conf_parse,
+.Fn evdns_init,
+or
+.Fn evdns_search_add
+then no searching will occur.
+.It
+If you do call
+.Fn evdns_resolv_conf_parse
+then don't pass
+.Va DNS_OPTION_SEARCH
+(or
+.Va DNS_OPTIONS_ALL,
+which implies it).
+.It
+When calling
+.Fn evdns_resolve,
+pass the
+.Va DNS_QUERY_NO_SEARCH
+flag.
+.El
+.Pp
+The order of searches depends on the number of dots in the name. If the
+number is greater than the ndots setting then the names is first tried
+globally. Otherwise each search domain is appended in turn.
+.Pp
+The ndots setting can either be set from a resolv.conf, or by calling
+evdns_search_ndots_set.
+.Pp
+For example, with ndots set to 1 (the default) and a search domain list of
+["myhome.net"]:
+ Query: www
+ Order: www.myhome.net, www.
+.Pp
+ Query: www.abc
+ Order: www.abc., www.abc.myhome.net
+.Pp
+.Sh API reference
+.Pp
+.Bl -tag -width 0123456
+.It Ft int Fn evdns_init
+Initializes support for non-blocking name resolution by calling
+.Fn evdns_resolv_conf_parse
+on UNIX and
+.Fn evdns_config_windows_nameservers
+on Windows.
+.It Ft int Fn evdns_nameserver_add "unsigned long int address"
+Add a nameserver. The address should be an IP address in
+network byte order. The type of address is chosen so that
+it matches in_addr.s_addr.
+Returns non-zero on error.
+.It Ft int Fn evdns_nameserver_ip_add "const char *ip_as_string"
+This wraps the above function by parsing a string as an IP
+address and adds it as a nameserver.
+Returns non-zero on error
+.It Ft int Fn evdns_resolve "const char *name" "int flags" "evdns_callback_type callback" "void *ptr"
+Resolve a name. The name parameter should be a DNS name.
+The flags parameter should be 0, or DNS_QUERY_NO_SEARCH
+which disables searching for this query. (see defn of
+searching above).
+.Pp
+The callback argument is a function which is called when
+this query completes and ptr is an argument which is passed
+to that callback function.
+.Pp
+Returns non-zero on error
+.It Ft void Fn evdns_search_clear
+Clears the list of search domains
+.It Ft void Fn evdns_search_add "const char *domain"
+Add a domain to the list of search domains
+.It Ft void Fn evdns_search_ndots_set "int ndots"
+Set the number of dots which, when found in a name, causes
+the first query to be without any search domain.
+.It Ft int Fn evdns_count_nameservers "void"
+Return the number of configured nameservers (not necessarily the
+number of running nameservers). This is useful for double-checking
+whether our calls to the various nameserver configuration functions
+have been successful.
+.It Ft int Fn evdns_clear_nameservers_and_suspend "void"
+Remove all currently configured nameservers, and suspend all pending
+resolves. Resolves will not necessarily be re-attempted until
+evdns_resume() is called.
+.It Ft int Fn evdns_resume "void"
+Re-attempt resolves left in limbo after an earlier call to
+evdns_clear_nameservers_and_suspend().
+.It Ft int Fn evdns_config_windows_nameservers "void"
+Attempt to configure a set of nameservers based on platform settings on
+a win32 host. Preferentially tries to use GetNetworkParams; if that fails,
+looks in the registry. Returns 0 on success, nonzero on failure.
+.It Ft int Fn evdns_resolv_conf_parse "int flags" "const char *filename"
+Parse a resolv.conf like file from the given filename.
+.Pp
+See the man page for resolv.conf for the format of this file.
+The flags argument determines what information is parsed from
+this file:
+.Bl -tag -width "DNS_OPTION_NAMESERVERS" -offset indent -compact -nested
+.It DNS_OPTION_SEARCH
+domain, search and ndots options
+.It DNS_OPTION_NAMESERVERS
+nameserver lines
+.It DNS_OPTION_MISC
+timeout and attempts options
+.It DNS_OPTIONS_ALL
+all of the above
+.El
+.Pp
+The following directives are not parsed from the file:
+ sortlist, rotate, no-check-names, inet6, debug
+.Pp
+Returns non-zero on error:
+.Bl -tag -width "0" -offset indent -compact -nested
+.It 0
+no errors
+.It 1
+failed to open file
+.It 2
+failed to stat file
+.It 3
+file too large
+.It 4
+out of memory
+.It 5
+short read from file
+.El
+.El
+.Sh Internals:
+Requests are kept in two queues. The first is the inflight queue. In
+this queue requests have an allocated transaction id and nameserver.
+They will soon be transmitted if they haven't already been.
+.Pp
+The second is the waiting queue. The size of the inflight ring is
+limited and all other requests wait in waiting queue for space. This
+bounds the number of concurrent requests so that we don't flood the
+nameserver. Several algorithms require a full walk of the inflight
+queue and so bounding its size keeps thing going nicely under huge
+(many thousands of requests) loads.
+.Pp
+If a nameserver loses too many requests it is considered down and we
+try not to use it. After a while we send a probe to that nameserver
+(a lookup for google.com) and, if it replies, we consider it working
+again. If the nameserver fails a probe we wait longer to try again
+with the next probe.
+.Sh SEE ALSO
+.Xr event 3 ,
+.Xr gethostbyname 3 ,
+.Xr resolv.conf 5
+.Sh HISTORY
+The
+.Nm evdns
+API was developed by Adam Langley on top of the
+.Nm libevent
+API.
+The code was integrated into
+.Nm Tor
+by Nick Mathewson and finally put into
+.Nm libevent
+itself by Niels Provos.
+.Sh AUTHORS
+The
+.Nm evdns
+API and code was written by Adam Langley with significant
+contributions by Nick Mathewson.
+.Sh BUGS
+This documentation is neither complete nor authoritative.
+If you are in doubt about the usage of this API then
+check the source code to find out how it works, write
+up the missing piece of documentation and send it to
+me for inclusion in this man page.
diff --git a/libevent/evdns.c b/libevent/evdns.c
new file mode 100644
index 00000000000..e13357f1596
--- /dev/null
+++ b/libevent/evdns.c
@@ -0,0 +1,3200 @@
+/* $Id: evdns.c 6979 2006-08-04 18:31:13Z nickm $ */
+
+/* The original version of this module was written by Adam Langley; for
+ * a history of modifications, check out the subversion logs.
+ *
+ * When editing this module, try to keep it re-mergeable by Adam. Don't
+ * reformat the whitespace, add Tor dependencies, or so on.
+ *
+ * TODO:
+ * - Support IPv6 and PTR records.
+ * - Replace all externally visible magic numbers with #defined constants.
+ * - Write documentation for APIs of all external functions.
+ */
+
+/* Async DNS Library
+ * Adam Langley <agl@imperialviolet.org>
+ * http://www.imperialviolet.org/eventdns.html
+ * Public Domain code
+ *
+ * This software is Public Domain. To view a copy of the public domain dedication,
+ * visit http://creativecommons.org/licenses/publicdomain/ or send a letter to
+ * Creative Commons, 559 Nathan Abbott Way, Stanford, California 94305, USA.
+ *
+ * I ask and expect, but do not require, that all derivative works contain an
+ * attribution similar to:
+ * Parts developed by Adam Langley <agl@imperialviolet.org>
+ *
+ * You may wish to replace the word "Parts" with something else depending on
+ * the amount of original code.
+ *
+ * (Derivative works does not include programs which link against, run or include
+ * the source verbatim in their source distributions)
+ *
+ * Version: 0.1b
+ */
+
+#include <sys/types.h>
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef DNS_USE_FTIME_FOR_ID
+#include <sys/timeb.h>
+#endif
+
+#ifndef DNS_USE_CPU_CLOCK_FOR_ID
+#ifdef HAVE_GETTIMEOFDAY
+#define DNS_USE_GETTIMEOFDAY_FOR_ID 1
+#endif
+#endif
+
+#ifndef DNS_USE_CPU_CLOCK_FOR_ID
+#ifdef HAVE_GETTIMEOFDAY
+#define DNS_USE_GETTIMEOFDAY_FOR_ID 1
+#endif
+#endif
+
+#ifndef DNS_USE_CPU_CLOCK_FOR_ID
+#ifndef DNS_USE_GETTIMEOFDAY_FOR_ID
+#ifndef DNS_USE_OPENSSL_FOR_ID
+#ifndef DNS_USE_FTIME_FOR_ID
+#error Must configure at least one id generation method.
+#error Please see the documentation.
+#endif
+#endif
+#endif
+#endif
+
+/* #define _POSIX_C_SOURCE 200507 */
+#define _GNU_SOURCE
+
+#ifdef DNS_USE_CPU_CLOCK_FOR_ID
+#ifdef DNS_USE_OPENSSL_FOR_ID
+#error Multiple id options selected
+#endif
+#ifdef DNS_USE_GETTIMEOFDAY_FOR_ID
+#error Multiple id options selected
+#endif
+#include <time.h>
+#endif
+
+#ifdef DNS_USE_OPENSSL_FOR_ID
+#ifdef DNS_USE_GETTIMEOFDAY_FOR_ID
+#error Multiple id options selected
+#endif
+#include <openssl/rand.h>
+#endif
+
+#ifndef _FORTIFY_SOURCE
+#define _FORTIFY_SOURCE 3
+#endif
+
+#include <string.h>
+#include <fcntl.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#ifdef HAVE_STDINT_H
+#include <stdint.h>
+#endif
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <limits.h>
+#include <sys/stat.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdarg.h>
+
+#include "evdns.h"
+#include "evutil.h"
+#include "log.h"
+#ifdef WIN32
+#include <winsock2.h>
+#include <windows.h>
+#include <iphlpapi.h>
+#include <io.h>
+#else
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#endif
+
+#ifdef HAVE_NETINET_IN6_H
+#include <netinet/in6.h>
+#endif
+
+#define EVDNS_LOG_DEBUG 0
+#define EVDNS_LOG_WARN 1
+
+#ifndef HOST_NAME_MAX
+#define HOST_NAME_MAX 255
+#endif
+
+#include <stdio.h>
+
+#undef MIN
+#define MIN(a,b) ((a)<(b)?(a):(b))
+
+#ifdef __USE_ISOC99B
+/* libevent doesn't work without this */
+typedef ev_uint8_t u_char;
+typedef unsigned int uint;
+#endif
+#include <event.h>
+
+#define u64 ev_uint64_t
+#define u32 ev_uint32_t
+#define u16 ev_uint16_t
+#define u8 ev_uint8_t
+
+#ifdef WIN32
+#define open _open
+#define read _read
+#define close _close
+#define strdup _strdup
+#endif
+
+#define MAX_ADDRS 32 /* maximum number of addresses from a single packet */
+/* which we bother recording */
+
+#define TYPE_A EVDNS_TYPE_A
+#define TYPE_CNAME 5
+#define TYPE_PTR EVDNS_TYPE_PTR
+#define TYPE_AAAA EVDNS_TYPE_AAAA
+
+#define CLASS_INET EVDNS_CLASS_INET
+
+/* State for one outbound DNS query.  Requests live on one of two */
+/* circular doubly-linked lists (via next/prev): the inflight list */
+/* (req_head) or the waiting list (req_waiting_head). */
+struct request {
+	u8 *request; /* the dns packet data */
+	unsigned int request_len;
+	int reissue_count;
+	int tx_count; /* the number of times that this packet has been sent */
+	unsigned int request_type; /* TYPE_PTR or TYPE_A */
+	void *user_pointer; /* the pointer given to us for this request */
+	evdns_callback_type user_callback;
+	struct nameserver *ns; /* the server which we last sent it */
+
+	/* elements used by the searching code */
+	int search_index;
+	struct search_state *search_state;
+	char *search_origname; /* needs to be free()ed */
+	int search_flags;
+
+	/* these objects are kept in a circular list */
+	struct request *next, *prev;
+
+	struct event timeout_event;
+
+	u16 trans_id; /* the transaction id */
+	char request_appended; /* true if the request pointer is data which follows this struct */
+	char transmit_me; /* needs to be transmitted */
+};
+
+#ifndef HAVE_STRUCT_IN6_ADDR
+/* Fallback definition for platforms whose headers lack struct in6_addr. */
+struct in6_addr {
+	u8 s6_addr[16];
+};
+#endif
+
+/* Parsed answer data extracted from a DNS reply packet. */
+/* Which union member is valid is determined by 'type' */
+/* (TYPE_A, TYPE_AAAA or TYPE_PTR). */
+struct reply {
+	unsigned int type;
+	unsigned int have_answer;
+	union {
+		struct {
+			u32 addrcount;
+			u32 addresses[MAX_ADDRS];
+		} a;
+		struct {
+			u32 addrcount;
+			struct in6_addr addresses[MAX_ADDRS];
+		} aaaa;
+		struct {
+			char name[HOST_NAME_MAX];
+		} ptr;
+	} data;
+};
+
+/* One upstream DNS server.  Nameservers are kept in a circular list */
+/* rooted at server_head; 'state' tracks whether we currently believe */
+/* the server is answering. */
+struct nameserver {
+	int socket; /* a connected UDP socket */
+	u32 address;
+	u16 port;
+	int failed_times; /* number of times which we have given this server a chance */
+	int timedout; /* number of times in a row a request has timed out */
+	struct event event;
+	/* these objects are kept in a circular list */
+	struct nameserver *next, *prev;
+	struct event timeout_event; /* used to keep the timeout for */
+				    /* when we next probe this server. */
+				    /* Valid if state == 0 */
+	char state; /* zero if we think that this server is down */
+	char choked; /* true if we have an EAGAIN from this server's socket */
+	char write_waiting; /* true if we are waiting for EV_WRITE events */
+};
+
+static struct request *req_head = NULL, *req_waiting_head = NULL;
+static struct nameserver *server_head = NULL;
+
+/* Represents a local port where we're listening for DNS requests. Right now, */
+/* only UDP is supported. */
+struct evdns_server_port {
+	int socket; /* socket we use to read queries and write replies. */
+	int refcnt; /* reference count; incremented per in-progress request */
+		    /* (see request_parse); freed via server_port_free(). */
+	char choked; /* Are we currently blocked from writing? */
+	char closing; /* Are we trying to close this port, pending writes? */
+	evdns_request_callback_fn_type user_callback; /* Fn to handle requests */
+	void *user_data; /* Opaque pointer passed to user_callback */
+	struct event event; /* Read/write event */
+	/* circular list of replies that we want to write. */
+	struct server_request *pending_replies;
+};
+
+/* Represents part of a reply being built. (That is, a single RR.) */
+struct server_reply_item {
+	struct server_reply_item *next; /* next item in sequence. */
+	char *name; /* name part of the RR */
+	u16 type : 16; /* The RR type */
+	u16 class : 16; /* The RR class (usually CLASS_INET) */
+	u32 ttl; /* The RR TTL */
+	char is_name; /* True iff data is a label */
+	u16 datalen; /* Length of data; -1 if data is a label */
+		     /* NOTE(review): datalen is u16, so the "-1" in the */
+		     /* comment above cannot literally be stored — confirm */
+		     /* whether is_name alone marks label data. */
+	void *data; /* The contents of the RR */
+};
+
+/* Represents a request that we've received as a DNS server, and holds */
+/* the components of the reply as we're constructing it. */
+struct server_request {
+	/* Pointers to the next and previous entries on the list of replies */
+	/* that we're waiting to write. Only set if we have tried to respond */
+	/* and gotten EAGAIN. */
+	struct server_request *next_pending;
+	struct server_request *prev_pending;
+
+	u16 trans_id; /* Transaction id. */
+	struct evdns_server_port *port; /* Which port received this request on? */
+	struct sockaddr_storage addr; /* Where to send the response */
+	socklen_t addrlen; /* length of addr */
+
+	int n_answer; /* how many answer RRs have been set? */
+	int n_authority; /* how many authority RRs have been set? */
+	int n_additional; /* how many additional RRs have been set? */
+
+	struct server_reply_item *answer; /* linked list of answer RRs */
+	struct server_reply_item *authority; /* linked list of authority RRs */
+	struct server_reply_item *additional; /* linked list of additional RRs */
+
+	/* Constructed response. Only set once we're ready to send a reply. */
+	/* Once this is set, the RR fields are cleared, and no more should be set. */
+	char *response;
+	size_t response_len;
+
+	/* Caller-visible fields: flags, questions. */
+	/* base must stay last-positioned as laid out here: TO_SERVER_REQUEST */
+	/* recovers the server_request from a pointer to base via OFFSET_OF. */
+	struct evdns_server_request base;
+};
+
+/* helper macro */
+#define OFFSET_OF(st, member) ((off_t) (((char*)&((st*)0)->member)-(char*)0))
+
+/* Given a pointer to an evdns_server_request, get the corresponding */
+/* server_request. */
+#define TO_SERVER_REQUEST(base_ptr) \
+ ((struct server_request*) \
+ (((char*)(base_ptr) - OFFSET_OF(struct server_request, base))))
+
+/* The number of good nameservers that we have */
+static int global_good_nameservers = 0;
+
+/* inflight requests are contained in the req_head list */
+/* and are actually going out across the network */
+static int global_requests_inflight = 0;
+/* requests which aren't inflight are in the waiting list */
+/* and are counted here */
+static int global_requests_waiting = 0;
+
+/* cap on simultaneously inflight requests; excess requests sit in the */
+/* waiting queue until evdns_requests_pump_waiting_queue() promotes them */
+static int global_max_requests_inflight = 64;
+
+static struct timeval global_timeout = {5, 0}; /* 5 seconds */
+static int global_max_reissues = 1; /* a reissue occurs when we get some errors from the server */
+static int global_max_retransmits = 3; /* number of times we'll retransmit a request which timed out */
+/* number of timeouts in a row before we consider this server to be down */
+static int global_max_nameserver_timeout = 3;
+
+/* These are the timeout values for nameservers. If we find a nameserver is down */
+/* we try to probe it at intervals as given below. Values are in seconds. */
+static const struct timeval global_nameserver_timeouts[] = {{10, 0}, {60, 0}, {300, 0}, {900, 0}, {3600, 0}};
+static const int global_nameserver_timeouts_length = sizeof(global_nameserver_timeouts)/sizeof(struct timeval);
+
+static struct nameserver *nameserver_pick(void);
+static void evdns_request_insert(struct request *req, struct request **head);
+static void nameserver_ready_callback(int fd, short events, void *arg);
+static int evdns_transmit(void);
+static int evdns_request_transmit(struct request *req);
+static void nameserver_send_probe(struct nameserver *const ns);
+static void search_request_finished(struct request *const);
+static int search_try_next(struct request *const req);
+static int search_request_new(int type, const char *const name, int flags, evdns_callback_type user_callback, void *user_arg);
+static void evdns_requests_pump_waiting_queue(void);
+static u16 transaction_id_pick(void);
+static struct request *request_new(int type, const char *name, int flags, evdns_callback_type callback, void *ptr);
+static void request_submit(struct request *const req);
+
+static int server_request_free(struct server_request *req);
+static void server_request_free_answers(struct server_request *req);
+static void server_port_free(struct evdns_server_port *port);
+static void server_port_ready_callback(int fd, short events, void *arg);
+
+static int strtoint(const char *const str);
+
+#ifdef WIN32
+/* Win32: return the most useful error for the last socket operation. */
+/* On WSAEWOULDBLOCK we also query SO_ERROR, preferring any pending */
+/* socket-level error over the generic would-block code. */
+static int
+last_error(int sock)
+{
+	int optval, optvallen=sizeof(optval);
+	int err = WSAGetLastError();
+	if (err == WSAEWOULDBLOCK && sock >= 0) {
+		if (getsockopt(sock, SOL_SOCKET, SO_ERROR, (void*)&optval,
+			       &optvallen))
+			return err;
+		if (optval)
+			return optval;
+	}
+	return err;
+
+}
+/* Return non-zero iff err means "operation would block; retry later". */
+static int
+error_is_eagain(int err)
+{
+	if (err == EAGAIN)
+		return 1;
+	return err == WSAEWOULDBLOCK;
+}
+/* Minimal inet_aton() replacement for platforms that lack it. */
+/* 255.255.255.255 is special-cased because inet_addr() returns */
+/* INADDR_NONE (0xffffffff) both for that address and on error, */
+/* so it cannot distinguish the two.  Returns 1 on success, 0 on */
+/* a malformed address string. */
+static int
+inet_aton(const char *c, struct in_addr *addr)
+{
+	ev_uint32_t r;
+	if (strcmp(c, "255.255.255.255") == 0) {
+		addr->s_addr = 0xffffffffu;
+	} else {
+		r = inet_addr(c);
+		if (r == INADDR_NONE)
+			return 0;
+		addr->s_addr = r;
+	}
+	return 1;
+}
+#else
+#define last_error(sock) (errno)
+#define error_is_eagain(err) ((err) == EAGAIN)
+#endif
+#define CLOSE_SOCKET(s) EVUTIL_CLOSESOCKET(s)
+
+#define ISSPACE(c) isspace((int)(unsigned char)(c))
+#define ISDIGIT(c) isdigit((int)(unsigned char)(c))
+
+/* Format an IPv4 address (network byte order) as a dotted quad for */
+/* log messages.  Returns a pointer to a static buffer: not reentrant; */
+/* the result is overwritten by the next call. */
+static const char *
+debug_ntoa(u32 address)
+{
+	static char buf[32];
+	u32 a = ntohl(address);
+	evutil_snprintf(buf, sizeof(buf), "%d.%d.%d.%d",
+	    (int)(u8)((a>>24)&0xff),
+	    (int)(u8)((a>>16)&0xff),
+	    (int)(u8)((a>>8 )&0xff),
+	    (int)(u8)((a	)&0xff));
+	return buf;
+}
+
+/* User-installed log callback; NULL (the default) silences all logging */
+/* (see _evdns_log below). */
+static evdns_debug_log_fn_type evdns_log_fn = NULL;
+
+/* Public API: install (or clear, with NULL) the debug-log callback. */
+void
+evdns_set_log_fn(evdns_debug_log_fn_type fn)
+{
+	evdns_log_fn = fn;
+}
+
+#ifdef __GNUC__
+#define EVDNS_LOG_CHECK __attribute__ ((format(printf, 2, 3)))
+#else
+#define EVDNS_LOG_CHECK
+#endif
+
+/* Internal logging helper: printf-formats into a buffer and hands the */
+/* message to the user's callback.  No-op when no callback is installed. */
+/* NOTE(review): buf is static, so this is not thread-safe — presumably */
+/* evdns is single-threaded; confirm before using from multiple threads. */
+static void _evdns_log(int warn, const char *fmt, ...) EVDNS_LOG_CHECK;
+static void
+_evdns_log(int warn, const char *fmt, ...)
+{
+	va_list args;
+	static char buf[512];
+	if (!evdns_log_fn)
+		return;
+	va_start(args,fmt);
+	evutil_vsnprintf(buf, sizeof(buf), fmt, args);
+	/* belt-and-braces termination in case vsnprintf truncated */
+	buf[sizeof(buf)-1] = '\0';
+	evdns_log_fn(warn, buf);
+	va_end(args);
+}
+
+#define log _evdns_log
+
+/* This walks the list of inflight requests to find the */
+/* one with a matching transaction id. Returns NULL on */
+/* failure */
+/* Walk the circular inflight list looking for the request whose */
+/* transaction id matches trans_id.  Returns NULL when no inflight */
+/* request carries that id. */
+static struct request *
+request_find_from_trans_id(u16 trans_id) {
+	struct request *const start = req_head;
+	struct request *cur = req_head;
+
+	if (!cur)
+		return NULL;
+	do {
+		if (cur->trans_id == trans_id)
+			return cur;
+		cur = cur->next;
+	} while (cur != start);
+
+	return NULL;
+}
+
+/* a libevent callback function which is called when a nameserver */
+/* has gone down and we want to test if it has came back to life yet */
+/* arg is the struct nameserver installed when the timer was added; */
+/* fd/events are unused for a pure timer event. */
+static void
+nameserver_prod_callback(int fd, short events, void *arg) {
+	struct nameserver *const ns = (struct nameserver *) arg;
+	(void)fd;
+	(void)events;
+
+	nameserver_send_probe(ns);
+}
+
+/* a libevent callback which is called when a nameserver probe (to see if */
+/* it has come back to life) times out. We increment the count of failed_times */
+/* and wait longer to send the next probe packet. */
+static void
+nameserver_probe_failed(struct nameserver *const ns) {
+	const struct timeval * timeout;
+	(void) evtimer_del(&ns->timeout_event);
+	if (ns->state == 1) {
+		/* This can happen if the nameserver acts in a way which makes us mark */
+		/* it as bad and then starts sending good replies. */
+		return;
+	}
+
+	/* back off: pick the next entry in the escalating timeout table, */
+	/* clamped to its last (longest) interval */
+	timeout =
+	  &global_nameserver_timeouts[MIN(ns->failed_times,
+					  global_nameserver_timeouts_length - 1)];
+	ns->failed_times++;
+
+	if (evtimer_add(&ns->timeout_event, (struct timeval *) timeout) < 0) {
+	  log(EVDNS_LOG_WARN,
+	      "Error from libevent when adding timer event for %s",
+	      debug_ntoa(ns->address));
+	  /* ???? Do more? */
+	}
+}
+
+/* called when a nameserver has been deemed to have failed. For example, too */
+/* many packets have timed out etc */
+/* Marks the server down, schedules the first probe, and reassigns any */
+/* not-yet-transmitted inflight requests to a different server. */
+static void
+nameserver_failed(struct nameserver *const ns, const char *msg) {
+	struct request *req, *started_at;
+	/* if this nameserver has already been marked as failed */
+	/* then don't do anything */
+	if (!ns->state) return;
+
+	log(EVDNS_LOG_WARN, "Nameserver %s has failed: %s",
+	    debug_ntoa(ns->address), msg);
+	global_good_nameservers--;
+	assert(global_good_nameservers >= 0);
+	if (global_good_nameservers == 0) {
+		log(EVDNS_LOG_WARN, "All nameservers have failed");
+	}
+
+	ns->state = 0;
+	ns->failed_times = 1;
+
+	/* start probing with the shortest interval in the timeout table */
+	if (evtimer_add(&ns->timeout_event, (struct timeval *) &global_nameserver_timeouts[0]) < 0) {
+		log(EVDNS_LOG_WARN,
+		    "Error from libevent when adding timer event for %s",
+		    debug_ntoa(ns->address));
+		/* ???? Do more? */
+	}
+
+	/* walk the list of inflight requests to see if any can be reassigned to */
+	/* a different server. Requests in the waiting queue don't have a */
+	/* nameserver assigned yet */
+
+	/* if we don't have *any* good nameservers then there's no point */
+	/* trying to reassign requests to one */
+	if (!global_good_nameservers) return;
+
+	req = req_head;
+	started_at = req_head;
+	if (req) {
+		do {
+			if (req->tx_count == 0 && req->ns == ns) {
+				/* still waiting to go out, can be moved */
+				/* to another server */
+				req->ns = nameserver_pick();
+			}
+			req = req->next;
+		} while (req != started_at);
+	}
+}
+
+/* Mark a nameserver as usable again: cancel any pending probe timer, */
+/* reset its failure counters and bump the good-nameserver count. */
+/* No-op if the server is already considered up. */
+static void
+nameserver_up(struct nameserver *const ns) {
+	if (ns->state) return;
+	log(EVDNS_LOG_WARN, "Nameserver %s is back up",
+	    debug_ntoa(ns->address));
+	evtimer_del(&ns->timeout_event);
+	ns->state = 1;
+	ns->failed_times = 0;
+	ns->timedout = 0;
+	global_good_nameservers++;
+}
+
+/* Record the transaction id on the request and stamp it into the first */
+/* two bytes of the DNS packet (the header's ID field), network order. */
+static void
+request_trans_id_set(struct request *const req, const u16 trans_id) {
+	req->trans_id = trans_id;
+	*((u16 *) req->request) = htons(trans_id);
+}
+
+/* Called to remove a request from a list and dealloc it. */
+/* head is a pointer to the head of the list it should be */
+/* removed from or NULL if the request isn't in a list. */
+/* Also cancels the timeout, updates the inflight count and */
+/* promotes waiting requests into the freed inflight slot. */
+static void
+request_finished(struct request *const req, struct request **head) {
+	if (head) {
+		if (req->next == req) {
+			/* only item in the list */
+			*head = NULL;
+		} else {
+			req->next->prev = req->prev;
+			req->prev->next = req->next;
+			if (*head == req) *head = req->next;
+		}
+	}
+
+	log(EVDNS_LOG_DEBUG, "Removing timeout for request %lx",
+	    (unsigned long) req);
+	evtimer_del(&req->timeout_event);
+
+	search_request_finished(req);
+	global_requests_inflight--;
+
+	if (!req->request_appended) {
+		/* need to free the request data on it's own */
+		free(req->request);
+	} else {
+		/* the request data is appended onto the header */
+		/* so everything gets free()ed when we: */
+	}
+
+	free(req);
+
+	evdns_requests_pump_waiting_queue();
+}
+
+/* This is called when a server returns a funny error code. */
+/* We try the request again with another server. */
+/* */
+/* return: */
+/*   0 ok */
+/*   1 failed/reissue is pointless */
+static int
+request_reissue(struct request *req) {
+	const struct nameserver *const last_ns = req->ns;
+	/* the last nameserver should have been marked as failing */
+	/* by the caller of this function, therefore pick will try */
+	/* not to return it */
+	req->ns = nameserver_pick();
+	if (req->ns == last_ns) {
+		/* ... but pick did return it */
+		/* not a lot of point in trying again with the */
+		/* same server */
+		return 1;
+	}
+
+	/* reset transmission state so the transmit machinery sends it */
+	/* again from scratch on the newly picked server */
+	req->reissue_count++;
+	req->tx_count = 0;
+	req->transmit_me = 1;
+
+	return 0;
+}
+
+/* this function looks for space on the inflight queue and promotes */
+/* requests from the waiting queue if it can. Each promoted request */
+/* gets a nameserver and a fresh transaction id before transmission. */
+static void
+evdns_requests_pump_waiting_queue(void) {
+	while (global_requests_inflight < global_max_requests_inflight &&
+	    global_requests_waiting) {
+		struct request *req;
+		/* move a request from the waiting queue to the inflight queue */
+		assert(req_waiting_head);
+		if (req_waiting_head->next == req_waiting_head) {
+			/* only one item in the queue */
+			req = req_waiting_head;
+			req_waiting_head = NULL;
+		} else {
+			req = req_waiting_head;
+			req->next->prev = req->prev;
+			req->prev->next = req->next;
+			req_waiting_head = req->next;
+		}
+
+		global_requests_waiting--;
+		global_requests_inflight++;
+
+		req->ns = nameserver_pick();
+		request_trans_id_set(req, transaction_id_pick());
+
+		evdns_request_insert(req, &req_head);
+		evdns_request_transmit(req);
+		evdns_transmit();
+	}
+}
+
+/* Deliver the outcome of a request to its user callback. */
+/* When reply is non-NULL the lookup succeeded and the answer data */
+/* for the request's type is passed up with its TTL; when reply is */
+/* NULL, err carries the DNS_ERR_* failure code instead. */
+static void
+reply_callback(struct request *const req, u32 ttl, u32 err, struct reply *reply) {
+	switch (req->request_type) {
+	case TYPE_A:
+		if (reply)
+			req->user_callback(DNS_ERR_NONE, DNS_IPv4_A,
+			    reply->data.a.addrcount, ttl,
+			    reply->data.a.addresses,
+			    req->user_pointer);
+		else
+			req->user_callback(err, 0, 0, 0, NULL, req->user_pointer);
+		return;
+	case TYPE_PTR:
+		if (reply) {
+			char *name = reply->data.ptr.name;
+			req->user_callback(DNS_ERR_NONE, DNS_PTR, 1, ttl,
+			    &name, req->user_pointer);
+		} else {
+			req->user_callback(err, 0, 0, 0, NULL,
+			    req->user_pointer);
+		}
+		return;
+	case TYPE_AAAA:
+		if (reply)
+			req->user_callback(DNS_ERR_NONE, DNS_IPv6_AAAA,
+			    reply->data.aaaa.addrcount, ttl,
+			    reply->data.aaaa.addresses,
+			    req->user_pointer);
+		else
+			req->user_callback(err, 0, 0, 0, NULL, req->user_pointer);
+		return;
+	}
+	/* request_type is always one of the three handled above */
+	assert(0);
+}
+
+/* this processes a parsed reply packet: classifies errors from the */
+/* DNS header flags, decides whether to reissue / let time out / fail, */
+/* and otherwise hands the successful answer to the user callback. */
+static void
+reply_handle(struct request *const req, u16 flags, u32 ttl, struct reply *reply) {
+	int error;
+	/* maps rcode-1 (1..5) to our error constants */
+	static const int error_codes[] = {
+		DNS_ERR_FORMAT, DNS_ERR_SERVERFAILED, DNS_ERR_NOTEXIST,
+		DNS_ERR_NOTIMPL, DNS_ERR_REFUSED
+	};
+
+	/* 0x020f = TC (truncated) bit plus the 4-bit rcode */
+	if (flags & 0x020f || !reply || !reply->have_answer) {
+		/* there was an error */
+		if (flags & 0x0200) {
+			error = DNS_ERR_TRUNCATED;
+		} else {
+			/* rcode 0 wraps to 0xffff here, yielding DNS_ERR_UNKNOWN */
+			u16 error_code = (flags & 0x000f) - 1;
+			if (error_code > 4) {
+				error = DNS_ERR_UNKNOWN;
+			} else {
+				error = error_codes[error_code];
+			}
+		}
+
+		switch(error) {
+		case DNS_ERR_NOTIMPL:
+		case DNS_ERR_REFUSED:
+			/* we regard these errors as marking a bad nameserver */
+			if (req->reissue_count < global_max_reissues) {
+				char msg[64];
+				evutil_snprintf(msg, sizeof(msg),
+				    "Bad response %d (%s)",
+					 error, evdns_err_to_string(error));
+				nameserver_failed(req->ns, msg);
+				if (!request_reissue(req)) return;
+			}
+			break;
+		case DNS_ERR_SERVERFAILED:
+			/* rcode 2 (servfailed) sometimes means "we
+			 * are broken" and sometimes (with some binds)
+			 * means "that request was very confusing."
+			 * Treat this as a timeout, not a failure.
+			 */
+			log(EVDNS_LOG_DEBUG, "Got a SERVERFAILED from nameserver %s; "
+				"will allow the request to time out.",
+			    debug_ntoa(req->ns->address));
+			break;
+		default:
+			/* we got a good reply from the nameserver */
+			nameserver_up(req->ns);
+		}
+
+		if (req->search_state && req->request_type != TYPE_PTR) {
+			/* if we have a list of domains to search in,
+			 * try the next one */
+			if (!search_try_next(req)) {
+				/* a new request was issued so this
+				 * request is finished and */
+				/* the user callback will be made when
+				 * that request (or a */
+				/* child of it) finishes. */
+				request_finished(req, &req_head);
+				return;
+			}
+		}
+
+		/* all else failed. Pass the failure up */
+		reply_callback(req, 0, error, NULL);
+		request_finished(req, &req_head);
+	} else {
+		/* all ok, tell the user */
+		reply_callback(req, ttl, 0, reply);
+		nameserver_up(req->ns);
+		request_finished(req, &req_head);
+	}
+}
+
+/* Decode a (possibly compressed) DNS name starting at packet[*idx] */
+/* into name_out as a dotted string.  Follows RFC 1035 pointer */
+/* compression with a loop guard (ptr_count).  On success returns 0 */
+/* and advances *idx past the name; returns -1 on any malformed or */
+/* out-of-bounds input.  The GET32/GET16/GET8 macros defined here are */
+/* shared with reply_parse/request_parse and #undef'd at the end of */
+/* request_parse. */
+static int
+name_parse(u8 *packet, int length, int *idx, char *name_out, int name_out_len) {
+	int name_end = -1;
+	int j = *idx;
+	int ptr_count = 0;
+#define GET32(x) do { if (j + 4 > length) goto err; memcpy(&_t32, packet + j, 4); j += 4; x = ntohl(_t32); } while(0)
+#define GET16(x) do { if (j + 2 > length) goto err; memcpy(&_t, packet + j, 2); j += 2; x = ntohs(_t); } while(0)
+#define GET8(x) do { if (j >= length) goto err; x = packet[j++]; } while(0)
+
+	char *cp = name_out;
+	const char *const end = name_out + name_out_len;
+
+	/* Normally, names are a series of length prefixed strings terminated */
+	/* with a length of 0 (the lengths are u8's < 63). */
+	/* However, the length can start with a pair of 1 bits and that */
+	/* means that the next 14 bits are a pointer within the current */
+	/* packet. */
+
+	for(;;) {
+		u8 label_len;
+		if (j >= length) return -1;
+		GET8(label_len);
+		if (!label_len) break;
+		if (label_len & 0xc0) {
+			u8 ptr_low;
+			GET8(ptr_low);
+			/* remember where the name ends in the packet; after a */
+			/* pointer jump, j no longer tracks the caller's cursor */
+			if (name_end < 0) name_end = j;
+			j = (((int)label_len & 0x3f) << 8) + ptr_low;
+			/* Make sure that the target offset is in-bounds. */
+			if (j < 0 || j >= length) return -1;
+			/* If we've jumped more times than there are characters in the
+			 * message, we must have a loop. */
+			if (++ptr_count > length) return -1;
+			continue;
+		}
+		if (label_len > 63) return -1;
+		if (cp != name_out) {
+			if (cp + 1 >= end) return -1;
+			*cp++ = '.';
+		}
+		if (cp + label_len >= end) return -1;
+		memcpy(cp, packet + j, label_len);
+		cp += label_len;
+		j += label_len;
+	}
+	if (cp >= end) return -1;
+	*cp = '\0';
+	if (name_end < 0)
+		*idx = j;
+	else
+		*idx = name_end;
+	return 0;
+ err:
+	return -1;
+}
+
+/* parses a raw reply packet from a nameserver: matches it to the */
+/* inflight request by transaction id, validates the question section, */
+/* collects A/AAAA/PTR answer records into a struct reply, and hands */
+/* the result to reply_handle.  Returns 0 on success, -1 otherwise. */
+static int
+reply_parse(u8 *packet, int length) {
+	int j = 0, k = 0;  /* index into packet */
+	u16 _t;	 /* used by the macros */
+	u32 _t32;  /* used by the macros */
+	char tmp_name[256], cmp_name[256]; /* used by the macros */
+
+	u16 trans_id, questions, answers, authority, additional, datalength;
+	u16 flags = 0;
+	u32 ttl, ttl_r = 0xffffffff;
+	struct reply reply;
+	struct request *req = NULL;
+	unsigned int i;
+
+	GET16(trans_id);
+	GET16(flags);
+	GET16(questions);
+	GET16(answers);
+	GET16(authority);
+	GET16(additional);
+	(void) authority; /* suppress "unused variable" warnings. */
+	(void) additional; /* suppress "unused variable" warnings. */
+
+	req = request_find_from_trans_id(trans_id);
+	if (!req) return -1;
+
+	memset(&reply, 0, sizeof(reply));
+
+	/* If it's not an answer, it doesn't correspond to any request. */
+	if (!(flags & 0x8000)) return -1;  /* must be an answer */
+	if (flags & 0x020f) {
+		/* there was an error */
+		goto err;
+	}
+	/* if (!answers) return; */  /* must have an answer of some form */
+
+	/* This macro skips a name in the DNS reply. */
+#define SKIP_NAME \
+	do { tmp_name[0] = '\0';	\
+		if (name_parse(packet, length, &j, tmp_name, sizeof(tmp_name))<0)\
+			goto err;				\
+	} while(0)
+	/* This macro checks the question name against the one we sent. */
+	/* NOTE(review): the memcmp below compares only strlen(tmp_name) */
+	/* bytes, so a reply name that is a strict prefix of the request */
+	/* name would pass — confirm whether a full comparison is wanted. */
+#define TEST_NAME \
+	do { tmp_name[0] = '\0';	\
+		cmp_name[0] = '\0';	\
+		k = j;			\
+		if (name_parse(packet, length, &j, tmp_name, sizeof(tmp_name))<0)\
+			goto err;					\
+		if (name_parse(req->request, req->request_len, &k, cmp_name, sizeof(cmp_name))<0)	\
+			goto err;				\
+		if (memcmp(tmp_name, cmp_name, strlen (tmp_name)) != 0)	\
+			return (-1); /* we ignore mismatching names */	\
+	} while(0)
+
+	reply.type = req->request_type;
+
+	/* skip over each question in the reply */
+	for (i = 0; i < questions; ++i) {
+		/* the question looks like
+		 *   <label:name><u16:type><u16:class>
+		 */
+		TEST_NAME;
+		j += 4;
+		if (j > length) goto err;
+	}
+
+	/* now we have the answer section which looks like
+	 * <label:name><u16:type><u16:class><u32:ttl><u16:len><data...>
+	 */
+
+	for (i = 0; i < answers; ++i) {
+		u16 type, class;
+
+		SKIP_NAME;
+		GET16(type);
+		GET16(class);
+		GET32(ttl);
+		GET16(datalength);
+
+		if (type == TYPE_A && class == CLASS_INET) {
+			int addrcount, addrtocopy;
+			if (req->request_type != TYPE_A) {
+				j += datalength; continue;
+			}
+			if ((datalength & 3) != 0) /* not an even number of As. */
+			    goto err;
+			addrcount = datalength >> 2;
+			addrtocopy = MIN(MAX_ADDRS - reply.data.a.addrcount, (unsigned)addrcount);
+
+			/* report the smallest TTL across all collected records */
+			ttl_r = MIN(ttl_r, ttl);
+			/* we only bother with the first four addresses. */
+			if (j + 4*addrtocopy > length) goto err;
+			memcpy(&reply.data.a.addresses[reply.data.a.addrcount],
+				   packet + j, 4*addrtocopy);
+			j += 4*addrtocopy;
+			reply.data.a.addrcount += addrtocopy;
+			reply.have_answer = 1;
+			if (reply.data.a.addrcount == MAX_ADDRS) break;
+		} else if (type == TYPE_PTR && class == CLASS_INET) {
+			if (req->request_type != TYPE_PTR) {
+				j += datalength; continue;
+			}
+			if (name_parse(packet, length, &j, reply.data.ptr.name,
+						   sizeof(reply.data.ptr.name))<0)
+				goto err;
+			ttl_r = MIN(ttl_r, ttl);
+			reply.have_answer = 1;
+			break;
+		} else if (type == TYPE_AAAA && class == CLASS_INET) {
+			int addrcount, addrtocopy;
+			if (req->request_type != TYPE_AAAA) {
+				j += datalength; continue;
+			}
+			if ((datalength & 15) != 0) /* not an even number of AAAAs. */
+				goto err;
+			addrcount = datalength >> 4;  /* each address is 16 bytes long */
+			addrtocopy = MIN(MAX_ADDRS - reply.data.aaaa.addrcount, (unsigned)addrcount);
+			ttl_r = MIN(ttl_r, ttl);
+
+			/* we only bother with the first four addresses. */
+			if (j + 16*addrtocopy > length) goto err;
+			memcpy(&reply.data.aaaa.addresses[reply.data.aaaa.addrcount],
+				   packet + j, 16*addrtocopy);
+			reply.data.aaaa.addrcount += addrtocopy;
+			j += 16*addrtocopy;
+			reply.have_answer = 1;
+			if (reply.data.aaaa.addrcount == MAX_ADDRS) break;
+		} else {
+			/* skip over any other type of resource */
+			j += datalength;
+		}
+	}
+
+	reply_handle(req, flags, ttl_r, &reply);
+	return 0;
+ err:
+	/* report the failure to the matched request, if we found one */
+	if (req)
+		reply_handle(req, flags, 0, NULL);
+	return -1;
+}
+
+/* Parse a raw request (packet,length) sent to a nameserver port (port) from */
+/* a DNS client (addr,addrlen), and if it's well-formed, call the corresponding */
+/* callback.  Allocates a server_request (freed via the port machinery once */
+/* the reply is sent) and one evdns_server_question per question. */
+static int
+request_parse(u8 *packet, int length, struct evdns_server_port *port, struct sockaddr *addr, socklen_t addrlen)
+{
+	int j = 0;	/* index into packet */
+	u16 _t;	 /* used by the macros */
+	char tmp_name[256]; /* used by the macros */
+
+	int i;
+	u16 trans_id, flags, questions, answers, authority, additional;
+	struct server_request *server_req = NULL;
+
+	/* Get the header fields */
+	GET16(trans_id);
+	GET16(flags);
+	GET16(questions);
+	GET16(answers);
+	GET16(authority);
+	GET16(additional);
+
+	if (flags & 0x8000) return -1; /* Must not be an answer. */
+	flags &= 0x0110; /* Only RD and CD get preserved. */
+
+	server_req = malloc(sizeof(struct server_request));
+	if (server_req == NULL) return -1;
+	memset(server_req, 0, sizeof(struct server_request));
+
+	server_req->trans_id = trans_id;
+	memcpy(&server_req->addr, addr, addrlen);
+	server_req->addrlen = addrlen;
+
+	server_req->base.flags = flags;
+	server_req->base.nquestions = 0;
+	server_req->base.questions = malloc(sizeof(struct evdns_server_question *) * questions);
+	if (server_req->base.questions == NULL)
+		goto err;
+
+	for (i = 0; i < questions; ++i) {
+		u16 type, class;
+		struct evdns_server_question *q;
+		int namelen;
+		if (name_parse(packet, length, &j, tmp_name, sizeof(tmp_name))<0)
+			goto err;
+		GET16(type);
+		GET16(class);
+		namelen = strlen(tmp_name);
+		/* evdns_server_question has a flexible name buffer at its end */
+		q = malloc(sizeof(struct evdns_server_question) + namelen);
+		if (!q)
+			goto err;
+		q->type = type;
+		q->dns_question_class = class;
+		memcpy(q->name, tmp_name, namelen+1);
+		server_req->base.questions[server_req->base.nquestions++] = q;
+	}
+
+	/* Ignore answers, authority, and additional. */
+
+	server_req->port = port;
+	port->refcnt++;
+
+	/* Only standard queries are supported. */
+	/* NOTE(review): this path relies on evdns_server_request_respond */
+	/* (defined elsewhere) to release server_req and the port ref — */
+	/* confirm it frees on this error reply. */
+	if (flags & 0x7800) {
+		evdns_server_request_respond(&(server_req->base), DNS_ERR_NOTIMPL);
+		return -1;
+	}
+
+	port->user_callback(&(server_req->base), port->user_data);
+
+	return 0;
+err:
+	if (server_req) {
+		if (server_req->base.questions) {
+			for (i = 0; i < server_req->base.nquestions; ++i)
+				free(server_req->base.questions[i]);
+			free(server_req->base.questions);
+		}
+		free(server_req);
+	}
+	return -1;
+
+#undef SKIP_NAME
+#undef GET32
+#undef GET16
+#undef GET8
+}
+
+/* Default source of DNS transaction ids.  Exactly one of the */
+/* DNS_USE_*_FOR_ID configuration macros selects the entropy source */
+/* (enforced by the #error checks at the top of this file): CPU clock */
+/* nanoseconds, _ftime milliseconds, gettimeofday microseconds, or */
+/* OpenSSL's PRNG. */
+static u16
+default_transaction_id_fn(void)
+{
+	u16 trans_id;
+#ifdef DNS_USE_CPU_CLOCK_FOR_ID
+	struct timespec ts;
+	static int clkid = -1;
+	if (clkid == -1) {
+		clkid = CLOCK_REALTIME;
+#ifdef CLOCK_MONOTONIC
+		/* prefer the monotonic clock when the platform supports it */
+		if (clock_gettime(CLOCK_MONOTONIC, &ts) != -1)
+			clkid = CLOCK_MONOTONIC;
+#endif
+	}
+	if (clock_gettime(clkid, &ts) == -1)
+		event_err(1, "clock_gettime");
+	trans_id = ts.tv_nsec & 0xffff;
+#endif
+
+#ifdef DNS_USE_FTIME_FOR_ID
+	struct _timeb tb;
+	_ftime(&tb);
+	trans_id = tb.millitm & 0xffff;
+#endif
+
+#ifdef DNS_USE_GETTIMEOFDAY_FOR_ID
+	struct timeval tv;
+	evutil_gettimeofday(&tv, NULL);
+	trans_id = tv.tv_usec & 0xffff;
+#endif
+
+#ifdef DNS_USE_OPENSSL_FOR_ID
+	if (RAND_pseudo_bytes((u8 *) &trans_id, 2) == -1) {
+		/* in the case that the RAND call fails we back */
+		/* down to using gettimeofday. */
+		/*
+		  struct timeval tv;
+		  evutil_gettimeofday(&tv, NULL);
+		  trans_id = tv.tv_usec & 0xffff;
+		*/
+		abort();
+	}
+#endif
+	return trans_id;
+}
+
+/* Currently installed transaction-id generator. */
+static ev_uint16_t (*trans_id_function)(void) = default_transaction_id_fn;
+
+/* Public API: install a custom transaction-id generator, or restore */
+/* the built-in default when fn is NULL. */
+void
+evdns_set_transaction_id_fn(ev_uint16_t (*fn)(void))
+{
+	if (fn)
+		trans_id_function = fn;
+	else
+		trans_id_function = default_transaction_id_fn;
+}
+
+/* Try to choose a strong transaction id which isn't already in flight */
+/* Loops until the generator yields an id that no inflight request is */
+/* using; 0xffff is rejected unconditionally. */
+static u16
+transaction_id_pick(void) {
+	for (;;) {
+		const struct request *req = req_head, *started_at;
+		u16 trans_id = trans_id_function();
+
+		if (trans_id == 0xffff) continue;
+		/* now check to see if that id is already inflight */
+		req = started_at = req_head;
+		if (req) {
+			do {
+				if (req->trans_id == trans_id) break;
+				req = req->next;
+			} while (req != started_at);
+		}
+		/* we didn't find it, so this is a good id */
+		if (req == started_at) return trans_id;
+	}
+}
+
/* choose a namesever to use. This function will try to ignore */
/* nameservers which we think are down and load balance across the rest */
/* by updating the server_head global each time. */
static struct nameserver *
nameserver_pick(void) {
	struct nameserver *started_at = server_head, *picked;
	if (!server_head) return NULL;

	/* if we don't have any good nameservers then there's no */
	/* point in trying to find one. */
	if (!global_good_nameservers) {
		/* still rotate the head so repeated calls spread the load */
		server_head = server_head->next;
		return server_head;
	}

	/* remember that nameservers are in a circular list */
	for (;;) {
		if (server_head->state) {
			/* we think this server is currently good */
			picked = server_head;
			server_head = server_head->next;
			return picked;
		}

		server_head = server_head->next;
		if (server_head == started_at) {
			/* all the nameservers seem to be down */
			/* so we just return this one and hope for the */
			/* best */
			assert(global_good_nameservers == 0);
			picked = server_head;
			server_head = server_head->next;
			return picked;
		}
	}
}
+
+static int
+address_is_correct(struct nameserver *ns, struct sockaddr *sa, socklen_t slen)
+{
+ struct sockaddr_in *sin = (struct sockaddr_in*) sa;
+ if (sa->sa_family != AF_INET || slen != sizeof(struct sockaddr_in))
+ return 0;
+ if (sin->sin_addr.s_addr != ns->address)
+ return 0;
+ return 1;
+}
+
+/* this is called when a namesever socket is ready for reading */
+static void
+nameserver_read(struct nameserver *ns) {
+ u8 packet[1500];
+ struct sockaddr_storage ss;
+ socklen_t addrlen = sizeof(ss);
+
+ for (;;) {
+ const int r = recvfrom(ns->socket, packet, sizeof(packet), 0,
+ (struct sockaddr*)&ss, &addrlen);
+ if (r < 0) {
+ int err = last_error(ns->socket);
+ if (error_is_eagain(err)) return;
+ nameserver_failed(ns, strerror(err));
+ return;
+ }
+ if (!address_is_correct(ns, (struct sockaddr*)&ss, addrlen)) {
+ log(EVDNS_LOG_WARN, "Address mismatch on received "
+ "DNS packet.");
+ return;
+ }
+ ns->timedout = 0;
+ reply_parse(packet, r);
+ }
+}
+
/* Read a packet from a DNS client on a server port s, parse it, and */
/* act accordingly. */
static void
server_port_read(struct evdns_server_port *s) {
	u8 packet[1500];
	struct sockaddr_storage addr;
	socklen_t addrlen;
	int r;

	/* drain the socket until EAGAIN or error */
	for (;;) {
		/* addrlen is value-result; reset before each recvfrom */
		addrlen = sizeof(struct sockaddr_storage);
		r = recvfrom(s->socket, packet, sizeof(packet), 0,
		    (struct sockaddr*) &addr, &addrlen);
		if (r < 0) {
			int err = last_error(s->socket);
			if (error_is_eagain(err)) return;
			log(EVDNS_LOG_WARN, "Error %s (%d) while reading request.",
				strerror(err), err);
			return;
		}
		/* hand the raw query plus the client's address to the parser */
		request_parse(packet, r, s, (struct sockaddr*) &addr, addrlen);
	}
}
+
/* Try to write all pending replies on a given DNS server port. */
static void
server_port_flush(struct evdns_server_port *port)
{
	while (port->pending_replies) {
		struct server_request *req = port->pending_replies;
		int r = sendto(port->socket, req->response, req->response_len, 0,
			   (struct sockaddr*) &req->addr, req->addrlen);
		if (r < 0) {
			int err = last_error(port->socket);
			if (error_is_eagain(err))
				return;	/* socket is full again; wait for the next EV_WRITE */
			/* any other error: log, drop this reply, keep going */
			log(EVDNS_LOG_WARN, "Error %s (%d) while writing response to port; dropping", strerror(err), err);
		}
		/* freeing the request also advances port->pending_replies */
		if (server_request_free(req)) {
			/* we released the last reference to req->port. */
			return;
		}
	}

	/* We have no more pending requests; stop listening for 'writeable' events. */
	(void) event_del(&port->event);
	event_set(&port->event, port->socket, EV_READ | EV_PERSIST,
			  server_port_ready_callback, port);
	if (event_add(&port->event, NULL) < 0) {
		log(EVDNS_LOG_WARN, "Error from libevent when adding event for DNS server.");
		/* ???? Do more? */
	}
}
+
+/* set if we are waiting for the ability to write to this server. */
+/* if waiting is true then we ask libevent for EV_WRITE events, otherwise */
+/* we stop these events. */
+static void
+nameserver_write_waiting(struct nameserver *ns, char waiting) {
+ if (ns->write_waiting == waiting) return;
+
+ ns->write_waiting = waiting;
+ (void) event_del(&ns->event);
+ event_set(&ns->event, ns->socket, EV_READ | (waiting ? EV_WRITE : 0) | EV_PERSIST,
+ nameserver_ready_callback, ns);
+ if (event_add(&ns->event, NULL) < 0) {
+ log(EVDNS_LOG_WARN, "Error from libevent when adding event for %s",
+ debug_ntoa(ns->address));
+ /* ???? Do more? */
+ }
+}
+
+/* a callback function. Called by libevent when the kernel says that */
+/* a nameserver socket is ready for writing or reading */
+static void
+nameserver_ready_callback(int fd, short events, void *arg) {
+ struct nameserver *ns = (struct nameserver *) arg;
+ (void)fd;
+
+ if (events & EV_WRITE) {
+ ns->choked = 0;
+ if (!evdns_transmit()) {
+ nameserver_write_waiting(ns, 0);
+ }
+ }
+ if (events & EV_READ) {
+ nameserver_read(ns);
+ }
+}
+
+/* a callback function. Called by libevent when the kernel says that */
+/* a server socket is ready for writing or reading. */
+static void
+server_port_ready_callback(int fd, short events, void *arg) {
+ struct evdns_server_port *port = (struct evdns_server_port *) arg;
+ (void) fd;
+
+ if (events & EV_WRITE) {
+ port->choked = 0;
+ server_port_flush(port);
+ }
+ if (events & EV_READ) {
+ server_port_read(port);
+ }
+}
+
/* This is an inefficient representation; only use it via the dnslabel_table_*
 * functions, so that is can be safely replaced with something smarter later. */
#define MAX_LABELS 128
/* Structures used to implement name compression */
struct dnslabel_entry { char *v; off_t pos; };	/* strdup()ed name -> offset in message */
struct dnslabel_table {
	int n_labels; /* number of current entries */
	/* map from name to position in message */
	struct dnslabel_entry labels[MAX_LABELS];
};
+
/* Initialize dnslabel_table to the empty state (frees nothing). */
static void
dnslabel_table_init(struct dnslabel_table *table)
{
	table->n_labels = 0;
}
+
+/* Free all storage held by table, but not the table itself. */
+static void
+dnslabel_clear(struct dnslabel_table *table)
+{
+ int i;
+ for (i = 0; i < table->n_labels; ++i)
+ free(table->labels[i].v);
+ table->n_labels = 0;
+}
+
+/* return the position of the label in the current message, or -1 if the label */
+/* hasn't been used yet. */
+static int
+dnslabel_table_get_pos(const struct dnslabel_table *table, const char *label)
+{
+ int i;
+ for (i = 0; i < table->n_labels; ++i) {
+ if (!strcmp(label, table->labels[i].v))
+ return table->labels[i].pos;
+ }
+ return -1;
+}
+
+/* remember that we've used the label at position pos */
+static int
+dnslabel_table_add(struct dnslabel_table *table, const char *label, off_t pos)
+{
+ char *v;
+ int p;
+ if (table->n_labels == MAX_LABELS)
+ return (-1);
+ v = strdup(label);
+ if (v == NULL)
+ return (-1);
+ p = table->n_labels++;
+ table->labels[p].v = v;
+ table->labels[p].pos = pos;
+
+ return (0);
+}
+
/* Converts a string to a length-prefixed set of DNS labels, starting */
/* at buf[j]. name and buf must not overlap. name_len should be the length */
/* of name. table is optional, and is used for compression. */
/* */
/* Input: abc.def */
/* Output: <3>abc<3>def<0> */
/* */
/* Returns the first index after the encoded name, or negative on error. */
/*	 -1 label was > 63 bytes */
/*	 -2 name too long to fit in buffer. */
/* */
static off_t
dnsname_to_labels(u8 *const buf, size_t buf_len, off_t j,
				  const char *name, const int name_len,
				  struct dnslabel_table *table) {
	const char *end = name + name_len;
	int ref = 0;
	u16 _t;

/* NOTE: these macros deliberately remain defined past the end of this
 * function (no #undef until after the server-request code) and are
 * reused by evdns_request_data_build and
 * evdns_server_request_format_response.  APPEND32 expands to use a
 * local `_t32`, which must be declared by the function that uses it. */
#define APPEND16(x) do {					   \
		if (j + 2 > (off_t)buf_len)			   \
			goto overflow;				   \
		_t = htons(x);					   \
		memcpy(buf + j, &_t, 2);			   \
		j += 2;						   \
	} while (0)
#define APPEND32(x) do {					   \
		if (j + 4 > (off_t)buf_len)			   \
			goto overflow;				   \
		_t32 = htonl(x);				   \
		memcpy(buf + j, &_t32, 4);			   \
		j += 4;						   \
	} while (0)

	/* DNS names are limited to 255 octets */
	if (name_len > 255) return -2;

	for (;;) {
		const char *const start = name;
		/* if this suffix was already emitted, write a two-byte
		 * compression pointer (top bits 11) to its prior position */
		if (table && (ref = dnslabel_table_get_pos(table, name)) >= 0) {
			APPEND16(ref | 0xc000);
			return j;
		}
		name = strchr(name, '.');
		if (!name) {
			/* final label: <len><bytes> */
			const unsigned int label_len = end - start;
			if (label_len > 63) return -1;
			if ((size_t)(j+label_len+1) > buf_len) return -2;
			if (table) dnslabel_table_add(table, start, j);
			buf[j++] = label_len;

			memcpy(buf + j, start, end - start);
			j += end - start;
			break;
		} else {
			/* append length of the label. */
			const unsigned int label_len = name - start;
			if (label_len > 63) return -1;
			if ((size_t)(j+label_len+1) > buf_len) return -2;
			if (table) dnslabel_table_add(table, start, j);
			buf[j++] = label_len;

			memcpy(buf + j, start, name - start);
			j += name - start;
			/* hop over the '.' */
			name++;
		}
	}

	/* the labels must be terminated by a 0. */
	/* It's possible that the name ended in a . */
	/* in which case the zero is already there */
	if (!j || buf[j-1]) buf[j++] = 0;
	return j;
 overflow:
	return (-2);
}
+
/* Upper bound on the size of a DNS request packet carrying a name of
 * name_len bytes; the packet actually built may be smaller. */
static int
evdns_request_len(const int name_len) {
	const int header_len = 96;	/* length of the DNS standard header */
	const int encoded_name = name_len + 2;	/* length bytes + terminator */
	const int qtype_qclass = 4;	/* space for the resource type */
	return header_len + encoded_name + qtype_qclass;
}
+
/* build a dns request packet into buf. buf should be at least as long */
/* as evdns_request_len told you it should be. */
/* */
/* Returns the amount of space used. Negative on error. */
static int
evdns_request_data_build(const char *const name, const int name_len,
    const u16 trans_id, const u16 type, const u16 class,
    u8 *const buf, size_t buf_len) {
	off_t j = 0;  /* current offset into buf */
	u16 _t;	 /* used by the macros */

	/* 12-byte header (APPEND16 is defined in dnsname_to_labels above) */
	APPEND16(trans_id);
	APPEND16(0x0100); /* standard query, recusion needed */
	APPEND16(1); /* one question */
	APPEND16(0); /* no answers */
	APPEND16(0); /* no authority */
	APPEND16(0); /* no additional */

	/* question name, uncompressed (no label table) */
	j = dnsname_to_labels(buf, buf_len, j, name, name_len, NULL);
	if (j < 0) {
		return (int)j;
	}

	APPEND16(type);
	APPEND16(class);

	return (int)j;
 overflow:
	return (-1);
}
+
+/* exported function */
+struct evdns_server_port *
+evdns_add_server_port(int socket, int is_tcp, evdns_request_callback_fn_type cb, void *user_data)
+{
+ struct evdns_server_port *port;
+ if (!(port = malloc(sizeof(struct evdns_server_port))))
+ return NULL;
+ memset(port, 0, sizeof(struct evdns_server_port));
+
+ assert(!is_tcp); /* TCP sockets not yet implemented */
+ port->socket = socket;
+ port->refcnt = 1;
+ port->choked = 0;
+ port->closing = 0;
+ port->user_callback = cb;
+ port->user_data = user_data;
+ port->pending_replies = NULL;
+
+ event_set(&port->event, port->socket, EV_READ | EV_PERSIST,
+ server_port_ready_callback, port);
+ event_add(&port->event, NULL); /* check return. */
+ return port;
+}
+
+/* exported function */
+void
+evdns_close_server_port(struct evdns_server_port *port)
+{
+ if (--port->refcnt == 0)
+ server_port_free(port);
+ port->closing = 1;
+}
+
+/* exported function */
+int
+evdns_server_request_add_reply(struct evdns_server_request *_req, int section, const char *name, int type, int class, int ttl, int datalen, int is_name, const char *data)
+{
+ struct server_request *req = TO_SERVER_REQUEST(_req);
+ struct server_reply_item **itemp, *item;
+ int *countp;
+
+ if (req->response) /* have we already answered? */
+ return (-1);
+
+ switch (section) {
+ case EVDNS_ANSWER_SECTION:
+ itemp = &req->answer;
+ countp = &req->n_answer;
+ break;
+ case EVDNS_AUTHORITY_SECTION:
+ itemp = &req->authority;
+ countp = &req->n_authority;
+ break;
+ case EVDNS_ADDITIONAL_SECTION:
+ itemp = &req->additional;
+ countp = &req->n_additional;
+ break;
+ default:
+ return (-1);
+ }
+ while (*itemp) {
+ itemp = &((*itemp)->next);
+ }
+ item = malloc(sizeof(struct server_reply_item));
+ if (!item)
+ return -1;
+ item->next = NULL;
+ if (!(item->name = strdup(name))) {
+ free(item);
+ return -1;
+ }
+ item->type = type;
+ item->dns_question_class = class;
+ item->ttl = ttl;
+ item->is_name = is_name != 0;
+ item->datalen = 0;
+ item->data = NULL;
+ if (data) {
+ if (item->is_name) {
+ if (!(item->data = strdup(data))) {
+ free(item->name);
+ free(item);
+ return -1;
+ }
+ item->datalen = (u16)-1;
+ } else {
+ if (!(item->data = malloc(datalen))) {
+ free(item->name);
+ free(item);
+ return -1;
+ }
+ item->datalen = datalen;
+ memcpy(item->data, data, datalen);
+ }
+ }
+
+ *itemp = item;
+ ++(*countp);
+ return 0;
+}
+
+/* exported function */
+int
+evdns_server_request_add_a_reply(struct evdns_server_request *req, const char *name, int n, void *addrs, int ttl)
+{
+ return evdns_server_request_add_reply(
+ req, EVDNS_ANSWER_SECTION, name, TYPE_A, CLASS_INET,
+ ttl, n*4, 0, addrs);
+}
+
+/* exported function */
+int
+evdns_server_request_add_aaaa_reply(struct evdns_server_request *req, const char *name, int n, void *addrs, int ttl)
+{
+ return evdns_server_request_add_reply(
+ req, EVDNS_ANSWER_SECTION, name, TYPE_AAAA, CLASS_INET,
+ ttl, n*16, 0, addrs);
+}
+
+/* exported function */
+int
+evdns_server_request_add_ptr_reply(struct evdns_server_request *req, struct in_addr *in, const char *inaddr_name, const char *hostname, int ttl)
+{
+ u32 a;
+ char buf[32];
+ assert(in || inaddr_name);
+ assert(!(in && inaddr_name));
+ if (in) {
+ a = ntohl(in->s_addr);
+ evutil_snprintf(buf, sizeof(buf), "%d.%d.%d.%d.in-addr.arpa",
+ (int)(u8)((a )&0xff),
+ (int)(u8)((a>>8 )&0xff),
+ (int)(u8)((a>>16)&0xff),
+ (int)(u8)((a>>24)&0xff));
+ inaddr_name = buf;
+ }
+ return evdns_server_request_add_reply(
+ req, EVDNS_ANSWER_SECTION, inaddr_name, TYPE_PTR, CLASS_INET,
+ ttl, -1, 1, hostname);
+}
+
+/* exported function */
+int
+evdns_server_request_add_cname_reply(struct evdns_server_request *req, const char *name, const char *cname, int ttl)
+{
+ return evdns_server_request_add_reply(
+ req, EVDNS_ANSWER_SECTION, name, TYPE_CNAME, CLASS_INET,
+ ttl, -1, 1, cname);
+}
+
+
/* Render req's questions and reply sections into req->response as a
 * wire-format DNS message with response code err (0..15).  Consumes the
 * reply items on success.  Returns 0, or -1 on a bad err / encoding
 * failure / allocation failure. */
static int
evdns_server_request_format_response(struct server_request *req, int err)
{
	unsigned char buf[1500];
	size_t buf_len = sizeof(buf);
	off_t j = 0, r;
	u16 _t;		/* scratch used by APPEND16 (macro from dnsname_to_labels) */
	u32 _t32;	/* scratch used by APPEND32 */
	int i;
	u16 flags;
	struct dnslabel_table table;	/* name-compression state for this message */

	if (err < 0 || err > 15) return -1;

	/* Set response bit and error code; copy OPCODE and RD fields from
	 * question; copy RA and AA if set by caller. */
	flags = req->base.flags;
	flags |= (0x8000 | err);

	dnslabel_table_init(&table);
	/* 12-byte header: id, flags, then the four section counts */
	APPEND16(req->trans_id);
	APPEND16(flags);
	APPEND16(req->base.nquestions);
	APPEND16(req->n_answer);
	APPEND16(req->n_authority);
	APPEND16(req->n_additional);

	/* Add questions. */
	for (i=0; i < req->base.nquestions; ++i) {
		const char *s = req->base.questions[i]->name;
		j = dnsname_to_labels(buf, buf_len, j, s, strlen(s), &table);
		if (j < 0) {
			dnslabel_clear(&table);
			return (int) j;
		}
		APPEND16(req->base.questions[i]->type);
		APPEND16(req->base.questions[i]->dns_question_class);
	}

	/* Add answer, authority, and additional sections. */
	for (i=0; i<3; ++i) {
		struct server_reply_item *item;
		if (i==0)
			item = req->answer;
		else if (i==1)
			item = req->authority;
		else
			item = req->additional;
		while (item) {
			r = dnsname_to_labels(buf, buf_len, j, item->name, strlen(item->name), &table);
			if (r < 0)
				goto overflow;
			j = r;

			APPEND16(item->type);
			APPEND16(item->dns_question_class);
			APPEND32(item->ttl);
			if (item->is_name) {
				/* RDATA is a hostname: reserve two bytes for
				 * the RDLENGTH, encode the name, backpatch. */
				off_t len_idx = j, name_start;
				j += 2;
				name_start = j;
				r = dnsname_to_labels(buf, buf_len, j, item->data, strlen(item->data), &table);
				if (r < 0)
					goto overflow;
				j = r;
				_t = htons( (short) (j-name_start) );
				memcpy(buf+len_idx, &_t, 2);
			} else {
				APPEND16(item->datalen);
				if (j+item->datalen > (off_t)buf_len)
					goto overflow;
				memcpy(buf+j, item->data, item->datalen);
				j += item->datalen;
			}
			item = item->next;
		}
	}

	/* classic UDP DNS caps a message at 512 bytes: truncate + set TC bit */
	if (j > 512) {
overflow:
		j = 512;
		buf[2] |= 0x02; /* set the truncated bit. */
	}

	req->response_len = j;

	if (!(req->response = malloc(req->response_len))) {
		server_request_free_answers(req);
		dnslabel_clear(&table);
		return (-1);
	}
	memcpy(req->response, buf, req->response_len);
	server_request_free_answers(req);
	dnslabel_clear(&table);
	return (0);
}
+
+/* exported function */
/* Send (or queue) the response for _req with response code err.
 * Returns 0 when the reply was sent and the request freed, 1 when it
 * was queued pending socket writability, negative on error. */
int
evdns_server_request_respond(struct evdns_server_request *_req, int err)
{
	struct server_request *req = TO_SERVER_REQUEST(_req);
	struct evdns_server_port *port = req->port;
	int r;
	/* lazily render the wire-format reply the first time through */
	if (!req->response) {
		if ((r = evdns_server_request_format_response(req, err))<0)
			return r;
	}

	r = sendto(port->socket, req->response, req->response_len, 0,
			   (struct sockaddr*) &req->addr, req->addrlen);
	if (r<0) {
		int sock_err = last_error(port->socket);
		if (! error_is_eagain(sock_err))
			return -1;

		/* kernel buffer full: park the request on the port's circular
		 * pending-replies list and wait for an EV_WRITE event */
		if (port->pending_replies) {
			req->prev_pending = port->pending_replies->prev_pending;
			req->next_pending = port->pending_replies;
			req->prev_pending->next_pending =
				req->next_pending->prev_pending = req;
		} else {
			req->prev_pending = req->next_pending = req;
			port->pending_replies = req;
			port->choked = 1;

			/* start (also) listening for writability on the port */
			(void) event_del(&port->event);
			event_set(&port->event, port->socket, (port->closing?0:EV_READ) | EV_WRITE | EV_PERSIST, server_port_ready_callback, port);

			if (event_add(&port->event, NULL) < 0) {
				log(EVDNS_LOG_WARN, "Error from libevent when adding event for DNS server");
			}

		}

		return 1;
	}
	/* sent: free the request; stop if that released the port too */
	if (server_request_free(req))
		return 0;

	if (port->pending_replies)
		server_port_flush(port);

	return 0;
}
+
+/* Free all storage held by RRs in req. */
+static void
+server_request_free_answers(struct server_request *req)
+{
+ struct server_reply_item *victim, *next, **list;
+ int i;
+ for (i = 0; i < 3; ++i) {
+ if (i==0)
+ list = &req->answer;
+ else if (i==1)
+ list = &req->authority;
+ else
+ list = &req->additional;
+
+ victim = *list;
+ while (victim) {
+ next = victim->next;
+ free(victim->name);
+ if (victim->data)
+ free(victim->data);
+ free(victim);
+ victim = next;
+ }
+ *list = NULL;
+ }
+}
+
+/* Free all storage held by req, and remove links to it. */
+/* return true iff we just wound up freeing the server_port. */
+static int
+server_request_free(struct server_request *req)
+{
+ int i, rc=1;
+ if (req->base.questions) {
+ for (i = 0; i < req->base.nquestions; ++i)
+ free(req->base.questions[i]);
+ free(req->base.questions);
+ }
+
+ if (req->port) {
+ if (req->port->pending_replies == req) {
+ if (req->next_pending)
+ req->port->pending_replies = req->next_pending;
+ else
+ req->port->pending_replies = NULL;
+ }
+ rc = --req->port->refcnt;
+ }
+
+ if (req->response) {
+ free(req->response);
+ }
+
+ server_request_free_answers(req);
+
+ if (req->next_pending && req->next_pending != req) {
+ req->next_pending->prev_pending = req->prev_pending;
+ req->prev_pending->next_pending = req->next_pending;
+ }
+
+ if (rc == 0) {
+ server_port_free(req->port);
+ free(req);
+ return (1);
+ }
+ free(req);
+ return (0);
+}
+
+/* Free all storage held by an evdns_server_port. Only called when */
+static void
+server_port_free(struct evdns_server_port *port)
+{
+ assert(port);
+ assert(!port->refcnt);
+ assert(!port->pending_replies);
+ if (port->socket > 0) {
+ CLOSE_SOCKET(port->socket);
+ port->socket = -1;
+ }
+ (void) event_del(&port->event);
+ /* XXXX actually free the port? -NM */
+}
+
+/* exported function */
/* Discard _req without sending any reply; always returns 0. */
int
evdns_server_request_drop(struct evdns_server_request *_req)
{
	server_request_free(TO_SERVER_REQUEST(_req));
	return 0;
}
+
+/* exported function */
+int
+evdns_server_request_get_requesting_addr(struct evdns_server_request *_req, struct sockaddr *sa, int addr_len)
+{
+ struct server_request *req = TO_SERVER_REQUEST(_req);
+ if (addr_len < (int)req->addrlen)
+ return -1;
+ memcpy(sa, &(req->addr), req->addrlen);
+ return req->addrlen;
+}
+
+#undef APPEND16
+#undef APPEND32
+
/* this is a libevent callback function which is called when a request */
/* has timed out. */
static void
evdns_request_timeout_callback(int fd, short events, void *arg) {
	struct request *const req = (struct request *) arg;
	(void) fd;
	(void) events;

	log(EVDNS_LOG_DEBUG, "Request %lx timed out", (unsigned long) arg);

	/* penalise the nameserver; after too many consecutive timeouts it
	 * is declared failed and probing begins */
	req->ns->timedout++;
	if (req->ns->timedout > global_max_nameserver_timeout) {
		req->ns->timedout = 0;
		nameserver_failed(req->ns, "request timed out.");
	}

	(void) evtimer_del(&req->timeout_event);
	if (req->tx_count >= global_max_retransmits) {
		/* this request has failed */
		reply_callback(req, 0, DNS_ERR_TIMEOUT, NULL);
		request_finished(req, &req_head);
	} else {
		/* retransmit it */
		evdns_request_transmit(req);
	}
}
+
/* try to send a request to a given server. */
/* */
/* return: */
/*	 0 ok */
/*	 1 temporary failure */
/*	 2 other failure */
static int
evdns_request_transmit_to(struct request *req, struct nameserver *server) {
	struct sockaddr_in sin;
	int r;
	memset(&sin, 0, sizeof(sin));
	/* NOTE(review): the destination address is taken from req->ns
	 * while the socket and EAGAIN handling use `server`; callers pass
	 * server == req->ns, but confirm before calling with a different
	 * pair. */
	sin.sin_addr.s_addr = req->ns->address;
	sin.sin_port = req->ns->port;
	sin.sin_family = AF_INET;

	r = sendto(server->socket, req->request, req->request_len, 0,
	    (struct sockaddr*)&sin, sizeof(sin));
	if (r < 0) {
		int err = last_error(server->socket);
		if (error_is_eagain(err)) return 1;
		nameserver_failed(req->ns, strerror(err));
		return 2;
	} else if (r != (int)req->request_len) {
		return 1;  /* short write */
	} else {
		return 0;
	}
}
+
/* try to send a request, updating the fields of the request */
/* as needed */
/* */
/* return: */
/*	 0 ok */
/*	 1 failed */
static int
evdns_request_transmit(struct request *req) {
	int retcode = 0, r;

	/* if we fail to send this packet then this flag marks it */
	/* for evdns_transmit */
	req->transmit_me = 1;
	/* 0xffff is the reserved "no transaction id" sentinel and must
	 * never reach the wire */
	if (req->trans_id == 0xffff) abort();

	if (req->ns->choked) {
		/* don't bother trying to write to a socket */
		/* which we have had EAGAIN from */
		return 1;
	}

	r = evdns_request_transmit_to(req, req->ns);
	switch (r) {
	case 1:
		/* temp failure */
		req->ns->choked = 1;
		nameserver_write_waiting(req->ns, 1);
		return 1;
	case 2:
		/* failed in some other way */
		retcode = 1;
		/* fall through: the timeout is still scheduled below so the
		 * request is retried or expired through the normal path */
	default:
		/* all ok */
		log(EVDNS_LOG_DEBUG,
			"Setting timeout for request %lx", (unsigned long) req);
		if (evtimer_add(&req->timeout_event, &global_timeout) < 0) {
			log(EVDNS_LOG_WARN,
				"Error from libevent when adding timer for request %lx",
				(unsigned long) req);
			/* ???? Do more? */
		}
		req->tx_count++;
		req->transmit_me = 0;
		return retcode;
	}
}
+
+static void
+nameserver_probe_callback(int result, char type, int count, int ttl, void *addresses, void *arg) {
+ struct nameserver *const ns = (struct nameserver *) arg;
+ (void) type;
+ (void) count;
+ (void) ttl;
+ (void) addresses;
+
+ if (result == DNS_ERR_NONE || result == DNS_ERR_NOTEXIST) {
+ /* this is a good reply */
+ nameserver_up(ns);
+ } else nameserver_probe_failed(ns);
+}
+
+static void
+nameserver_send_probe(struct nameserver *const ns) {
+ struct request *req;
+ /* here we need to send a probe to a given nameserver */
+ /* in the hope that it is up now. */
+
+ log(EVDNS_LOG_DEBUG, "Sending probe to %s", debug_ntoa(ns->address));
+
+ req = request_new(TYPE_A, "www.google.com", DNS_QUERY_NO_SEARCH, nameserver_probe_callback, ns);
+ if (!req) return;
+ /* we force this into the inflight queue no matter what */
+ request_trans_id_set(req, transaction_id_pick());
+ req->ns = ns;
+ request_submit(req);
+}
+
+/* returns: */
+/* 0 didn't try to transmit anything */
+/* 1 tried to transmit something */
+static int
+evdns_transmit(void) {
+ char did_try_to_transmit = 0;
+
+ if (req_head) {
+ struct request *const started_at = req_head, *req = req_head;
+ /* first transmit all the requests which are currently waiting */
+ do {
+ if (req->transmit_me) {
+ did_try_to_transmit = 1;
+ evdns_request_transmit(req);
+ }
+
+ req = req->next;
+ } while (req != started_at);
+ }
+
+ return did_try_to_transmit;
+}
+
+/* exported function */
+int
+evdns_count_nameservers(void)
+{
+ const struct nameserver *server = server_head;
+ int n = 0;
+ if (!server)
+ return 0;
+ do {
+ ++n;
+ server = server->next;
+ } while (server != server_head);
+ return n;
+}
+
/* exported function */
/* Tear down every configured nameserver (closing its socket and
 * events) and move all inflight requests back to the front of the
 * waiting queue with their counters reset.  Processing resumes after
 * nameservers are re-added and evdns_resume() is called.  Returns 0. */
int
evdns_clear_nameservers_and_suspend(void)
{
	struct nameserver *server = server_head, *started_at = server_head;
	struct request *req = req_head, *req_started_at = req_head;

	if (!server)
		return 0;
	/* free every entry of the circular nameserver list */
	while (1) {
		struct nameserver *next = server->next;
		(void) event_del(&server->event);
		if (evtimer_initialized(&server->timeout_event))
			(void) evtimer_del(&server->timeout_event);
		if (server->socket >= 0)
			CLOSE_SOCKET(server->socket);
		free(server);
		if (next == started_at)
			break;
		server = next;
	}
	server_head = NULL;
	global_good_nameservers = 0;

	/* demote every inflight request back to "waiting" */
	while (req) {
		struct request *next = req->next;
		req->tx_count = req->reissue_count = 0;
		req->ns = NULL;
		/* ???? What to do about searches? */
		(void) evtimer_del(&req->timeout_event);
		req->trans_id = 0;
		req->transmit_me = 0;

		global_requests_waiting++;
		evdns_request_insert(req, &req_waiting_head);
		/* We want to insert these suspended elements at the front of
		 * the waiting queue, since they were pending before any of
		 * the waiting entries were added. This is a circular list,
		 * so we can just shift the start back by one.*/
		req_waiting_head = req_waiting_head->prev;

		if (next == req_started_at)
			break;
		req = next;
	}
	req_head = NULL;
	global_requests_inflight = 0;

	return 0;
}
+
+
+/* exported function */
/* Resume request processing after evdns_clear_nameservers_and_suspend():
 * pushes queued requests back into flight (up to the inflight limit).
 * Always returns 0. */
int
evdns_resume(void)
{
	evdns_requests_pump_waiting_queue();
	return 0;
}
+
+static int
+_evdns_nameserver_add_impl(unsigned long int address, int port) {
+ /* first check to see if we already have this nameserver */
+
+ const struct nameserver *server = server_head, *const started_at = server_head;
+ struct nameserver *ns;
+ int err = 0;
+ if (server) {
+ do {
+ if (server->address == address) return 3;
+ server = server->next;
+ } while (server != started_at);
+ }
+
+ ns = (struct nameserver *) malloc(sizeof(struct nameserver));
+ if (!ns) return -1;
+
+ memset(ns, 0, sizeof(struct nameserver));
+
+ evtimer_set(&ns->timeout_event, nameserver_prod_callback, ns);
+
+ ns->socket = socket(PF_INET, SOCK_DGRAM, 0);
+ if (ns->socket < 0) { err = 1; goto out1; }
+ evutil_make_socket_nonblocking(ns->socket);
+
+ ns->address = address;
+ ns->port = htons(port);
+ ns->state = 1;
+ event_set(&ns->event, ns->socket, EV_READ | EV_PERSIST, nameserver_ready_callback, ns);
+ if (event_add(&ns->event, NULL) < 0) {
+ err = 2;
+ goto out2;
+ }
+
+ log(EVDNS_LOG_DEBUG, "Added nameserver %s", debug_ntoa(address));
+
+ /* insert this nameserver into the list of them */
+ if (!server_head) {
+ ns->next = ns->prev = ns;
+ server_head = ns;
+ } else {
+ ns->next = server_head->next;
+ ns->prev = server_head;
+ server_head->next = ns;
+ if (server_head->prev == server_head) {
+ server_head->prev = ns;
+ }
+ }
+
+ global_good_nameservers++;
+
+ return 0;
+
+out2:
+ CLOSE_SOCKET(ns->socket);
+out1:
+ free(ns);
+ log(EVDNS_LOG_WARN, "Unable to add nameserver %s: error %d", debug_ntoa(address), err);
+ return err;
+}
+
+/* exported function */
/* Add a nameserver given its IPv4 address in network byte order, using
 * the standard DNS port (53). */
int
evdns_nameserver_add(unsigned long int address) {
	return _evdns_nameserver_add_impl(address, 53);
}
+
+/* exported function */
/* Add a nameserver given as "a.b.c.d" or "a.b.c.d:port".  Returns 4 on
 * a malformed address or out-of-range port; otherwise whatever
 * _evdns_nameserver_add_impl() returns. */
int
evdns_nameserver_ip_add(const char *ip_as_string) {
	struct in_addr ina;
	int port;
	char addr_part[20];
	const char *addr = ip_as_string;
	const char *colon = strchr(ip_as_string, ':');

	if (!colon) {
		port = 53;
	} else {
		/* split "addr:port" into its two pieces */
		const size_t addr_len = colon - ip_as_string;
		port = strtoint(colon + 1);
		if (port < 0 || port > 65535)
			return 4;
		if (addr_len >= sizeof(addr_part))
			return 4;
		memcpy(addr_part, ip_as_string, addr_len);
		addr_part[addr_len] = '\0';
		addr = addr_part;
	}
	if (!inet_aton(addr, &ina))
		return 4;
	return _evdns_nameserver_add_impl(ina.s_addr, port);
}
+
+/* insert into the tail of the queue */
+static void
+evdns_request_insert(struct request *req, struct request **head) {
+ if (!*head) {
+ *head = req;
+ req->next = req->prev = req;
+ return;
+ }
+
+ req->prev = (*head)->prev;
+ req->prev->next = req;
+ req->next = *head;
+ (*head)->prev = req;
+}
+
/* Count the '.' characters in the NUL-terminated string s. */
static int
string_num_dots(const char *s) {
	int n = 0;
	for (; *s; ++s) {
		if (*s == '.')
			++n;
	}
	return n;
}
+
/* Allocate and initialise a DNS request for (type, name); the wire
 * packet is built in the same allocation, immediately after the struct.
 * A transaction id and a nameserver are assigned only when we are below
 * the inflight limit (otherwise trans_id stays 0xffff and ns NULL until
 * the request leaves the waiting queue).  Returns NULL on allocation or
 * encoding failure.  The request is NOT queued; see request_submit(). */
static struct request *
request_new(int type, const char *name, int flags,
    evdns_callback_type callback, void *user_ptr) {
	const char issuing_now =
	    (global_requests_inflight < global_max_requests_inflight) ? 1 : 0;

	const int name_len = strlen(name);
	const int request_max_len = evdns_request_len(name_len);
	const u16 trans_id = issuing_now ? transaction_id_pick() : 0xffff;
	/* the request data is alloced in a single block with the header */
	struct request *const req =
	    (struct request *) malloc(sizeof(struct request) + request_max_len);
	int rlen;
	(void) flags;

	if (!req) return NULL;
	memset(req, 0, sizeof(struct request));

	evtimer_set(&req->timeout_event, evdns_request_timeout_callback, req);

	/* request data lives just after the header */
	req->request = ((u8 *) req) + sizeof(struct request);
	/* denotes that the request data shouldn't be free()ed */
	req->request_appended = 1;
	rlen = evdns_request_data_build(name, name_len, trans_id,
	    type, CLASS_INET, req->request, request_max_len);
	if (rlen < 0)
		goto err1;
	req->request_len = rlen;
	req->trans_id = trans_id;
	req->tx_count = 0;
	req->request_type = type;
	req->user_pointer = user_ptr;
	req->user_callback = callback;
	req->ns = issuing_now ? nameserver_pick() : NULL;
	req->next = req->prev = NULL;

	return req;
err1:
	free(req);
	return NULL;
}
+
+static void
+request_submit(struct request *const req) {
+ if (req->ns) {
+ /* if it has a nameserver assigned then this is going */
+ /* straight into the inflight queue */
+ evdns_request_insert(req, &req_head);
+ global_requests_inflight++;
+ evdns_request_transmit(req);
+ } else {
+ evdns_request_insert(req, &req_waiting_head);
+ global_requests_waiting++;
+ }
+}
+
+/* exported function */
+int evdns_resolve_ipv4(const char *name, int flags,
+ evdns_callback_type callback, void *ptr) {
+ log(EVDNS_LOG_DEBUG, "Resolve requested for %s", name);
+ if (flags & DNS_QUERY_NO_SEARCH) {
+ struct request *const req =
+ request_new(TYPE_A, name, flags, callback, ptr);
+ if (req == NULL)
+ return (1);
+ request_submit(req);
+ return (0);
+ } else {
+ return (search_request_new(TYPE_A, name, flags, callback, ptr));
+ }
+}
+
+/* exported function */
+int evdns_resolve_ipv6(const char *name, int flags,
+ evdns_callback_type callback, void *ptr) {
+ log(EVDNS_LOG_DEBUG, "Resolve requested for %s", name);
+ if (flags & DNS_QUERY_NO_SEARCH) {
+ struct request *const req =
+ request_new(TYPE_AAAA, name, flags, callback, ptr);
+ if (req == NULL)
+ return (1);
+ request_submit(req);
+ return (0);
+ } else {
+ return (search_request_new(TYPE_AAAA, name, flags, callback, ptr));
+ }
+}
+
+int evdns_resolve_reverse(const struct in_addr *in, int flags, evdns_callback_type callback, void *ptr) {
+ char buf[32];
+ struct request *req;
+ u32 a;
+ assert(in);
+ a = ntohl(in->s_addr);
+ evutil_snprintf(buf, sizeof(buf), "%d.%d.%d.%d.in-addr.arpa",
+ (int)(u8)((a )&0xff),
+ (int)(u8)((a>>8 )&0xff),
+ (int)(u8)((a>>16)&0xff),
+ (int)(u8)((a>>24)&0xff));
+ log(EVDNS_LOG_DEBUG, "Resolve requested for %s (reverse)", buf);
+ req = request_new(TYPE_PTR, buf, flags, callback, ptr);
+ if (!req) return 1;
+ request_submit(req);
+ return 0;
+}
+
+int evdns_resolve_reverse_ipv6(const struct in6_addr *in, int flags, evdns_callback_type callback, void *ptr) {
+ /* 32 nybbles, 32 periods, "ip6.arpa", NUL. */
+ char buf[73];
+ char *cp;
+ struct request *req;
+ int i;
+ assert(in);
+ cp = buf;
+ for (i=15; i >= 0; --i) {
+ u8 byte = in->s6_addr[i];
+ *cp++ = "0123456789abcdef"[byte & 0x0f];
+ *cp++ = '.';
+ *cp++ = "0123456789abcdef"[byte >> 4];
+ *cp++ = '.';
+ }
+ assert(cp + strlen("ip6.arpa") < buf+sizeof(buf));
+ memcpy(cp, "ip6.arpa", strlen("ip6.arpa")+1);
+ log(EVDNS_LOG_DEBUG, "Resolve requested for %s (reverse)", buf);
+ req = request_new(TYPE_PTR, buf, flags, callback, ptr);
+ if (!req) return 1;
+ request_submit(req);
+ return 0;
+}
+
+/*/////////////////////////////////////////////////////////////////// */
+/* Search support */
+/* */
+/* the libc resolver has support for searching a number of domains */
+/* to find a name. If nothing else then it takes the single domain */
+/* from the gethostname() call. */
+/* */
+/* It can also be configured via the domain and search options in a */
+/* resolv.conf. */
+/* */
+/* The ndots option controls how many dots it takes for the resolver */
+/* to decide that a name is non-local and so try a raw lookup first. */
+
/* One search-domain suffix. The domain text (NOT NUL-terminated) is
 * stored immediately after this header in the same allocation. */
struct search_domain {
	int len;                    /* length of the appended text */
	struct search_domain *next; /* singly-linked list */
	/* the text string is appended to this structure */
};
+
/* Reference-counted search configuration (domain list + ndots).
 * Shared between the global pointer and each in-flight search request. */
struct search_state {
	int refcount;    /* owners: global_search_state plus each searching request */
	int ndots;       /* names with at least this many dots are tried raw first */
	int num_domains; /* number of entries in the list at `head' */
	struct search_domain *head;
};
+
/* Current search configuration; NULL until first configured. */
static struct search_state *global_search_state = NULL;
+
+static void
+search_state_decref(struct search_state *const state) {
+ if (!state) return;
+ state->refcount--;
+ if (!state->refcount) {
+ struct search_domain *next, *dom;
+ for (dom = state->head; dom; dom = next) {
+ next = dom->next;
+ free(dom);
+ }
+ free(state);
+ }
+}
+
+static struct search_state *
+search_state_new(void) {
+ struct search_state *state = (struct search_state *) malloc(sizeof(struct search_state));
+ if (!state) return NULL;
+ memset(state, 0, sizeof(struct search_state));
+ state->refcount = 1;
+ state->ndots = 1;
+
+ return state;
+}
+
/* Throw away the current search-domain list and start a fresh, empty
 * state. */
static void
search_postfix_clear(void) {
	search_state_decref(global_search_state);

	/* NOTE(review): search_state_new() can return NULL on OOM, leaving
	 * global_search_state NULL; callers must tolerate that. */
	global_search_state = search_state_new();
}
+
/* exported function */
/* Remove all configured search domains. */
void
evdns_search_clear(void) {
	search_postfix_clear();
}
+
+static void
+search_postfix_add(const char *domain) {
+ int domain_len;
+ struct search_domain *sdomain;
+ while (domain[0] == '.') domain++;
+ domain_len = strlen(domain);
+
+ if (!global_search_state) global_search_state = search_state_new();
+ if (!global_search_state) return;
+ global_search_state->num_domains++;
+
+ sdomain = (struct search_domain *) malloc(sizeof(struct search_domain) + domain_len);
+ if (!sdomain) return;
+ memcpy( ((u8 *) sdomain) + sizeof(struct search_domain), domain, domain_len);
+ sdomain->next = global_search_state->head;
+ sdomain->len = domain_len;
+
+ global_search_state->head = sdomain;
+}
+
+/* reverse the order of members in the postfix list. This is needed because, */
+/* when parsing resolv.conf we push elements in the wrong order */
+static void
+search_reverse(void) {
+ struct search_domain *cur, *prev = NULL, *next;
+ cur = global_search_state->head;
+ while (cur) {
+ next = cur->next;
+ cur->next = prev;
+ prev = cur;
+ cur = next;
+ }
+
+ global_search_state->head = prev;
+}
+
/* exported function */
/* Append a domain to the search list (prepended internally; order is
 * fixed up by search_reverse() when parsing resolv.conf). */
void
evdns_search_add(const char *domain) {
	search_postfix_add(domain);
}
+
/* exported function */
/* Set the ndots threshold (see resolv.conf(5)); creates the search
 * state on demand and silently does nothing on allocation failure. */
void
evdns_search_ndots_set(const int ndots) {
	if (!global_search_state) global_search_state = search_state_new();
	if (!global_search_state) return;
	global_search_state->ndots = ndots;
}
+
+static void
+search_set_from_hostname(void) {
+ char hostname[HOST_NAME_MAX + 1], *domainname;
+
+ search_postfix_clear();
+ if (gethostname(hostname, sizeof(hostname))) return;
+ domainname = strchr(hostname, '.');
+ if (!domainname) return;
+ search_postfix_add(domainname);
+}
+
+/* warning: returns malloced string */
+static char *
+search_make_new(const struct search_state *const state, int n, const char *const base_name) {
+ const int base_len = strlen(base_name);
+ const char need_to_append_dot = base_name[base_len - 1] == '.' ? 0 : 1;
+ struct search_domain *dom;
+
+ for (dom = state->head; dom; dom = dom->next) {
+ if (!n--) {
+ /* this is the postfix we want */
+ /* the actual postfix string is kept at the end of the structure */
+ const u8 *const postfix = ((u8 *) dom) + sizeof(struct search_domain);
+ const int postfix_len = dom->len;
+ char *const newname = (char *) malloc(base_len + need_to_append_dot + postfix_len + 1);
+ if (!newname) return NULL;
+ memcpy(newname, base_name, base_len);
+ if (need_to_append_dot) newname[base_len] = '.';
+ memcpy(newname + base_len + need_to_append_dot, postfix, postfix_len);
+ newname[base_len + need_to_append_dot + postfix_len] = 0;
+ return newname;
+ }
+ }
+
+ /* we ran off the end of the list and still didn't find the requested string */
+ abort();
+ return NULL; /* unreachable; stops warnings in some compilers. */
+}
+
/* Create and submit a request for `name', applying the search-domain
 * policy: names with >= ndots dots are tried raw first (search_index
 * -1), others start with the first search postfix (search_index 0).
 * Returns 0 on success, 1 on allocation failure. */
static int
search_request_new(int type, const char *const name, int flags, evdns_callback_type user_callback, void *user_arg) {
	assert(type == TYPE_A || type == TYPE_AAAA);
	if ( ((flags & DNS_QUERY_NO_SEARCH) == 0) &&
	     global_search_state &&
		 global_search_state->num_domains) {
		/* we have some domains to search */
		struct request *req;
		if (string_num_dots(name) >= global_search_state->ndots) {
			req = request_new(type, name, flags, user_callback, user_arg);
			if (!req) return 1;
			/* -1 marks "raw name already tried"; search_try_next
			 * then walks the postfix list from index 0 */
			req->search_index = -1;
		} else {
			char *const new_name = search_make_new(global_search_state, 0, name);
			if (!new_name) return 1;
			req = request_new(type, new_name, flags, user_callback, user_arg);
			free(new_name);
			if (!req) return 1;
			req->search_index = 0;
		}
		/* NOTE(review): strdup result is unchecked; on OOM
		 * search_origname is NULL and search_try_next would pass NULL
		 * to string_num_dots — confirm and harden upstream. */
		req->search_origname = strdup(name);
		req->search_state = global_search_state;
		req->search_flags = flags;
		global_search_state->refcount++;
		request_submit(req);
		return 0;
	} else {
		/* no searching: a single direct query */
		struct request *const req = request_new(type, name, flags, user_callback, user_arg);
		if (!req) return 1;
		request_submit(req);
		return 0;
	}
}
+
/* this is called when a request has failed to find a name. We need to check */
/* if it is part of a search and, if so, try the next name in the list */
/* returns: */
/* 0 another request has been submitted */
/* 1 no more requests needed */
static int
search_try_next(struct request *const req) {
	if (req->search_state) {
		/* it is part of a search */
		char *new_name;
		struct request *newreq;
		req->search_index++;
		if (req->search_index >= req->search_state->num_domains) {
			/* no more postfixes to try, however we may need to try */
			/* this name without a postfix */
			if (string_num_dots(req->search_origname) < req->search_state->ndots) {
				/* yep, we need to try it raw */
				newreq = request_new(req->request_type, req->search_origname, req->search_flags, req->user_callback, req->user_pointer);
				log(EVDNS_LOG_DEBUG, "Search: trying raw query %s", req->search_origname);
				if (newreq) {
					request_submit(newreq);
					return 0;
				}
			}
			return 1;
		}

		new_name = search_make_new(req->search_state, req->search_index, req->search_origname);
		if (!new_name) return 1;
		log(EVDNS_LOG_DEBUG, "Search: now trying %s (%d)", new_name, req->search_index);
		newreq = request_new(req->request_type, new_name, req->search_flags, req->user_callback, req->user_pointer);
		free(new_name);
		if (!newreq) return 1;
		/* ownership of search_origname moves to the new request; the
		 * old request must not free it in search_request_finished */
		newreq->search_origname = req->search_origname;
		req->search_origname = NULL;
		newreq->search_state = req->search_state;
		newreq->search_flags = req->search_flags;
		newreq->search_index = req->search_index;
		/* the new request holds its own reference to the search state */
		newreq->search_state->refcount++;
		request_submit(newreq);
		return 0;
	}
	return 1;
}
+
+static void
+search_request_finished(struct request *const req) {
+ if (req->search_state) {
+ search_state_decref(req->search_state);
+ req->search_state = NULL;
+ }
+ if (req->search_origname) {
+ free(req->search_origname);
+ req->search_origname = NULL;
+ }
+}
+
+/*/////////////////////////////////////////////////////////////////// */
+/* Parsing resolv.conf files */
+
/* Install fallback configuration for when resolv.conf is missing or
 * empty: a search list derived from the local hostname, and 127.0.0.1
 * as the only nameserver. */
static void
evdns_resolv_set_defaults(int flags) {
	/* if the file isn't found then we assume a local resolver */
	if (flags & DNS_OPTION_SEARCH) search_set_from_hostname();
	if (flags & DNS_OPTION_NAMESERVERS) evdns_nameserver_ip_add("127.0.0.1");
}
+
#ifndef HAVE_STRTOK_R
/* Minimal fallback that delegates to strtok(): it ignores `state' and
 * is therefore NOT reentrant. Adequate here because resolv.conf parsing
 * runs one tokenization at a time on a single thread. */
static char *
strtok_r(char *s, const char *delim, char **state) {
	return strtok(s, delim);
}
#endif
+
/* helper version of atoi which returns -1 on error */
/* Note: a parsed value of -1 is indistinguishable from an error; the
 * callers here only use non-negative option values, so that is fine. */
static int
strtoint(const char *const str) {
	char *endptr;
	long r;
	errno = 0;
	r = strtol(str, &endptr, 10);
	/* reject empty input (the old code returned 0 for "") and any
	 * trailing junk */
	if (endptr == str || *endptr) return -1;
	/* reject values that overflow long or do not fit in int; the old
	 * code silently truncated long to int */
	if (errno == ERANGE || r > INT_MAX || r < INT_MIN) return -1;
	return (int)r;
}
+
/* helper version of atoi that returns -1 on error and clips to bounds. */
static int
strtoint_clipped(const char *const str, int min, int max)
{
	const int v = strtoint(str);
	if (v == -1) return -1;
	if (v < min) return min;
	if (v > max) return max;
	return v;
}
+
+/* exported function */
+int
+evdns_set_option(const char *option, const char *val, int flags)
+{
+ if (!strncmp(option, "ndots:", 6)) {
+ const int ndots = strtoint(val);
+ if (ndots == -1) return -1;
+ if (!(flags & DNS_OPTION_SEARCH)) return 0;
+ log(EVDNS_LOG_DEBUG, "Setting ndots to %d", ndots);
+ if (!global_search_state) global_search_state = search_state_new();
+ if (!global_search_state) return -1;
+ global_search_state->ndots = ndots;
+ } else if (!strncmp(option, "timeout:", 8)) {
+ const int timeout = strtoint(val);
+ if (timeout == -1) return -1;
+ if (!(flags & DNS_OPTION_MISC)) return 0;
+ log(EVDNS_LOG_DEBUG, "Setting timeout to %d", timeout);
+ global_timeout.tv_sec = timeout;
+ } else if (!strncmp(option, "max-timeouts:", 12)) {
+ const int maxtimeout = strtoint_clipped(val, 1, 255);
+ if (maxtimeout == -1) return -1;
+ if (!(flags & DNS_OPTION_MISC)) return 0;
+ log(EVDNS_LOG_DEBUG, "Setting maximum allowed timeouts to %d",
+ maxtimeout);
+ global_max_nameserver_timeout = maxtimeout;
+ } else if (!strncmp(option, "max-inflight:", 13)) {
+ const int maxinflight = strtoint_clipped(val, 1, 65000);
+ if (maxinflight == -1) return -1;
+ if (!(flags & DNS_OPTION_MISC)) return 0;
+ log(EVDNS_LOG_DEBUG, "Setting maximum inflight requests to %d",
+ maxinflight);
+ global_max_requests_inflight = maxinflight;
+ } else if (!strncmp(option, "attempts:", 9)) {
+ int retries = strtoint(val);
+ if (retries == -1) return -1;
+ if (retries > 255) retries = 255;
+ if (!(flags & DNS_OPTION_MISC)) return 0;
+ log(EVDNS_LOG_DEBUG, "Setting retries to %d", retries);
+ global_max_retransmits = retries;
+ }
+ return 0;
+}
+
+static void
+resolv_conf_parse_line(char *const start, int flags) {
+ char *strtok_state;
+ static const char *const delims = " \t";
+#define NEXT_TOKEN strtok_r(NULL, delims, &strtok_state)
+
+ char *const first_token = strtok_r(start, delims, &strtok_state);
+ if (!first_token) return;
+
+ if (!strcmp(first_token, "nameserver") && (flags & DNS_OPTION_NAMESERVERS)) {
+ const char *const nameserver = NEXT_TOKEN;
+ struct in_addr ina;
+
+ if (inet_aton(nameserver, &ina)) {
+ /* address is valid */
+ evdns_nameserver_add(ina.s_addr);
+ }
+ } else if (!strcmp(first_token, "domain") && (flags & DNS_OPTION_SEARCH)) {
+ const char *const domain = NEXT_TOKEN;
+ if (domain) {
+ search_postfix_clear();
+ search_postfix_add(domain);
+ }
+ } else if (!strcmp(first_token, "search") && (flags & DNS_OPTION_SEARCH)) {
+ const char *domain;
+ search_postfix_clear();
+
+ while ((domain = NEXT_TOKEN)) {
+ search_postfix_add(domain);
+ }
+ search_reverse();
+ } else if (!strcmp(first_token, "options")) {
+ const char *option;
+ while ((option = NEXT_TOKEN)) {
+ const char *val = strchr(option, ':');
+ evdns_set_option(option, val ? val+1 : "", flags);
+ }
+ }
+#undef NEXT_TOKEN
+}
+
/* exported function */
/* returns: */
/* 0 no errors */
/* 1 failed to open file */
/* 2 failed to stat file */
/* 3 file too large */
/* 4 out of memory */
/* 5 short read from file */
/* 6 no nameservers configured (127.0.0.1 installed as fallback) */
int
evdns_resolv_conf_parse(int flags, const char *const filename) {
	struct stat st;
	int fd, n, r;
	u8 *resolv;
	char *start;
	int err = 0;

	log(EVDNS_LOG_DEBUG, "Parsing resolv.conf file %s", filename);

	fd = open(filename, O_RDONLY);
	if (fd < 0) {
		/* missing file: fall back to hostname-derived search list
		 * and the local resolver */
		evdns_resolv_set_defaults(flags);
		return 1;
	}

	if (fstat(fd, &st)) { err = 2; goto out1; }
	if (!st.st_size) {
		evdns_resolv_set_defaults(flags);
		err = (flags & DNS_OPTION_NAMESERVERS) ? 6 : 0;
		goto out1;
	}
	if (st.st_size > 65535) { err = 3; goto out1; }	/* no resolv.conf should be any bigger */

	resolv = (u8 *) malloc((size_t)st.st_size + 1);
	if (!resolv) { err = 4; goto out1; }

	n = 0;
	while ((r = read(fd, resolv+n, (size_t)st.st_size-n)) > 0) {
		n += r;
		if (n == st.st_size)
			break;
		assert(n < st.st_size);
	}
	/* NOTE(review): if the file shrinks mid-read, read() returns 0 and
	 * the partial buffer is parsed without reporting error 5 — confirm
	 * this is acceptable. */
	if (r < 0) { err = 5; goto out2; }
	resolv[n] = 0;	 /* we malloced an extra byte; this should be fine. */

	/* parse line by line, NUL-terminating each line in place */
	start = (char *) resolv;
	for (;;) {
		char *const newline = strchr(start, '\n');
		if (!newline) {
			resolv_conf_parse_line(start, flags);
			break;
		} else {
			*newline = 0;
			resolv_conf_parse_line(start, flags);
			start = newline + 1;
		}
	}

	if (!server_head && (flags & DNS_OPTION_NAMESERVERS)) {
		/* no nameservers were configured. */
		evdns_nameserver_ip_add("127.0.0.1");
		err = 6;
	}
	if (flags & DNS_OPTION_SEARCH && (!global_search_state || global_search_state->num_domains == 0)) {
		search_set_from_hostname();
	}

out2:
	free(resolv);
out1:
	close(fd);
	return err;
}
+
+#ifdef WIN32
/* Add multiple nameservers from a space-or-comma-separated list. */
/* Returns 0 on success, 4 on allocation failure, or the first error
 * reported by evdns_nameserver_ip_add(). */
static int
evdns_nameserver_ip_add_line(const char *ips) {
	const char *addr;
	char *buf;
	int r;
	while (*ips) {
		while (ISSPACE(*ips) || *ips == ',' || *ips == '\t')
			++ips;
		addr = ips;
		while (ISDIGIT(*ips) || *ips == '.' || *ips == ':')
			++ips;
		/* no progress: end of string, or a character that can never
		 * begin an address. The old code built an empty token here,
		 * which could loop forever if adding "" ever succeeded. */
		if (ips == addr)
			break;
		buf = malloc(ips-addr+1);
		if (!buf) return 4;
		memcpy(buf, addr, ips-addr);
		buf[ips-addr] = '\0';
		r = evdns_nameserver_ip_add(buf);
		free(buf);
		if (r) return r;
	}
	return 0;
}
+
+typedef DWORD(WINAPI *GetNetworkParams_fn_t)(FIXED_INFO *, DWORD*);
+
/* Use the windows GetNetworkParams interface in iphlpapi.dll to */
/* figure out what our nameservers are. */
/* Returns 0 on success, 4 on allocation failure, -1 otherwise. */
static int
load_nameservers_with_getnetworkparams(void)
{
	/* Based on MSDN examples and inspection of c-ares code. */
	FIXED_INFO *fixed;
	HMODULE handle = 0;
	ULONG size = sizeof(FIXED_INFO);
	void *buf = NULL;
	int status = 0, r, added_any;
	IP_ADDR_STRING *ns;
	GetNetworkParams_fn_t fn;

	/* resolve GetNetworkParams dynamically so we don't hard-link
	 * against iphlpapi.dll */
	if (!(handle = LoadLibrary("iphlpapi.dll"))) {
		log(EVDNS_LOG_WARN, "Could not open iphlpapi.dll");
		status = -1;
		goto done;
	}
	if (!(fn = (GetNetworkParams_fn_t) GetProcAddress(handle, "GetNetworkParams"))) {
		log(EVDNS_LOG_WARN, "Could not get address of function.");
		status = -1;
		goto done;
	}

	buf = malloc(size);
	if (!buf) { status = 4; goto done; }
	fixed = buf;
	r = fn(fixed, &size);
	if (r != ERROR_SUCCESS && r != ERROR_BUFFER_OVERFLOW) {
		status = -1;
		goto done;
	}
	if (r != ERROR_SUCCESS) {
		/* first buffer was too small; `size' now holds the required
		 * length, so retry exactly once with a bigger buffer */
		free(buf);
		buf = malloc(size);
		if (!buf) { status = 4; goto done; }
		fixed = buf;
		r = fn(fixed, &size);
		if (r != ERROR_SUCCESS) {
			log(EVDNS_LOG_DEBUG, "fn() failed.");
			status = -1;
			goto done;
		}
	}

	/* walk the linked list of DNS server address strings */
	assert(fixed);
	added_any = 0;
	ns = &(fixed->DnsServerList);
	while (ns) {
		r = evdns_nameserver_ip_add_line(ns->IpAddress.String);
		if (r) {
			log(EVDNS_LOG_DEBUG,"Could not add nameserver %s to list,error: %d",
				(ns->IpAddress.String),(int)GetLastError());
			status = r;
			goto done;
		} else {
			log(EVDNS_LOG_DEBUG,"Succesfully added %s as nameserver",ns->IpAddress.String);
		}

		added_any++;
		ns = ns->Next;
	}

	if (!added_any) {
		log(EVDNS_LOG_DEBUG, "No nameservers added.");
		status = -1;
	}

 done:
	/* single cleanup path for both the buffer and the DLL handle */
	if (buf)
		free(buf);
	if (handle)
		FreeLibrary(handle);
	return status;
}
+
/* Read a REG_SZ-style value named `subkey' under `key' and feed it to
 * evdns_nameserver_ip_add_line(). Returns 0/positive from that helper
 * on success, -1 if the value is absent or cannot be read. */
static int
config_nameserver_from_reg_key(HKEY key, const char *subkey)
{
	char *buf;
	DWORD bufsz = 0, type = 0;
	int status = 0;

	/* probe with a zero-length buffer to learn the required size;
	 * ERROR_MORE_DATA is the expected "value exists" answer */
	if (RegQueryValueEx(key, subkey, 0, &type, NULL, &bufsz)
	    != ERROR_MORE_DATA)
		return -1;
	if (!(buf = malloc(bufsz)))
		return -1;

	/* NOTE(review): assumes the registry value is NUL-terminated
	 * string data (REG_SZ); `type' is fetched but never checked. */
	if (RegQueryValueEx(key, subkey, 0, &type, (LPBYTE)buf, &bufsz)
	    == ERROR_SUCCESS && bufsz > 1) {
		status = evdns_nameserver_ip_add_line(buf);
	}

	free(buf);
	return status;
}
+
+#define SERVICES_KEY "System\\CurrentControlSet\\Services\\"
+#define WIN_NS_9X_KEY SERVICES_KEY "VxD\\MSTCP"
+#define WIN_NS_NT_KEY SERVICES_KEY "Tcpip\\Parameters"
+
+static int
+load_nameservers_from_registry(void)
+{
+ int found = 0;
+ int r;
+#define TRY(k, name) \
+ if (!found && config_nameserver_from_reg_key(k,name) == 0) { \
+ log(EVDNS_LOG_DEBUG,"Found nameservers in %s/%s",#k,name); \
+ found = 1; \
+ } else if (!found) { \
+ log(EVDNS_LOG_DEBUG,"Didn't find nameservers in %s/%s", \
+ #k,#name); \
+ }
+
+ if (((int)GetVersion()) > 0) { /* NT */
+ HKEY nt_key = 0, interfaces_key = 0;
+
+ if (RegOpenKeyEx(HKEY_LOCAL_MACHINE, WIN_NS_NT_KEY, 0,
+ KEY_READ, &nt_key) != ERROR_SUCCESS) {
+ log(EVDNS_LOG_DEBUG,"Couldn't open nt key, %d",(int)GetLastError());
+ return -1;
+ }
+ r = RegOpenKeyEx(nt_key, "Interfaces", 0,
+ KEY_QUERY_VALUE|KEY_ENUMERATE_SUB_KEYS,
+ &interfaces_key);
+ if (r != ERROR_SUCCESS) {
+ log(EVDNS_LOG_DEBUG,"Couldn't open interfaces key, %d",(int)GetLastError());
+ return -1;
+ }
+ TRY(nt_key, "NameServer");
+ TRY(nt_key, "DhcpNameServer");
+ TRY(interfaces_key, "NameServer");
+ TRY(interfaces_key, "DhcpNameServer");
+ RegCloseKey(interfaces_key);
+ RegCloseKey(nt_key);
+ } else {
+ HKEY win_key = 0;
+ if (RegOpenKeyEx(HKEY_LOCAL_MACHINE, WIN_NS_9X_KEY, 0,
+ KEY_READ, &win_key) != ERROR_SUCCESS) {
+ log(EVDNS_LOG_DEBUG, "Couldn't open registry key, %d", (int)GetLastError());
+ return -1;
+ }
+ TRY(win_key, "NameServer");
+ RegCloseKey(win_key);
+ }
+
+ if (found == 0) {
+ log(EVDNS_LOG_WARN,"Didn't find any nameservers.");
+ }
+
+ return found ? 0 : -1;
+#undef TRY
+}
+
/* Configure nameservers from Windows system settings: prefer the
 * IPHLPAPI GetNetworkParams interface, fall back to the registry. */
int
evdns_config_windows_nameservers(void)
{
	if (load_nameservers_with_getnetworkparams() != 0)
		return load_nameservers_from_registry();
	return 0;
}
+#endif
+
+int
+evdns_init(void)
+{
+ int res = 0;
+#ifdef WIN32
+ res = evdns_config_windows_nameservers();
+#else
+ res = evdns_resolv_conf_parse(DNS_OPTIONS_ALL, "/etc/resolv.conf");
+#endif
+
+ return (res);
+}
+
+const char *
+evdns_err_to_string(int err)
+{
+ switch (err) {
+ case DNS_ERR_NONE: return "no error";
+ case DNS_ERR_FORMAT: return "misformatted query";
+ case DNS_ERR_SERVERFAILED: return "server failed";
+ case DNS_ERR_NOTEXIST: return "name does not exist";
+ case DNS_ERR_NOTIMPL: return "query not implemented";
+ case DNS_ERR_REFUSED: return "refused";
+
+ case DNS_ERR_TRUNCATED: return "reply truncated or ill-formed";
+ case DNS_ERR_UNKNOWN: return "unknown";
+ case DNS_ERR_TIMEOUT: return "request timed out";
+ case DNS_ERR_SHUTDOWN: return "dns subsystem shut down";
+ default: return "[Unknown error code]";
+ }
+}
+
/* Tear down all evdns state: fail or drop every pending request, close
 * and free every nameserver, and release the search configuration.
 * If fail_requests is non-zero, pending requests get DNS_ERR_SHUTDOWN. */
void
evdns_shutdown(int fail_requests)
{
	struct nameserver *server, *server_next;
	struct search_domain *dom, *dom_next;

	while (req_head) {
		if (fail_requests)
			reply_callback(req_head, 0, DNS_ERR_SHUTDOWN, NULL);
		request_finished(req_head, &req_head);
	}
	while (req_waiting_head) {
		if (fail_requests)
			reply_callback(req_waiting_head, 0, DNS_ERR_SHUTDOWN, NULL);
		request_finished(req_waiting_head, &req_waiting_head);
	}
	global_requests_inflight = global_requests_waiting = 0;

	/* the nameserver list is circular: stop once the saved `next'
	 * pointer wraps back to the head */
	for (server = server_head; server; server = server_next) {
		server_next = server->next;
		if (server->socket >= 0)
			CLOSE_SOCKET(server->socket);
		(void) event_del(&server->event);
		if (server->state == 0)
			(void) event_del(&server->timeout_event);
		free(server);
		if (server_next == server_head)
			break;
	}
	server_head = NULL;
	global_good_nameservers = 0;

	/* free the search state directly instead of via search_state_decref;
	 * NOTE(review): this assumes request_finished() above released every
	 * per-request reference — confirm, otherwise this double-frees. */
	if (global_search_state) {
		for (dom = global_search_state->head; dom; dom = dom_next) {
			dom_next = dom->next;
			free(dom);
		}
		free(global_search_state);
		global_search_state = NULL;
	}
	evdns_log_fn = NULL;
}
+
+#ifdef EVDNS_MAIN
/* Demo resolve callback: print each returned address (A) or name (PTR);
 * `orig' carries the queried name. ttl and other types are ignored. */
void
main_callback(int result, char type, int count, int ttl,
    void *addrs, void *orig) {
	char *n = (char*)orig;
	int i;
	for (i = 0; i < count; ++i) {
		if (type == DNS_IPv4_A) {
			/* addrs is an array of u32 addresses for A replies */
			printf("%s: %s\n", n, debug_ntoa(((u32*)addrs)[i]));
		} else if (type == DNS_PTR) {
			/* addrs is an array of strings for PTR replies */
			printf("%s: %s\n", n, ((char**)addrs)[i]);
		}
	}
	if (!count) {
		printf("%s: No answer (%d)\n", n, result);
	}
	fflush(stdout);
}
/* Demo server callback for -servertest mode. */
void
evdns_server_callback(struct evdns_server_request *req, void *data)
{
	int i, r;
	(void)data;
	/* dummy; give 192.168.11.11 as an answer for all A questions,
	 *  give foo.bar.example.com as an answer for all PTR questions. */
	for (i = 0; i < req->nquestions; ++i) {
		u32 ans = htonl(0xc0a80b0bUL);	/* 192.168.11.11 */
		if (req->questions[i]->type == EVDNS_TYPE_A &&
		    req->questions[i]->dns_question_class == EVDNS_CLASS_INET) {
			printf(" -- replying for %s (A)\n", req->questions[i]->name);
			r = evdns_server_request_add_a_reply(req, req->questions[i]->name,
			    1, &ans, 10);
			if (r<0)
				printf("eeep, didn't work.\n");
		} else if (req->questions[i]->type == EVDNS_TYPE_PTR &&
		    req->questions[i]->dns_question_class == EVDNS_CLASS_INET) {
			printf(" -- replying for %s (PTR)\n", req->questions[i]->name);
			/* NOTE(review): unlike the A branch, the return value of
			 * add_ptr_reply is not checked here. */
			r = evdns_server_request_add_ptr_reply(req, NULL, req->questions[i]->name,
			    "foo.bar.example.com", 10);
		} else {
			printf(" -- skipping %s [%d %d]\n", req->questions[i]->name,
			    req->questions[i]->type, req->questions[i]->dns_question_class);
		}
	}

	r = evdns_request_respond(req, 0);
	if (r<0)
		printf("eeek, couldn't send reply.\n");
}
+
/* Log hook for evdns_set_log_fn(): forward every message, warning or
 * not, to stderr. */
void
logfn(int is_warn, const char *msg) {
	(void) is_warn;
	fputs(msg, stderr);
	fputc('\n', stderr);
}
/* Demo driver: resolve each command-line name (reverse with -x),
 * optionally running a dummy DNS server on UDP 10053 (-servertest). */
int
main(int c, char **v) {
	int idx;
	/* NOTE(review): verbose starts at 1, so "-v" is currently a no-op */
	int reverse = 0, verbose = 1, servertest = 0;
	if (c<2) {
		fprintf(stderr, "syntax: %s [-x] [-v] hostname\n", v[0]);
		fprintf(stderr, "syntax: %s [-servertest]\n", v[0]);
		return 1;
	}
	idx = 1;
	while (idx < c && v[idx][0] == '-') {
		if (!strcmp(v[idx], "-x"))
			reverse = 1;
		else if (!strcmp(v[idx], "-v"))
			verbose = 1;
		else if (!strcmp(v[idx], "-servertest"))
			servertest = 1;
		else
			fprintf(stderr, "Unknown option %s\n", v[idx]);
		++idx;
	}
	event_init();
	if (verbose)
		evdns_set_log_fn(logfn);
	evdns_resolv_conf_parse(DNS_OPTION_NAMESERVERS, "/etc/resolv.conf");
	if (servertest) {
		int sock;
		struct sockaddr_in my_addr;
		/* NOTE(review): socket() return value is not checked before use */
		sock = socket(PF_INET, SOCK_DGRAM, 0);
		evutil_make_socket_nonblocking(sock);
		my_addr.sin_family = AF_INET;
		my_addr.sin_port = htons(10053);
		my_addr.sin_addr.s_addr = INADDR_ANY;
		if (bind(sock, (struct sockaddr*)&my_addr, sizeof(my_addr))<0) {
			perror("bind");
			exit(1);
		}
		evdns_add_server_port(sock, 0, evdns_server_callback, NULL);
	}
	/* remaining arguments are names (or, with -x, IPv4 addresses) */
	for (; idx < c; ++idx) {
		if (reverse) {
			struct in_addr addr;
			if (!inet_aton(v[idx], &addr)) {
				fprintf(stderr, "Skipping non-IP %s\n", v[idx]);
				continue;
			}
			fprintf(stderr, "resolving %s...\n",v[idx]);
			evdns_resolve_reverse(&addr, 0, main_callback, v[idx]);
		} else {
			fprintf(stderr, "resolving (fwd) %s...\n",v[idx]);
			evdns_resolve_ipv4(v[idx], 0, main_callback, v[idx]);
		}
	}
	fflush(stdout);
	event_dispatch();
	return 0;
}
+#endif
diff --git a/libevent/evdns.h b/libevent/evdns.h
new file mode 100644
index 00000000000..1eb5c382480
--- /dev/null
+++ b/libevent/evdns.h
@@ -0,0 +1,528 @@
+/*
+ * Copyright (c) 2006 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * The original DNS code is due to Adam Langley with heavy
+ * modifications by Nick Mathewson. Adam put his DNS software in the
 * public domain. You can find his original copyright below. Please be
 * aware that the code as part of libevent is governed by the 3-clause
+ * BSD license above.
+ *
+ * This software is Public Domain. To view a copy of the public domain dedication,
+ * visit http://creativecommons.org/licenses/publicdomain/ or send a letter to
+ * Creative Commons, 559 Nathan Abbott Way, Stanford, California 94305, USA.
+ *
+ * I ask and expect, but do not require, that all derivative works contain an
+ * attribution similar to:
+ * Parts developed by Adam Langley <agl@imperialviolet.org>
+ *
+ * You may wish to replace the word "Parts" with something else depending on
+ * the amount of original code.
+ *
+ * (Derivative works does not include programs which link against, run or include
+ * the source verbatim in their source distributions)
+ */
+
+/** @file evdns.h
+ *
+ * Welcome, gentle reader
+ *
+ * Async DNS lookups are really a whole lot harder than they should be,
+ * mostly stemming from the fact that the libc resolver has never been
+ * very good at them. Before you use this library you should see if libc
+ * can do the job for you with the modern async call getaddrinfo_a
+ * (see http://www.imperialviolet.org/page25.html#e498). Otherwise,
+ * please continue.
+ *
+ * This code is based on libevent and you must call event_init before
+ * any of the APIs in this file. You must also seed the OpenSSL random
+ * source if you are using OpenSSL for ids (see below).
+ *
+ * This library is designed to be included and shipped with your source
+ * code. You statically link with it. You should also test for the
+ * existence of strtok_r and define HAVE_STRTOK_R if you have it.
+ *
+ * The DNS protocol requires a good source of id numbers and these
+ * numbers should be unpredictable for spoofing reasons. There are
+ * three methods for generating them here and you must define exactly
+ * one of them. In increasing order of preference:
+ *
+ * DNS_USE_GETTIMEOFDAY_FOR_ID:
+ * Using the bottom 16 bits of the usec result from gettimeofday. This
+ * is a pretty poor solution but should work anywhere.
+ * DNS_USE_CPU_CLOCK_FOR_ID:
+ * Using the bottom 16 bits of the nsec result from the CPU's time
+ * counter. This is better, but may not work everywhere. Requires
+ * POSIX realtime support and you'll need to link against -lrt on
+ * glibc systems at least.
+ * DNS_USE_OPENSSL_FOR_ID:
+ * Uses the OpenSSL RAND_bytes call to generate the data. You must
+ * have seeded the pool before making any calls to this library.
+ *
+ * The library keeps track of the state of nameservers and will avoid
+ * them when they go down. Otherwise it will round robin between them.
+ *
+ * Quick start guide:
+ * #include "evdns.h"
+ * void callback(int result, char type, int count, int ttl,
+ * void *addresses, void *arg);
+ * evdns_resolv_conf_parse(DNS_OPTIONS_ALL, "/etc/resolv.conf");
+ * evdns_resolve("www.hostname.com", 0, callback, NULL);
+ *
+ * When the lookup is complete the callback function is called. The
+ * first argument will be one of the DNS_ERR_* defines in evdns.h.
+ * Hopefully it will be DNS_ERR_NONE, in which case type will be
+ * DNS_IPv4_A, count will be the number of IP addresses, ttl is the time
+ * which the data can be cached for (in seconds), addresses will point
+ * to an array of uint32_t's and arg will be whatever you passed to
+ * evdns_resolve.
+ *
+ * Searching:
+ *
+ * In order for this library to be a good replacement for glibc's resolver it
+ * supports searching. This involves setting a list of default domains, in
+ * which names will be queried for. The number of dots in the query name
+ * determines the order in which this list is used.
+ *
+ * Searching appears to be a single lookup from the point of view of the API,
+ * although many DNS queries may be generated from a single call to
+ * evdns_resolve. Searching can also drastically slow down the resolution
+ * of names.
+ *
+ * To disable searching:
+ * 1. Never set it up. If you never call evdns_resolv_conf_parse or
+ * evdns_search_add then no searching will occur.
+ *
+ * 2. If you do call evdns_resolv_conf_parse then don't pass
+ * DNS_OPTION_SEARCH (or DNS_OPTIONS_ALL, which implies it).
+ *
+ * 3. When calling evdns_resolve, pass the DNS_QUERY_NO_SEARCH flag.
+ *
+ * The order of searches depends on the number of dots in the name. If the
+ * number is greater than the ndots setting then the names is first tried
+ * globally. Otherwise each search domain is appended in turn.
+ *
+ * The ndots setting can either be set from a resolv.conf, or by calling
+ * evdns_search_ndots_set.
+ *
+ * For example, with ndots set to 1 (the default) and a search domain list of
+ * ["myhome.net"]:
+ * Query: www
+ * Order: www.myhome.net, www.
+ *
+ * Query: www.abc
+ * Order: www.abc., www.abc.myhome.net
+ *
+ * Internals:
+ *
+ * Requests are kept in two queues. The first is the inflight queue. In
+ * this queue requests have an allocated transaction id and nameserver.
+ * They will soon be transmitted if they haven't already been.
+ *
+ * The second is the waiting queue. The size of the inflight ring is
+ * limited and all other requests wait in waiting queue for space. This
+ * bounds the number of concurrent requests so that we don't flood the
+ * nameserver. Several algorithms require a full walk of the inflight
+ * queue and so bounding its size keeps thing going nicely under huge
+ * (many thousands of requests) loads.
+ *
+ * If a nameserver loses too many requests it is considered down and we
+ * try not to use it. After a while we send a probe to that nameserver
+ * (a lookup for google.com) and, if it replies, we consider it working
+ * again. If the nameserver fails a probe we wait longer to try again
+ * with the next probe.
+ */
+
+#ifndef EVENTDNS_H
+#define EVENTDNS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* For integer types. */
+#include <evutil.h>
+
+/** Error codes 0-5 are as described in RFC 1035. */
+#define DNS_ERR_NONE 0
+/** The name server was unable to interpret the query */
+#define DNS_ERR_FORMAT 1
+/** The name server was unable to process this query due to a problem with the
+ * name server */
+#define DNS_ERR_SERVERFAILED 2
+/** The domain name does not exist */
+#define DNS_ERR_NOTEXIST 3
+/** The name server does not support the requested kind of query */
+#define DNS_ERR_NOTIMPL 4
/** The name server refuses to perform the specified operation for policy
+ * reasons */
+#define DNS_ERR_REFUSED 5
/** The reply was truncated or ill-formatted */
+#define DNS_ERR_TRUNCATED 65
+/** An unknown error occurred */
+#define DNS_ERR_UNKNOWN 66
+/** Communication with the server timed out */
+#define DNS_ERR_TIMEOUT 67
+/** The request was canceled because the DNS subsystem was shut down. */
+#define DNS_ERR_SHUTDOWN 68
+
+#define DNS_IPv4_A 1
+#define DNS_PTR 2
+#define DNS_IPv6_AAAA 3
+
+#define DNS_QUERY_NO_SEARCH 1
+
+#define DNS_OPTION_SEARCH 1
+#define DNS_OPTION_NAMESERVERS 2
+#define DNS_OPTION_MISC 4
+#define DNS_OPTIONS_ALL 7
+
+/**
+ * The callback that contains the results from a lookup.
+ * - type is either DNS_IPv4_A or DNS_PTR or DNS_IPv6_AAAA
+ * - count contains the number of addresses of the given type
+ * - ttl is the number of seconds the resolution may be cached for.
+ * - addresses needs to be cast according to type
+ */
+typedef void (*evdns_callback_type) (int result, char type, int count, int ttl, void *addresses, void *arg);
+
+/**
+ Initialize the asynchronous DNS library.
+
+ This function initializes support for non-blocking name resolution by
+ calling evdns_resolv_conf_parse() on UNIX and
+ evdns_config_windows_nameservers() on Windows.
+
+ @return 0 if successful, or -1 if an error occurred
+ @see evdns_shutdown()
+ */
+int evdns_init(void);
+
+
+/**
+ Shut down the asynchronous DNS resolver and terminate all active requests.
+
+ If the 'fail_requests' option is enabled, all active requests will return
+ an empty result with the error flag set to DNS_ERR_SHUTDOWN. Otherwise,
+ the requests will be silently discarded.
+
+ @param fail_requests if zero, active requests will be aborted; if non-zero,
+ active requests will return DNS_ERR_SHUTDOWN.
+ @see evdns_init()
+ */
+void evdns_shutdown(int fail_requests);
+
+
+/**
+ Convert a DNS error code to a string.
+
+ @param err the DNS error code
+ @return a string containing an explanation of the error code
+*/
+const char *evdns_err_to_string(int err);
+
+
+/**
+ Add a nameserver.
+
+ The address should be an IPv4 address in network byte order.
+ The type of address is chosen so that it matches in_addr.s_addr.
+
+ @param address an IP address in network byte order
+ @return 0 if successful, or -1 if an error occurred
+ @see evdns_nameserver_ip_add()
+ */
+int evdns_nameserver_add(unsigned long int address);
+
+
+/**
+ Get the number of configured nameservers.
+
+ This returns the number of configured nameservers (not necessarily the
+ number of running nameservers). This is useful for double-checking
+ whether our calls to the various nameserver configuration functions
+ have been successful.
+
+ @return the number of configured nameservers
+ @see evdns_nameserver_add()
+ */
+int evdns_count_nameservers(void);
+
+
+/**
+ Remove all configured nameservers, and suspend all pending resolves.
+
+ Resolves will not necessarily be re-attempted until evdns_resume() is called.
+
+ @return 0 if successful, or -1 if an error occurred
+ @see evdns_resume()
+ */
+int evdns_clear_nameservers_and_suspend(void);
+
+
+/**
+ Resume normal operation and continue any suspended resolve requests.
+
+ Re-attempt resolves left in limbo after an earlier call to
+ evdns_clear_nameservers_and_suspend().
+
+ @return 0 if successful, or -1 if an error occurred
+ @see evdns_clear_nameservers_and_suspend()
+ */
+int evdns_resume(void);
+
+
+/**
+ Add a nameserver.
+
+ This wraps the evdns_nameserver_add() function by parsing a string as an IP
+ address and adds it as a nameserver.
+
+ @return 0 if successful, or -1 if an error occurred
+ @see evdns_nameserver_add()
+ */
+int evdns_nameserver_ip_add(const char *ip_as_string);
+
+
+/**
+ Lookup an A record for a given name.
+
+ @param name a DNS hostname
+ @param flags either 0, or DNS_QUERY_NO_SEARCH to disable searching for this query.
+ @param callback a callback function to invoke when the request is completed
+ @param ptr an argument to pass to the callback function
+ @return 0 if successful, or -1 if an error occurred
+ @see evdns_resolve_ipv6(), evdns_resolve_reverse(), evdns_resolve_reverse_ipv6()
+ */
+int evdns_resolve_ipv4(const char *name, int flags, evdns_callback_type callback, void *ptr);
+
+
+/**
+ Lookup an AAAA record for a given name.
+
+ @param name a DNS hostname
+ @param flags either 0, or DNS_QUERY_NO_SEARCH to disable searching for this query.
+ @param callback a callback function to invoke when the request is completed
+ @param ptr an argument to pass to the callback function
+ @return 0 if successful, or -1 if an error occurred
+ @see evdns_resolve_ipv4(), evdns_resolve_reverse(), evdns_resolve_reverse_ipv6()
+ */
+int evdns_resolve_ipv6(const char *name, int flags, evdns_callback_type callback, void *ptr);
+
+struct in_addr;
+struct in6_addr;
+
+/**
+ Lookup a PTR record for a given IP address.
+
+ @param in an IPv4 address
+ @param flags either 0, or DNS_QUERY_NO_SEARCH to disable searching for this query.
+ @param callback a callback function to invoke when the request is completed
+ @param ptr an argument to pass to the callback function
+ @return 0 if successful, or -1 if an error occurred
+ @see evdns_resolve_reverse_ipv6()
+ */
+int evdns_resolve_reverse(const struct in_addr *in, int flags, evdns_callback_type callback, void *ptr);
+
+
+/**
+ Lookup a PTR record for a given IPv6 address.
+
+ @param in an IPv6 address
+ @param flags either 0, or DNS_QUERY_NO_SEARCH to disable searching for this query.
+ @param callback a callback function to invoke when the request is completed
+ @param ptr an argument to pass to the callback function
+ @return 0 if successful, or -1 if an error occurred
+ @see evdns_resolve_reverse_ipv6()
+ */
+int evdns_resolve_reverse_ipv6(const struct in6_addr *in, int flags, evdns_callback_type callback, void *ptr);
+
+
+/**
+ Set the value of a configuration option.
+
+ The currently available configuration options are:
+
+ ndots, timeout, max-timeouts, max-inflight, and attempts
+
+ @param option the name of the configuration option to be modified
+ @param val the value to be set
+ @param flags either 0 | DNS_OPTION_SEARCH | DNS_OPTION_MISC
+ @return 0 if successful, or -1 if an error occurred
+ */
+int evdns_set_option(const char *option, const char *val, int flags);
+
+
+/**
+ Parse a resolv.conf file.
+
+ The 'flags' parameter determines what information is parsed from the
+ resolv.conf file. See the man page for resolv.conf for the format of this
+ file.
+
+ The following directives are not parsed from the file: sortlist, rotate,
+ no-check-names, inet6, debug.
+
+ If this function encounters an error, the possible return values are: 1 =
+ failed to open file, 2 = failed to stat file, 3 = file too large, 4 = out of
+ memory, 5 = short read from file, 6 = no nameservers listed in the file
+
+ @param flags any of DNS_OPTION_NAMESERVERS|DNS_OPTION_SEARCH|DNS_OPTION_MISC|
+ DNS_OPTIONS_ALL
+ @param filename the path to the resolv.conf file
+ @return 0 if successful, or various positive error codes if an error
+ occurred (see above)
+ @see resolv.conf(3), evdns_config_windows_nameservers()
+ */
+int evdns_resolv_conf_parse(int flags, const char *const filename);
+
+
+/**
+ Obtain nameserver information using the Windows API.
+
+ Attempt to configure a set of nameservers based on platform settings on
+ a win32 host. Preferentially tries to use GetNetworkParams; if that fails,
+ looks in the registry.
+
+ @return 0 if successful, or -1 if an error occurred
+ @see evdns_resolv_conf_parse()
+ */
+#ifdef WIN32
+int evdns_config_windows_nameservers(void);
+#endif
+
+
+/**
+ Clear the list of search domains.
+ */
+void evdns_search_clear(void);
+
+
+/**
+ Add a domain to the list of search domains
+
+ @param domain the domain to be added to the search list
+ */
+void evdns_search_add(const char *domain);
+
+
+/**
+ Set the 'ndots' parameter for searches.
+
+ Sets the number of dots which, when found in a name, causes
+ the first query to be without any search domain.
+
+ @param ndots the new ndots parameter
+ */
+void evdns_search_ndots_set(const int ndots);
+
+/**
+ A callback that is invoked when a log message is generated
+
+ @param is_warning indicates if the log message is a 'warning'
+ @param msg the content of the log message
+ */
+typedef void (*evdns_debug_log_fn_type)(int is_warning, const char *msg);
+
+
+/**
+ Set the callback function to handle log messages.
+
+ @param fn the callback to be invoked when a log message is generated
+ */
+void evdns_set_log_fn(evdns_debug_log_fn_type fn);
+
+/**
+ Set a callback that will be invoked to generate transaction IDs. By
+ default, we pick transaction IDs based on the current clock time.
+
+ @param fn the new callback, or NULL to use the default.
+ */
+void evdns_set_transaction_id_fn(ev_uint16_t (*fn)(void));
+
+#define DNS_NO_SEARCH 1
+
+/*
+ * Structures and functions used to implement a DNS server.
+ */
+
+struct evdns_server_request {
+ int flags;
+ int nquestions;
+ struct evdns_server_question **questions;
+};
+struct evdns_server_question {
+ int type;
+#ifdef __cplusplus
+ int dns_question_class;
+#else
+ /* You should refer to this field as "dns_question_class". The
+ * name "class" works in C for backward compatibility, and will be
+ * removed in a future version. (1.5 or later). */
+ int class;
+#define dns_question_class class
+#endif
+ char name[1];
+};
+typedef void (*evdns_request_callback_fn_type)(struct evdns_server_request *, void *);
+#define EVDNS_ANSWER_SECTION 0
+#define EVDNS_AUTHORITY_SECTION 1
+#define EVDNS_ADDITIONAL_SECTION 2
+
+#define EVDNS_TYPE_A 1
+#define EVDNS_TYPE_NS 2
+#define EVDNS_TYPE_CNAME 5
+#define EVDNS_TYPE_SOA 6
+#define EVDNS_TYPE_PTR 12
+#define EVDNS_TYPE_MX 15
+#define EVDNS_TYPE_TXT 16
+#define EVDNS_TYPE_AAAA 28
+
+#define EVDNS_QTYPE_AXFR 252
+#define EVDNS_QTYPE_ALL 255
+
+#define EVDNS_CLASS_INET 1
+
+struct evdns_server_port *evdns_add_server_port(int socket, int is_tcp, evdns_request_callback_fn_type callback, void *user_data);
+void evdns_close_server_port(struct evdns_server_port *port);
+
+int evdns_server_request_add_reply(struct evdns_server_request *req, int section, const char *name, int type, int dns_class, int ttl, int datalen, int is_name, const char *data);
+int evdns_server_request_add_a_reply(struct evdns_server_request *req, const char *name, int n, void *addrs, int ttl);
+int evdns_server_request_add_aaaa_reply(struct evdns_server_request *req, const char *name, int n, void *addrs, int ttl);
+int evdns_server_request_add_ptr_reply(struct evdns_server_request *req, struct in_addr *in, const char *inaddr_name, const char *hostname, int ttl);
+int evdns_server_request_add_cname_reply(struct evdns_server_request *req, const char *name, const char *cname, int ttl);
+
+int evdns_server_request_respond(struct evdns_server_request *req, int err);
+int evdns_server_request_drop(struct evdns_server_request *req);
+struct sockaddr;
+int evdns_server_request_get_requesting_addr(struct evdns_server_request *_req, struct sockaddr *sa, int addr_len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !EVENTDNS_H */
diff --git a/libevent/event-internal.h b/libevent/event-internal.h
new file mode 100644
index 00000000000..6436b3358bd
--- /dev/null
+++ b/libevent/event-internal.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2000-2004 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _EVENT_INTERNAL_H_
+#define _EVENT_INTERNAL_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "config.h"
+#include "min_heap.h"
+#include "evsignal.h"
+
+struct eventop {
+ const char *name;
+ void *(*init)(struct event_base *);
+ int (*add)(void *, struct event *);
+ int (*del)(void *, struct event *);
+ int (*dispatch)(struct event_base *, void *, struct timeval *);
+ void (*dealloc)(struct event_base *, void *);
+ /* set if we need to reinitialize the event base */
+ int need_reinit;
+};
+
+struct event_base {
+ const struct eventop *evsel;
+ void *evbase;
+ int event_count; /* counts number of total events */
+ int event_count_active; /* counts number of active events */
+
+ int event_gotterm; /* Set to terminate loop */
+ int event_break; /* Set to terminate loop immediately */
+
+ /* active event management */
+ struct event_list **activequeues;
+ int nactivequeues;
+
+ /* signal handling info */
+ struct evsignal_info sig;
+
+ struct event_list eventqueue;
+ struct timeval event_tv;
+
+ struct min_heap timeheap;
+
+ struct timeval tv_cache;
+};
+
+/* Internal use only: Functions that might be missing from <sys/queue.h> */
+#ifndef HAVE_TAILQFOREACH
+#define TAILQ_FIRST(head) ((head)->tqh_first)
+#define TAILQ_END(head) NULL
+#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next)
+#define TAILQ_FOREACH(var, head, field) \
+ for((var) = TAILQ_FIRST(head); \
+ (var) != TAILQ_END(head); \
+ (var) = TAILQ_NEXT(var, field))
+#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \
+ (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \
+ (elm)->field.tqe_next = (listelm); \
+ *(listelm)->field.tqe_prev = (elm); \
+ (listelm)->field.tqe_prev = &(elm)->field.tqe_next; \
+} while (0)
+#define TAILQ_LAST(head, headname) \
+ (*(((struct headname *)((head)->tqh_last))->tqh_last))
+#define TAILQ_EMPTY(head) ((head)->tqh_first == NULL)
+
+#endif /* TAILQ_FOREACH */
+
+int _evsignal_set_handler(struct event_base *base, int evsignal,
+ void (*fn)(int));
+int _evsignal_restore_handler(struct event_base *base, int evsignal);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _EVENT_INTERNAL_H_ */
diff --git a/libevent/event.3 b/libevent/event.3
new file mode 100644
index 00000000000..5b33ec64a93
--- /dev/null
+++ b/libevent/event.3
@@ -0,0 +1,624 @@
+.\" $OpenBSD: event.3,v 1.4 2002/07/12 18:50:48 provos Exp $
+.\"
+.\" Copyright (c) 2000 Artur Grabowski <art@openbsd.org>
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\"
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. The name of the author may not be used to endorse or promote products
+.\" derived from this software without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+.\" INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+.\" AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+.\" THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+.\" EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+.\" PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+.\" OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+.\" OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+.\" ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd August 8, 2000
+.Dt EVENT 3
+.Os
+.Sh NAME
+.Nm event_init ,
+.Nm event_dispatch ,
+.Nm event_loop ,
+.Nm event_loopexit ,
+.Nm event_loopbreak ,
+.Nm event_set ,
+.Nm event_base_dispatch ,
+.Nm event_base_loop ,
+.Nm event_base_loopexit ,
+.Nm event_base_loopbreak ,
+.Nm event_base_set ,
+.Nm event_base_free ,
+.Nm event_add ,
+.Nm event_del ,
+.Nm event_once ,
+.Nm event_base_once ,
+.Nm event_pending ,
+.Nm event_initialized ,
+.Nm event_priority_init ,
+.Nm event_priority_set ,
+.Nm evtimer_set ,
+.Nm evtimer_add ,
+.Nm evtimer_del ,
+.Nm evtimer_pending ,
+.Nm evtimer_initialized ,
+.Nm signal_set ,
+.Nm signal_add ,
+.Nm signal_del ,
+.Nm signal_pending ,
+.Nm signal_initialized ,
+.Nm bufferevent_new ,
+.Nm bufferevent_free ,
+.Nm bufferevent_write ,
+.Nm bufferevent_write_buffer ,
+.Nm bufferevent_read ,
+.Nm bufferevent_enable ,
+.Nm bufferevent_disable ,
+.Nm bufferevent_settimeout ,
+.Nm bufferevent_base_set ,
+.Nm evbuffer_new ,
+.Nm evbuffer_free ,
+.Nm evbuffer_add ,
+.Nm evbuffer_add_buffer ,
+.Nm evbuffer_add_printf ,
+.Nm evbuffer_add_vprintf ,
+.Nm evbuffer_drain ,
+.Nm evbuffer_write ,
+.Nm evbuffer_read ,
+.Nm evbuffer_find ,
+.Nm evbuffer_readline ,
+.Nm evhttp_new ,
+.Nm evhttp_bind_socket ,
+.Nm evhttp_free
+.Nd execute a function when a specific event occurs
+.Sh SYNOPSIS
+.Fd #include <sys/time.h>
+.Fd #include <event.h>
+.Ft "struct event_base *"
+.Fn "event_init" "void"
+.Ft int
+.Fn "event_dispatch" "void"
+.Ft int
+.Fn "event_loop" "int flags"
+.Ft int
+.Fn "event_loopexit" "struct timeval *tv"
+.Ft int
+.Fn "event_loopbreak" "void"
+.Ft void
+.Fn "event_set" "struct event *ev" "int fd" "short event" "void (*fn)(int, short, void *)" "void *arg"
+.Ft int
+.Fn "event_base_dispatch" "struct event_base *base"
+.Ft int
+.Fn "event_base_loop" "struct event_base *base" "int flags"
+.Ft int
+.Fn "event_base_loopexit" "struct event_base *base" "struct timeval *tv"
+.Ft int
+.Fn "event_base_loopbreak" "struct event_base *base"
+.Ft int
+.Fn "event_base_set" "struct event_base *base" "struct event *"
+.Ft void
+.Fn "event_base_free" "struct event_base *base"
+.Ft int
+.Fn "event_add" "struct event *ev" "struct timeval *tv"
+.Ft int
+.Fn "event_del" "struct event *ev"
+.Ft int
+.Fn "event_once" "int fd" "short event" "void (*fn)(int, short, void *)" "void *arg" "struct timeval *tv"
+.Ft int
+.Fn "event_base_once" "struct event_base *base" "int fd" "short event" "void (*fn)(int, short, void *)" "void *arg" "struct timeval *tv"
+.Ft int
+.Fn "event_pending" "struct event *ev" "short event" "struct timeval *tv"
+.Ft int
+.Fn "event_initialized" "struct event *ev"
+.Ft int
+.Fn "event_priority_init" "int npriorities"
+.Ft int
+.Fn "event_priority_set" "struct event *ev" "int priority"
+.Ft void
+.Fn "evtimer_set" "struct event *ev" "void (*fn)(int, short, void *)" "void *arg"
+.Ft void
+.Fn "evtimer_add" "struct event *ev" "struct timeval *"
+.Ft void
+.Fn "evtimer_del" "struct event *ev"
+.Ft int
+.Fn "evtimer_pending" "struct event *ev" "struct timeval *tv"
+.Ft int
+.Fn "evtimer_initialized" "struct event *ev"
+.Ft void
+.Fn "signal_set" "struct event *ev" "int signal" "void (*fn)(int, short, void *)" "void *arg"
+.Ft void
+.Fn "signal_add" "struct event *ev" "struct timeval *"
+.Ft void
+.Fn "signal_del" "struct event *ev"
+.Ft int
+.Fn "signal_pending" "struct event *ev" "struct timeval *tv"
+.Ft int
+.Fn "signal_initialized" "struct event *ev"
+.Ft "struct bufferevent *"
+.Fn "bufferevent_new" "int fd" "evbuffercb readcb" "evbuffercb writecb" "everrorcb" "void *cbarg"
+.Ft void
+.Fn "bufferevent_free" "struct bufferevent *bufev"
+.Ft int
+.Fn "bufferevent_write" "struct bufferevent *bufev" "void *data" "size_t size"
+.Ft int
+.Fn "bufferevent_write_buffer" "struct bufferevent *bufev" "struct evbuffer *buf"
+.Ft size_t
+.Fn "bufferevent_read" "struct bufferevent *bufev" "void *data" "size_t size"
+.Ft int
+.Fn "bufferevent_enable" "struct bufferevent *bufev" "short event"
+.Ft int
+.Fn "bufferevent_disable" "struct bufferevent *bufev" "short event"
+.Ft void
+.Fn "bufferevent_settimeout" "struct bufferevent *bufev" "int timeout_read" "int timeout_write"
+.Ft int
+.Fn "bufferevent_base_set" "struct event_base *base" "struct bufferevent *bufev"
+.Ft "struct evbuffer *"
+.Fn "evbuffer_new" "void"
+.Ft void
+.Fn "evbuffer_free" "struct evbuffer *buf"
+.Ft int
+.Fn "evbuffer_add" "struct evbuffer *buf" "const void *data" "size_t size"
+.Ft int
+.Fn "evbuffer_add_buffer" "struct evbuffer *dst" "struct evbuffer *src"
+.Ft int
+.Fn "evbuffer_add_printf" "struct evbuffer *buf" "const char *fmt" "..."
+.Ft int
+.Fn "evbuffer_add_vprintf" "struct evbuffer *buf" "const char *fmt" "va_list ap"
+.Ft void
+.Fn "evbuffer_drain" "struct evbuffer *buf" "size_t size"
+.Ft int
+.Fn "evbuffer_write" "struct evbuffer *buf" "int fd"
+.Ft int
+.Fn "evbuffer_read" "struct evbuffer *buf" "int fd" "int size"
+.Ft "u_char *"
+.Fn "evbuffer_find" "struct evbuffer *buf" "const u_char *data" "size_t size"
+.Ft "char *"
+.Fn "evbuffer_readline" "struct evbuffer *buf"
+.Ft "struct evhttp *"
+.Fn "evhttp_new" "struct event_base *base"
+.Ft int
+.Fn "evhttp_bind_socket" "struct evhttp *http" "const char *address" "u_short port"
+.Ft "void"
+.Fn "evhttp_free" "struct evhttp *http"
+.Ft int
+.Fa (*event_sigcb)(void) ;
+.Ft volatile sig_atomic_t
+.Fa event_gotsig ;
+.Sh DESCRIPTION
+The
+.Nm event
+API provides a mechanism to execute a function when a specific event
+on a file descriptor occurs or after a given time has passed.
+.Pp
+The
+.Nm event
+API needs to be initialized with
+.Fn event_init
+before it can be used.
+.Pp
+In order to process events, an application needs to call
+.Fn event_dispatch .
+This function only returns on error, and should replace the event core
+of the application program.
+.Pp
+The function
+.Fn event_set
+prepares the event structure
+.Fa ev
+to be used in future calls to
+.Fn event_add
+and
+.Fn event_del .
+The event will be prepared to call the function specified by the
+.Fa fn
+argument with an
+.Fa int
+argument indicating the file descriptor, a
+.Fa short
+argument indicating the type of event, and a
+.Fa void *
+argument given in the
+.Fa arg
+argument.
+The
+.Fa fd
+indicates the file descriptor that should be monitored for events.
+The events can be either
+.Va EV_READ ,
+.Va EV_WRITE ,
+or both,
+indicating that an application can read or write from the file descriptor
+respectively without blocking.
+.Pp
+The function
+.Fa fn
+will be called with the file descriptor that triggered the event and
+the type of event which will be either
+.Va EV_TIMEOUT ,
+.Va EV_SIGNAL ,
+.Va EV_READ ,
+or
+.Va EV_WRITE .
+Additionally, an event which has registered interest in more than one of the
+preceding events, via bitwise-OR to
+.Fn event_set ,
+can provide its callback function with a bitwise-OR of more than one triggered
+event.
+The additional flag
+.Va EV_PERSIST
+makes an
+.Fn event_add
+persistent until
+.Fn event_del
+has been called.
+.Pp
+Once initialized, the
+.Fa ev
+structure can be used repeatedly with
+.Fn event_add
+and
+.Fn event_del
+and does not need to be reinitialized unless the function called and/or
+the argument to it are to be changed.
+However, when an
+.Fa ev
+structure has been added to libevent using
+.Fn event_add
+the structure must persist until the event occurs (assuming
+.Fa EV_PERSIST
+is not set) or is removed
+using
+.Fn event_del .
+You may not reuse the same
+.Fa ev
+structure for multiple monitored descriptors; each descriptor
+needs its own
+.Fa ev .
+.Pp
+The function
+.Fn event_add
+schedules the execution of the
+.Fa ev
+event when the event specified in
+.Fn event_set
+occurs or in at least the time specified in the
+.Fa tv .
+If
+.Fa tv
+is
+.Dv NULL ,
+no timeout occurs and the function will only be called
+if a matching event occurs on the file descriptor.
+The event in the
+.Fa ev
+argument must be already initialized by
+.Fn event_set
+and may not be used in calls to
+.Fn event_set
+until it has timed out or been removed with
+.Fn event_del .
+If the event in the
+.Fa ev
+argument already has a scheduled timeout, the old timeout will be
+replaced by the new one.
+.Pp
+The function
+.Fn event_del
+will cancel the event in the argument
+.Fa ev .
+If the event has already executed or has never been added
+the call will have no effect.
+.Pp
+The functions
+.Fn evtimer_set ,
+.Fn evtimer_add ,
+.Fn evtimer_del ,
+.Fn evtimer_initialized ,
+and
+.Fn evtimer_pending
+are abbreviations for common situations where only a timeout is required.
+The file descriptor passed will be \-1, and the event type will be
+.Va EV_TIMEOUT .
+.Pp
+The functions
+.Fn signal_set ,
+.Fn signal_add ,
+.Fn signal_del ,
+.Fn signal_initialized ,
+and
+.Fn signal_pending
+are abbreviations.
+The event type will be a persistent
+.Va EV_SIGNAL .
+That means
+.Fn signal_set
+adds
+.Va EV_PERSIST .
+.Pp
+In order to avoid races in signal handlers, the
+.Nm event
+API provides two variables:
+.Va event_sigcb
+and
+.Va event_gotsig .
+A signal handler
+sets
+.Va event_gotsig
+to indicate that a signal has been received.
+The application sets
+.Va event_sigcb
+to a callback function.
+After the signal handler sets
+.Va event_gotsig ,
+.Nm event_dispatch
+will execute the callback function to process received signals.
+The callback returns 1 when no events are registered any more.
+It can return \-1 to indicate an error to the
+.Nm event
+library, causing
+.Fn event_dispatch
+to terminate with
+.Va errno
+set to
+.Er EINTR .
+.Pp
+The function
+.Fn event_once
+is similar to
+.Fn event_set .
+However, it schedules a callback to be called exactly once and does not
+require the caller to prepare an
+.Fa event
+structure.
+This function supports
+.Fa EV_TIMEOUT ,
+.Fa EV_READ ,
+and
+.Fa EV_WRITE .
+.Pp
+The
+.Fn event_pending
+function can be used to check if the event specified by
+.Fa event
+is pending to run.
+If
+.Va EV_TIMEOUT
+was specified and
+.Fa tv
+is not
+.Dv NULL ,
+the expiration time of the event will be returned in
+.Fa tv .
+.Pp
+The
+.Fn event_initialized
+macro can be used to check if an event has been initialized.
+.Pp
+The
+.Nm event_loop
+function provides an interface for single pass execution of pending
+events.
+The flags
+.Va EVLOOP_ONCE
+and
+.Va EVLOOP_NONBLOCK
+are recognized.
+The
+.Nm event_loopexit
+function exits from the event loop. The next
+.Fn event_loop
+iteration after the
+given timer expires will complete normally (handling all queued events) then
+exit without blocking for events again. Subsequent invocations of
+.Fn event_loop
+will proceed normally.
+The
+.Nm event_loopbreak
+function exits from the event loop immediately.
+.Fn event_loop
+will abort after the next event is completed;
+.Fn event_loopbreak
+is typically invoked from this event's callback. This behavior is analogous
+to the "break;" statement. Subsequent invocations of
+.Fn event_loop
+will proceed normally.
+.Pp
+It is the responsibility of the caller to provide these functions with
+pre-allocated event structures.
+.Pp
+.Sh EVENT PRIORITIES
+By default
+.Nm libevent
+schedules all active events with the same priority.
+However, sometimes it is desirable to process some events with a higher
+priority than others.
+For that reason,
+.Nm libevent
+supports strict priority queues.
+Active events with a lower priority are always processed before events
+with a higher priority.
+.Pp
+The number of different priorities can be set initially with the
+.Fn event_priority_init
+function.
+This function should be called before the first call to
+.Fn event_dispatch .
+The
+.Fn event_priority_set
+function can be used to assign a priority to an event.
+By default,
+.Nm libevent
+assigns the middle priority to all events unless their priority
+is explicitly set.
+.Sh THREAD SAFE EVENTS
+.Nm Libevent
+has experimental support for thread-safe events.
+When initializing the library via
+.Fn event_init ,
+an event base is returned.
+This event base can be used in conjunction with calls to
+.Fn event_base_set ,
+.Fn event_base_dispatch ,
+.Fn event_base_loop ,
+.Fn event_base_loopexit ,
+.Fn bufferevent_base_set
+and
+.Fn event_base_free .
+.Fn event_base_set
+should be called after preparing an event with
+.Fn event_set ,
+as
+.Fn event_set
+assigns the provided event to the most recently created event base.
+.Fn bufferevent_base_set
+should be called after preparing a bufferevent with
+.Fn bufferevent_new .
+.Fn event_base_free
+should be used to free memory associated with the event base
+when it is no longer needed.
+.Sh BUFFERED EVENTS
+.Nm libevent
+provides an abstraction on top of the regular event callbacks.
+This abstraction is called a
+.Va "buffered event" .
+A buffered event provides input and output buffers that get filled
+and drained automatically.
+The user of a buffered event no longer deals directly with the IO,
+but instead is reading from input and writing to output buffers.
+.Pp
+A new bufferevent is created by
+.Fn bufferevent_new .
+The parameter
+.Fa fd
+specifies the file descriptor from which data is read and written to.
+This file descriptor is not allowed to be a
+.Xr pipe 2 .
+The next three parameters are callbacks.
+The read and write callback have the following form:
+.Ft void
+.Fn "(*cb)" "struct bufferevent *bufev" "void *arg" .
+The error callback has the following form:
+.Ft void
+.Fn "(*cb)" "struct bufferevent *bufev" "short what" "void *arg" .
+The argument is specified by the fourth parameter
+.Fa "cbarg" .
+A
+.Fa bufferevent struct
+pointer is returned on success, NULL on error.
+Both the read and the write callback may be NULL.
+The error callback has to be always provided.
+.Pp
+Once initialized, the bufferevent structure can be used repeatedly with
+bufferevent_enable() and bufferevent_disable().
+The flags parameter can be a combination of
+.Va EV_READ
+and
+.Va EV_WRITE .
+When read enabled the bufferevent will try to read from the file
+descriptor and call the read callback.
+The write callback is executed
+whenever the output buffer is drained below the write low watermark,
+which is
+.Va 0
+by default.
+.Pp
+The
+.Fn bufferevent_write
+function can be used to write data to the file descriptor.
+The data is appended to the output buffer and written to the descriptor
+automatically as it becomes available for writing.
+.Fn bufferevent_write
+returns 0 on success or \-1 on failure.
+The
+.Fn bufferevent_read
+function is used to read data from the input buffer,
+returning the amount of data read.
+.Pp
+If multiple bases are in use, bufferevent_base_set() must be called before
+enabling the bufferevent for the first time.
+.Sh NON-BLOCKING HTTP SUPPORT
+.Nm libevent
+provides a very thin HTTP layer that can be used both to host an HTTP
+server and also to make HTTP requests.
+An HTTP server can be created by calling
+.Fn evhttp_new .
+It can be bound to any port and address with the
+.Fn evhttp_bind_socket
+function.
+When the HTTP server is no longer used, it can be freed via
+.Fn evhttp_free .
+.Pp
+To be notified of HTTP requests, a user needs to register callbacks with the
+HTTP server.
+This can be done by calling
+.Fn evhttp_set_cb .
+The second argument is the URI for which a callback is being registered.
+The corresponding callback will receive an
+.Va struct evhttp_request
+object that contains all information about the request.
+.Pp
+This section does not document all the possible function calls; please
+check
+.Va event.h
+for the public interfaces.
+.Sh ADDITIONAL NOTES
+It is possible to disable support for
+.Va epoll , kqueue , devpoll , poll
+or
+.Va select
+by setting the environment variable
+.Va EVENT_NOEPOLL , EVENT_NOKQUEUE , EVENT_NODEVPOLL , EVENT_NOPOLL
+or
+.Va EVENT_NOSELECT ,
+respectively.
+By setting the environment variable
+.Va EVENT_SHOW_METHOD ,
+.Nm libevent
+displays the kernel notification method that it uses.
+.Sh RETURN VALUES
+Upon successful completion
+.Fn event_add
+and
+.Fn event_del
+return 0.
+Otherwise, \-1 is returned and the global variable errno is
+set to indicate the error.
+.Sh SEE ALSO
+.Xr kqueue 2 ,
+.Xr poll 2 ,
+.Xr select 2 ,
+.Xr evdns 3 ,
+.Xr timeout 9
+.Sh HISTORY
+The
+.Nm event
+API manpage is based on the
+.Xr timeout 9
+manpage by Artur Grabowski.
+The port of
+.Nm libevent
+to Windows is due to Michael A. Davis.
+Support for real-time signals is due to Taral.
+.Sh AUTHORS
+The
+.Nm event
+library was written by Niels Provos.
+.Sh BUGS
+This documentation is neither complete nor authoritative.
+If you are in doubt about the usage of this API then
+check the source code to find out how it works, write
+up the missing piece of documentation and send it to
+me for inclusion in this man page.
diff --git a/libevent/event.c b/libevent/event.c
new file mode 100644
index 00000000000..6eb5db05c87
--- /dev/null
+++ b/libevent/event.c
@@ -0,0 +1,1025 @@
+/*
+ * Copyright (c) 2000-2004 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#undef WIN32_LEAN_AND_MEAN
+#endif
+#include <sys/types.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#else
+#include <sys/_time.h>
+#endif
+#include <sys/queue.h>
+#include <stdio.h>
+#include <stdlib.h>
+#ifndef WIN32
+#include <unistd.h>
+#endif
+#include <errno.h>
+#include <signal.h>
+#include <string.h>
+#include <assert.h>
+#include <time.h>
+
+#include "event.h"
+#include "event-internal.h"
+#include "evutil.h"
+#include "log.h"
+
+#ifdef HAVE_EVENT_PORTS
+extern const struct eventop evportops;
+#endif
+#ifdef HAVE_SELECT
+extern const struct eventop selectops;
+#endif
+#ifdef HAVE_POLL
+extern const struct eventop pollops;
+#endif
+#ifdef HAVE_EPOLL
+extern const struct eventop epollops;
+#endif
+#ifdef HAVE_WORKING_KQUEUE
+extern const struct eventop kqops;
+#endif
+#ifdef HAVE_DEVPOLL
+extern const struct eventop devpollops;
+#endif
+#ifdef WIN32
+extern const struct eventop win32ops;
+#endif
+
+/* In order of preference */
+static const struct eventop *eventops[] = {
+#ifdef HAVE_EVENT_PORTS
+ &evportops,
+#endif
+#ifdef HAVE_WORKING_KQUEUE
+ &kqops,
+#endif
+#ifdef HAVE_EPOLL
+ &epollops,
+#endif
+#ifdef HAVE_DEVPOLL
+ &devpollops,
+#endif
+#ifdef HAVE_POLL
+ &pollops,
+#endif
+#ifdef HAVE_SELECT
+ &selectops,
+#endif
+#ifdef WIN32
+ &win32ops,
+#endif
+ NULL
+};
+
+/* Global state */
+struct event_base *current_base = NULL;
+extern struct event_base *evsignal_base;
+static int use_monotonic;
+
+/* Handle signals - This is a deprecated interface */
+int (*event_sigcb)(void); /* Signal callback when gotsig is set */
+volatile sig_atomic_t event_gotsig; /* Set in signal handler */
+
+/* Prototypes */
+static void event_queue_insert(struct event_base *, struct event *, int);
+static void event_queue_remove(struct event_base *, struct event *, int);
+static int event_haveevents(struct event_base *);
+
+static void event_process_active(struct event_base *);
+
+static int timeout_next(struct event_base *, struct timeval **);
+static void timeout_process(struct event_base *);
+static void timeout_correct(struct event_base *, struct timeval *);
+
+/* Probe once, at base creation, whether a monotonic clock is available;
+ * on success sets the file-global use_monotonic flag read by gettime()
+ * and timeout_correct(). */
+static void
+detect_monotonic(void)
+{
+#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_MONOTONIC)
+	struct timespec ts;
+
+	if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0)
+		use_monotonic = 1;
+#endif
+}
+
+/* Fetch the current time into *tp, preferring (in order) the per-loop
+ * cache filled in event_base_loop(), the monotonic clock, and finally
+ * gettimeofday().  Returns 0 on success, -1 on clock failure. */
+static int
+gettime(struct event_base *base, struct timeval *tp)
+{
+	/* tv_cache is set right after dispatch and cleared (tv_sec = 0)
+	 * outside the loop, so a non-zero tv_sec means the cache is hot. */
+	if (base->tv_cache.tv_sec) {
+		*tp = base->tv_cache;
+		return (0);
+	}
+
+#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_MONOTONIC)
+	if (use_monotonic) {
+		struct timespec ts;
+
+		if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1)
+			return (-1);
+
+		tp->tv_sec = ts.tv_sec;
+		tp->tv_usec = ts.tv_nsec / 1000;
+		return (0);
+	}
+#endif
+
+	return (evutil_gettimeofday(tp, NULL));
+}
+
+/* Legacy initializer: create a new event base and make it the global
+ * current_base used by the non-_base_ API functions. */
+struct event_base *
+event_init(void)
+{
+	struct event_base *base = event_base_new();
+
+	if (base != NULL)
+		current_base = base;
+
+	return (base);
+}
+
+/* Allocate and initialize an event base without touching current_base.
+ * Picks the first usable backend from the preference-ordered eventops[]
+ * table.  Never returns NULL: allocation or backend failure aborts via
+ * event_err/event_errx. */
+struct event_base *
+event_base_new(void)
+{
+	int i;
+	struct event_base *base;
+
+	if ((base = calloc(1, sizeof(struct event_base))) == NULL)
+		event_err(1, "%s: calloc", __func__);
+
+	/* Reset the deprecated global signal-callback state. */
+	event_sigcb = NULL;
+	event_gotsig = 0;
+
+	detect_monotonic();
+	gettime(base, &base->event_tv);
+
+	min_heap_ctor(&base->timeheap);
+	TAILQ_INIT(&base->eventqueue);
+	/* -1 marks the internal signal socketpair as not yet created. */
+	base->sig.ev_signal_pair[0] = -1;
+	base->sig.ev_signal_pair[1] = -1;
+
+	/* Try each backend in order until one initializes. */
+	base->evbase = NULL;
+	for (i = 0; eventops[i] && !base->evbase; i++) {
+		base->evsel = eventops[i];
+
+		base->evbase = base->evsel->init(base);
+	}
+
+	if (base->evbase == NULL)
+		event_errx(1, "%s: no event mechanism available", __func__);
+
+	if (getenv("EVENT_SHOW_METHOD"))
+		event_msgx("libevent using: %s\n",
+			   base->evsel->name);
+
+	/* allocate a single active event queue */
+	event_base_priority_init(base, 1);
+
+	return (base);
+}
+
+/* Tear down an event base: delete every remaining non-internal event
+ * (inserted, timed, and active), release the backend, and free the
+ * priority queues.  Passing NULL frees current_base. */
+void
+event_base_free(struct event_base *base)
+{
+	int i, n_deleted=0;
+	struct event *ev;
+
+	if (base == NULL && current_base)
+		base = current_base;
+	if (base == current_base)
+		current_base = NULL;
+
+	/* XXX(niels) - check for internal events first */
+	assert(base);
+	/* Delete all non-internal events. */
+	for (ev = TAILQ_FIRST(&base->eventqueue); ev; ) {
+		/* Grab the successor before event_del unlinks ev. */
+		struct event *next = TAILQ_NEXT(ev, ev_next);
+		if (!(ev->ev_flags & EVLIST_INTERNAL)) {
+			event_del(ev);
+			++n_deleted;
+		}
+		ev = next;
+	}
+	while ((ev = min_heap_top(&base->timeheap)) != NULL) {
+		event_del(ev);
+		++n_deleted;
+	}
+
+	for (i = 0; i < base->nactivequeues; ++i) {
+		for (ev = TAILQ_FIRST(base->activequeues[i]); ev; ) {
+			struct event *next = TAILQ_NEXT(ev, ev_active_next);
+			if (!(ev->ev_flags & EVLIST_INTERNAL)) {
+				event_del(ev);
+				++n_deleted;
+			}
+			ev = next;
+		}
+	}
+
+	if (n_deleted)
+		event_debug(("%s: %d events were still set in base",
+			__func__, n_deleted));
+
+	if (base->evsel->dealloc != NULL)
+		base->evsel->dealloc(base, base->evbase);
+
+	/* Only internal events may remain on the active queues here. */
+	for (i = 0; i < base->nactivequeues; ++i)
+		assert(TAILQ_EMPTY(base->activequeues[i]));
+
+	assert(min_heap_empty(&base->timeheap));
+	min_heap_dtor(&base->timeheap);
+
+	for (i = 0; i < base->nactivequeues; ++i)
+		free(base->activequeues[i]);
+	free(base->activequeues);
+
+	assert(TAILQ_EMPTY(&base->eventqueue));
+
+	free(base);
+}
+
+/* Reinitialize the event base after a fork.  Some backends (e.g. kqueue,
+ * epoll) do not survive fork(); this rebuilds the backend state and
+ * re-adds every inserted event.  Returns 0 on success or -1 if any
+ * event could not be re-added. */
+int
+event_reinit(struct event_base *base)
+{
+	const struct eventop *evsel = base->evsel;
+	void *evbase = base->evbase;
+	int res = 0;
+	struct event *ev;
+
+	/* check if this event mechanism requires reinit */
+	if (!evsel->need_reinit)
+		return (0);
+
+	/* prevent internal delete */
+	if (base->sig.ev_signal_added) {
+		/* we cannot call event_del here because the base has
+		 * not been reinitialized yet. */
+		event_queue_remove(base, &base->sig.ev_signal,
+		    EVLIST_INSERTED);
+		if (base->sig.ev_signal.ev_flags & EVLIST_ACTIVE)
+			event_queue_remove(base, &base->sig.ev_signal,
+			    EVLIST_ACTIVE);
+		base->sig.ev_signal_added = 0;
+	}
+
+	if (base->evsel->dealloc != NULL)
+		base->evsel->dealloc(base, base->evbase);
+	evbase = base->evbase = evsel->init(base);
+	if (base->evbase == NULL)
+		event_errx(1, "%s: could not reinitialize event mechanism",
+		    __func__);
+
+	/* Re-register every inserted event with the fresh backend. */
+	TAILQ_FOREACH(ev, &base->eventqueue, ev_next) {
+		if (evsel->add(evbase, ev) == -1)
+			res = -1;
+	}
+
+	return (res);
+}
+
+/* Legacy wrapper: initialize priority queues on the global current_base. */
+int
+event_priority_init(int npriorities)
+{
+	return event_base_priority_init(current_base, npriorities);
+}
+
+/*
+ * Allocate npriorities active-event queues for base, replacing any
+ * existing set.  Returns -1 if events are currently active (queues
+ * cannot be resized then); aborts via event_err on allocation failure.
+ */
+int
+event_base_priority_init(struct event_base *base, int npriorities)
+{
+	int i;
+
+	if (base->event_count_active)
+		return (-1);
+
+	if (base->nactivequeues && npriorities != base->nactivequeues) {
+		for (i = 0; i < base->nactivequeues; ++i) {
+			free(base->activequeues[i]);
+		}
+		free(base->activequeues);
+	}
+
+	/* Allocate our priority queues.  calloc takes (count, element
+	 * size); the previous code passed npriorities * sizeof(...) as
+	 * the element size and over-allocated npriorities^2 slots. */
+	base->nactivequeues = npriorities;
+	base->activequeues = (struct event_list **)calloc(base->nactivequeues,
+	    sizeof(struct event_list *));
+	if (base->activequeues == NULL)
+		event_err(1, "%s: calloc", __func__);
+
+	for (i = 0; i < base->nactivequeues; ++i) {
+		base->activequeues[i] = malloc(sizeof(struct event_list));
+		if (base->activequeues[i] == NULL)
+			event_err(1, "%s: malloc", __func__);
+		TAILQ_INIT(base->activequeues[i]);
+	}
+
+	return (0);
+}
+
+/* Return non-zero if the base still tracks any (non-internal) events. */
+int
+event_haveevents(struct event_base *base)
+{
+	return (base->event_count > 0);
+}
+
+/*
+ * Active events are stored in priority queues.  Lower priorities are always
+ * processed before higher priorities.  Low priority events can starve high
+ * priority ones.
+ */
+
+static void
+event_process_active(struct event_base *base)
+{
+	struct event *ev;
+	struct event_list *activeq = NULL;
+	int i;
+	short ncalls;
+
+	/* Pick the first (i.e. highest-priority) non-empty queue. */
+	for (i = 0; i < base->nactivequeues; ++i) {
+		if (TAILQ_FIRST(base->activequeues[i]) != NULL) {
+			activeq = base->activequeues[i];
+			break;
+		}
+	}
+
+	/* Caller only invokes us when event_count_active != 0. */
+	assert(activeq != NULL);
+
+	for (ev = TAILQ_FIRST(activeq); ev; ev = TAILQ_FIRST(activeq)) {
+		if (ev->ev_events & EV_PERSIST)
+			event_queue_remove(base, ev, EVLIST_ACTIVE);
+		else
+			event_del(ev);
+
+		/* Allows deletes to work: event_del() zeroes *ev_pncalls
+		 * so a callback deleting its own event stops this loop. */
+		ncalls = ev->ev_ncalls;
+		ev->ev_pncalls = &ncalls;
+		while (ncalls) {
+			ncalls--;
+			ev->ev_ncalls = ncalls;
+			(*ev->ev_callback)((int)ev->ev_fd, ev->ev_res, ev->ev_arg);
+			if (event_gotsig || base->event_break)
+				return;
+		}
+	}
+}
+
+/*
+ * Wait continuously for events.  We exit only if no events are left.
+ */
+
+/* Legacy wrapper: run the loop on the global current_base. */
+int
+event_dispatch(void)
+{
+	return (event_loop(0));
+}
+
+/* Run the event loop on the given base until no events remain. */
+int
+event_base_dispatch(struct event_base *event_base)
+{
+	return (event_base_loop(event_base, 0));
+}
+
+/* Return the name of the kernel notification backend in use. */
+const char *
+event_base_get_method(struct event_base *base)
+{
+	assert(base);
+	return (base->evsel->name);
+}
+
+/* One-shot timer callback that flags the loop to terminate. */
+static void
+event_loopexit_cb(int fd, short what, void *arg)
+{
+	struct event_base *base = arg;
+	base->event_gotterm = 1;
+}
+
+/* not thread safe */
+/* Ask the current_base loop to exit after tv (NULL = immediately, via
+ * a zeroed one-shot timeout inside event_base_once). */
+int
+event_loopexit(const struct timeval *tv)
+{
+	return (event_once(-1, EV_TIMEOUT, event_loopexit_cb,
+		    current_base, tv));
+}
+
+/* Ask the given base's loop to exit after tv. */
+int
+event_base_loopexit(struct event_base *event_base, const struct timeval *tv)
+{
+	return (event_base_once(event_base, -1, EV_TIMEOUT, event_loopexit_cb,
+		    event_base, tv));
+}
+
+/* not thread safe */
+/* Break out of the current_base loop immediately. */
+int
+event_loopbreak(void)
+{
+	return (event_base_loopbreak(current_base));
+}
+
+/* Flag the base's loop to stop at the next callback boundary. */
+int
+event_base_loopbreak(struct event_base *event_base)
+{
+	if (event_base == NULL)
+		return (-1);
+
+	event_base->event_break = 1;
+	return (0);
+}
+
+
+
+/* not thread safe */
+
+int
+event_loop(int flags)
+{
+ return event_base_loop(current_base, flags);
+}
+
+/* Core event loop.  Repeatedly: handle termination/break/signal flags,
+ * compute the dispatch timeout, poll the backend, run expired timeouts,
+ * then process active events.  flags may contain EVLOOP_ONCE (return
+ * after one round of callbacks) and/or EVLOOP_NONBLOCK (poll without
+ * waiting).  Returns 0 on normal exit, 1 if no events were registered,
+ * -1 on backend or signal-callback error. */
+int
+event_base_loop(struct event_base *base, int flags)
+{
+	const struct eventop *evsel = base->evsel;
+	void *evbase = base->evbase;
+	struct timeval tv;
+	struct timeval *tv_p;
+	int res, done;
+
+	/* clear time cache */
+	base->tv_cache.tv_sec = 0;
+
+	if (base->sig.ev_signal_added)
+		evsignal_base = base;
+	done = 0;
+	while (!done) {
+		/* Terminate the loop if we have been asked to */
+		if (base->event_gotterm) {
+			base->event_gotterm = 0;
+			break;
+		}
+
+		if (base->event_break) {
+			base->event_break = 0;
+			break;
+		}
+
+		/* You cannot use this interface for multi-threaded apps */
+		while (event_gotsig) {
+			event_gotsig = 0;
+			if (event_sigcb) {
+				res = (*event_sigcb)();
+				if (res == -1) {
+					errno = EINTR;
+					return (-1);
+				}
+			}
+		}
+
+		/* Compensate for wall-clock time running backwards
+		 * (no-op when the monotonic clock is in use). */
+		timeout_correct(base, &tv);
+
+		tv_p = &tv;
+		if (!base->event_count_active && !(flags & EVLOOP_NONBLOCK)) {
+			timeout_next(base, &tv_p);
+		} else {
+			/*
+			 * if we have active events, we just poll new events
+			 * without waiting.
+			 */
+			evutil_timerclear(&tv);
+		}
+
+		/* If we have no events, we just exit */
+		if (!event_haveevents(base)) {
+			event_debug(("%s: no events registered.", __func__));
+			return (1);
+		}
+
+		/* update last old time */
+		gettime(base, &base->event_tv);
+
+		/* clear time cache */
+		base->tv_cache.tv_sec = 0;
+
+		res = evsel->dispatch(base, evbase, tv_p);
+
+		if (res == -1)
+			return (-1);
+		/* Cache "now" so all callbacks this round see one time. */
+		gettime(base, &base->tv_cache);
+
+		timeout_process(base);
+
+		if (base->event_count_active) {
+			event_process_active(base);
+			if (!base->event_count_active && (flags & EVLOOP_ONCE))
+				done = 1;
+		} else if (flags & EVLOOP_NONBLOCK)
+			done = 1;
+	}
+
+	/* clear time cache */
+	base->tv_cache.tv_sec = 0;
+
+	event_debug(("%s: asked to terminate loop.", __func__));
+	return (0);
+}
+
+/* Sets up an event for processing once */
+
+/* Heap-allocated wrapper pairing a one-shot event with the user's
+ * callback and argument; freed by event_once_cb after firing. */
+struct event_once {
+	struct event ev;
+
+	void (*cb)(int, short, void *);
+	void *arg;
+};
+
+/* One-time callback, it deletes itself */
+
+static void
+event_once_cb(int fd, short events, void *arg)
+{
+	struct event_once *eonce = arg;
+
+	(*eonce->cb)(fd, events, eonce->arg);
+	free(eonce);
+}
+
+/* not threadsafe, event scheduled once. */
+/* Legacy wrapper: schedule a one-shot event on current_base. */
+int
+event_once(int fd, short events,
+    void (*callback)(int, short, void *), void *arg, const struct timeval *tv)
+{
+	return event_base_once(current_base, fd, events, callback, arg, tv);
+}
+
+/* Schedules an event once */
+/* Schedule callback(fd, events, arg) to fire exactly once: either a
+ * pure timeout (events == EV_TIMEOUT, tv == NULL meaning "now") or an
+ * EV_READ/EV_WRITE event with optional timeout.  Signals are rejected.
+ * Returns 0 on success, -1 on bad arguments or allocation failure. */
+int
+event_base_once(struct event_base *base, int fd, short events,
+    void (*callback)(int, short, void *), void *arg, const struct timeval *tv)
+{
+	struct event_once *eonce;
+	struct timeval etv;
+	int res;
+
+	/* We cannot support signals that just fire once */
+	if (events & EV_SIGNAL)
+		return (-1);
+
+	if ((eonce = calloc(1, sizeof(struct event_once))) == NULL)
+		return (-1);
+
+	eonce->cb = callback;
+	eonce->arg = arg;
+
+	if (events == EV_TIMEOUT) {
+		if (tv == NULL) {
+			/* NULL timeout means "fire immediately". */
+			evutil_timerclear(&etv);
+			tv = &etv;
+		}
+
+		evtimer_set(&eonce->ev, event_once_cb, eonce);
+	} else if (events & (EV_READ|EV_WRITE)) {
+		events &= EV_READ|EV_WRITE;
+
+		event_set(&eonce->ev, fd, events, event_once_cb, eonce);
+	} else {
+		/* Bad event combination */
+		free(eonce);
+		return (-1);
+	}
+
+	res = event_base_set(base, &eonce->ev);
+	if (res == 0)
+		res = event_add(&eonce->ev, tv);
+	if (res != 0) {
+		free(eonce);
+		return (res);
+	}
+
+	return (0);
+}
+
+/* Initialize an event structure with its fd, interest mask, callback
+ * and argument.  Does not register the event; call event_add() for
+ * that.  The event is bound to current_base until event_base_set(). */
+void
+event_set(struct event *ev, int fd, short events,
+	  void (*callback)(int, short, void *), void *arg)
+{
+	/* Take the current base - caller needs to set the real base later */
+	ev->ev_base = current_base;
+
+	ev->ev_callback = callback;
+	ev->ev_arg = arg;
+	ev->ev_fd = fd;
+	ev->ev_events = events;
+	ev->ev_res = 0;
+	ev->ev_flags = EVLIST_INIT;
+	ev->ev_ncalls = 0;
+	ev->ev_pncalls = NULL;
+
+	min_heap_elem_init(ev);
+
+	/* by default, we put new events into the middle priority */
+	if(current_base)
+		ev->ev_pri = current_base->nactivequeues/2;
+}
+
+/* Attach an event to a specific base (instead of current_base) and
+ * reset its priority to the base's middle queue.  Fails (-1) once the
+ * event has been added anywhere. */
+int
+event_base_set(struct event_base *base, struct event *ev)
+{
+	/* Only innocent events may be assigned to a different base */
+	if (ev->ev_flags != EVLIST_INIT)
+		return (-1);
+
+	ev->ev_base = base;
+	ev->ev_pri = base->nactivequeues/2;
+
+	return (0);
+}
+
+/*
+ * Sets the priority of an event - if an event is already scheduled
+ * changing the priority is going to fail.
+ */
+
+int
+event_priority_set(struct event *ev, int pri)
+{
+	if (ev->ev_flags & EVLIST_ACTIVE)
+		return (-1);
+	/* Priority must index an existing active queue. */
+	if (pri < 0 || pri >= ev->ev_base->nactivequeues)
+		return (-1);
+
+	ev->ev_pri = pri;
+
+	return (0);
+}
+
+/*
+ * Checks if a specific event is pending or scheduled.
+ * Returns the subset of `event` flags (EV_TIMEOUT|EV_READ|EV_WRITE|
+ * EV_SIGNAL) that are currently pending on ev.  If tv is non-NULL and
+ * a timeout is pending, *tv receives the expiry as wall-clock time.
+ */
+
+int
+event_pending(struct event *ev, short event, struct timeval *tv)
+{
+	struct timeval now, res;
+	int flags = 0;
+
+	if (ev->ev_flags & EVLIST_INSERTED)
+		flags |= (ev->ev_events & (EV_READ|EV_WRITE|EV_SIGNAL));
+	if (ev->ev_flags & EVLIST_ACTIVE)
+		flags |= ev->ev_res;
+	if (ev->ev_flags & EVLIST_TIMEOUT)
+		flags |= EV_TIMEOUT;
+
+	event &= (EV_TIMEOUT|EV_READ|EV_WRITE|EV_SIGNAL);
+
+	/* See if there is a timeout that we should report */
+	if (tv != NULL && (flags & event & EV_TIMEOUT)) {
+		/* ev_timeout is on the (possibly monotonic) loop clock;
+		 * convert the remaining delta back to wall-clock time. */
+		gettime(ev->ev_base, &now);
+		evutil_timersub(&ev->ev_timeout, &now, &res);
+		/* correctly remap to real time */
+		evutil_gettimeofday(&now, NULL);
+		evutil_timeradd(&now, &res, tv);
+	}
+
+	return (flags & event);
+}
+
+/* Register an event with its base.  tv, if non-NULL, (re)arms a
+ * timeout of that duration from now.  Returns 0 on success, -1 on
+ * backend or allocation failure, in which case no state is changed. */
+int
+event_add(struct event *ev, const struct timeval *tv)
+{
+	struct event_base *base = ev->ev_base;
+	const struct eventop *evsel = base->evsel;
+	void *evbase = base->evbase;
+	int res = 0;
+
+	event_debug((
+		 "event_add: event: %p, %s%s%scall %p",
+		 ev,
+		 ev->ev_events & EV_READ ? "EV_READ " : " ",
+		 ev->ev_events & EV_WRITE ? "EV_WRITE " : " ",
+		 tv ? "EV_TIMEOUT " : " ",
+		 ev->ev_callback));
+
+	assert(!(ev->ev_flags & ~EVLIST_ALL));
+
+	/*
+	 * prepare for timeout insertion further below, if we get a
+	 * failure on any step, we should not change any state.
+	 */
+	if (tv != NULL && !(ev->ev_flags & EVLIST_TIMEOUT)) {
+		if (min_heap_reserve(&base->timeheap,
+			1 + min_heap_size(&base->timeheap)) == -1)
+			return (-1);  /* ENOMEM == errno */
+	}
+
+	if ((ev->ev_events & (EV_READ|EV_WRITE|EV_SIGNAL)) &&
+	    !(ev->ev_flags & (EVLIST_INSERTED|EVLIST_ACTIVE))) {
+		res = evsel->add(evbase, ev);
+		if (res != -1)
+			event_queue_insert(base, ev, EVLIST_INSERTED);
+	}
+
+	/*
+	 * we should change the timeout state only if the previous event
+	 * addition succeeded.
+	 */
+	if (res != -1 && tv != NULL) {
+		struct timeval now;
+
+		/*
+		 * we already reserved memory above for the case where we
+		 * are not replacing an existing timeout.
+		 */
+		if (ev->ev_flags & EVLIST_TIMEOUT)
+			event_queue_remove(base, ev, EVLIST_TIMEOUT);
+
+		/* Check if it is active due to a timeout.  Rescheduling
+		 * this timeout before the callback can be executed
+		 * removes it from the active list. */
+		if ((ev->ev_flags & EVLIST_ACTIVE) &&
+		    (ev->ev_res & EV_TIMEOUT)) {
+			/* See if we are just active executing this
+			 * event in a loop
+			 */
+			if (ev->ev_ncalls && ev->ev_pncalls) {
+				/* Abort loop */
+				*ev->ev_pncalls = 0;
+			}
+
+			event_queue_remove(base, ev, EVLIST_ACTIVE);
+		}
+
+		gettime(base, &now);
+		evutil_timeradd(&now, tv, &ev->ev_timeout);
+
+		event_debug((
+			 "event_add: timeout in %ld seconds, call %p",
+			 tv->tv_sec, ev->ev_callback));
+
+		event_queue_insert(base, ev, EVLIST_TIMEOUT);
+	}
+
+	return (res);
+}
+
+/* Unregister an event from every queue it sits on (timeout, active,
+ * inserted) and, if it was inserted, from the backend.  Safe to call
+ * from the event's own callback.  Returns 0, or the backend's del()
+ * result; -1 for an event that was never added to a base. */
+int
+event_del(struct event *ev)
+{
+	struct event_base *base;
+	const struct eventop *evsel;
+	void *evbase;
+
+	event_debug(("event_del: %p, callback %p",
+		 ev, ev->ev_callback));
+
+	/* An event without a base has not been added */
+	if (ev->ev_base == NULL)
+		return (-1);
+
+	base = ev->ev_base;
+	evsel = base->evsel;
+	evbase = base->evbase;
+
+	assert(!(ev->ev_flags & ~EVLIST_ALL));
+
+	/* See if we are just active executing this event in a loop */
+	if (ev->ev_ncalls && ev->ev_pncalls) {
+		/* Abort loop */
+		*ev->ev_pncalls = 0;
+	}
+
+	if (ev->ev_flags & EVLIST_TIMEOUT)
+		event_queue_remove(base, ev, EVLIST_TIMEOUT);
+
+	if (ev->ev_flags & EVLIST_ACTIVE)
+		event_queue_remove(base, ev, EVLIST_ACTIVE);
+
+	if (ev->ev_flags & EVLIST_INSERTED) {
+		event_queue_remove(base, ev, EVLIST_INSERTED);
+		return (evsel->del(evbase, ev));
+	}
+
+	return (0);
+}
+
+/* Mark an event as active with result flags `res`, to be delivered to
+ * its callback `ncalls` times.  If already active, only OR in the new
+ * result flags. */
+void
+event_active(struct event *ev, int res, short ncalls)
+{
+	/* We get different kinds of events, add them together */
+	if (ev->ev_flags & EVLIST_ACTIVE) {
+		ev->ev_res |= res;
+		return;
+	}
+
+	ev->ev_res = res;
+	ev->ev_ncalls = ncalls;
+	ev->ev_pncalls = NULL;
+	event_queue_insert(ev->ev_base, ev, EVLIST_ACTIVE);
+}
+
+/* Compute how long dispatch may block: the delay until the earliest
+ * pending timeout.  Sets *tv_p to NULL when no timeouts are pending
+ * (block indefinitely), or zeroes it when a timeout is already due.
+ * Returns 0 on success, -1 on clock failure. */
+static int
+timeout_next(struct event_base *base, struct timeval **tv_p)
+{
+	struct timeval now;
+	struct event *ev;
+	struct timeval *tv = *tv_p;
+
+	if ((ev = min_heap_top(&base->timeheap)) == NULL) {
+		/* if no time-based events are active wait for I/O */
+		*tv_p = NULL;
+		return (0);
+	}
+
+	if (gettime(base, &now) == -1)
+		return (-1);
+
+	if (evutil_timercmp(&ev->ev_timeout, &now, <=)) {
+		evutil_timerclear(tv);
+		return (0);
+	}
+
+	evutil_timersub(&ev->ev_timeout, &now, tv);
+
+	assert(tv->tv_sec >= 0);
+	assert(tv->tv_usec >= 0);
+
+	event_debug(("timeout_next: in %ld seconds", tv->tv_sec));
+	return (0);
+}
+
+/*
+ * Determines if the time is running backwards by comparing the current
+ * time against the last time we checked.  Not needed when using clock
+ * monotonic.
+ */
+
+static void
+timeout_correct(struct event_base *base, struct timeval *tv)
+{
+	struct event **pev;
+	unsigned int size;
+	struct timeval off;
+
+	if (use_monotonic)
+		return;
+
+	/* Check if time is running backwards */
+	gettime(base, tv);
+	if (evutil_timercmp(tv, &base->event_tv, >=)) {
+		base->event_tv = *tv;
+		return;
+	}
+
+	event_debug(("%s: time is running backwards, corrected",
+		    __func__));
+	evutil_timersub(&base->event_tv, tv, &off);
+
+	/*
+	 * We can modify the key element of the node without destroying
+	 * the key, because we apply it to all in the right order.
+	 */
+	pev = base->timeheap.p;
+	size = base->timeheap.n;
+	for (; size-- > 0; ++pev) {
+		struct timeval *ev_tv = &(**pev).ev_timeout;
+		evutil_timersub(ev_tv, &off, ev_tv);
+	}
+	/* Now remember what the new time turned out to be. */
+	base->event_tv = *tv;
+}
+
+/* Activate every event whose timeout has expired: pop due entries off
+ * the min-heap, delete them from the I/O queues, and queue them active
+ * with EV_TIMEOUT. */
+void
+timeout_process(struct event_base *base)
+{
+	struct timeval now;
+	struct event *ev;
+
+	if (min_heap_empty(&base->timeheap))
+		return;
+
+	gettime(base, &now);
+
+	while ((ev = min_heap_top(&base->timeheap))) {
+		/* Heap is ordered by expiry; first future entry ends it. */
+		if (evutil_timercmp(&ev->ev_timeout, &now, >))
+			break;
+
+		/* delete this event from the I/O queues */
+		event_del(ev);
+
+		event_debug(("timeout_process: call %p",
+			 ev->ev_callback));
+		event_active(ev, EV_TIMEOUT, 1);
+	}
+}
+
+/* Remove ev from one internal queue (EVLIST_INSERTED, _ACTIVE, or
+ * _TIMEOUT), clearing the matching flag and updating the counters.
+ * Aborts if ev is not actually on that queue. */
+void
+event_queue_remove(struct event_base *base, struct event *ev, int queue)
+{
+	if (!(ev->ev_flags & queue))
+		event_errx(1, "%s: %p(fd %d) not on queue %x", __func__,
+			   ev, ev->ev_fd, queue);
+
+	/* Internal events are excluded from the public event count. */
+	if (~ev->ev_flags & EVLIST_INTERNAL)
+		base->event_count--;
+
+	ev->ev_flags &= ~queue;
+	switch (queue) {
+	case EVLIST_INSERTED:
+		TAILQ_REMOVE(&base->eventqueue, ev, ev_next);
+		break;
+	case EVLIST_ACTIVE:
+		base->event_count_active--;
+		TAILQ_REMOVE(base->activequeues[ev->ev_pri],
+		    ev, ev_active_next);
+		break;
+	case EVLIST_TIMEOUT:
+		min_heap_erase(&base->timeheap, ev);
+		break;
+	default:
+		event_errx(1, "%s: unknown queue %x", __func__, queue);
+	}
+}
+
+/* Insert ev into one internal queue (EVLIST_INSERTED, _ACTIVE, or
+ * _TIMEOUT), setting the matching flag and updating the counters.
+ * Re-activating an already-active event is a silent no-op; any other
+ * double insertion aborts. */
+void
+event_queue_insert(struct event_base *base, struct event *ev, int queue)
+{
+	if (ev->ev_flags & queue) {
+		/* Double insertion is possible for active events */
+		if (queue & EVLIST_ACTIVE)
+			return;
+
+		event_errx(1, "%s: %p(fd %d) already on queue %x", __func__,
+			   ev, ev->ev_fd, queue);
+	}
+
+	/* Internal events are excluded from the public event count. */
+	if (~ev->ev_flags & EVLIST_INTERNAL)
+		base->event_count++;
+
+	ev->ev_flags |= queue;
+	switch (queue) {
+	case EVLIST_INSERTED:
+		TAILQ_INSERT_TAIL(&base->eventqueue, ev, ev_next);
+		break;
+	case EVLIST_ACTIVE:
+		base->event_count_active++;
+		TAILQ_INSERT_TAIL(base->activequeues[ev->ev_pri],
+		    ev,ev_active_next);
+		break;
+	case EVLIST_TIMEOUT: {
+		min_heap_push(&base->timeheap, ev);
+		break;
+	}
+	default:
+		event_errx(1, "%s: unknown queue %x", __func__, queue);
+	}
+}
+
+/* Functions for debugging */
+
+/* Return the compile-time VERSION string of this libevent build. */
+const char *
+event_get_version(void)
+{
+	return (VERSION);
+}
+
+/*
+ * No thread-safe interface needed - the information should be the same
+ * for all threads.
+ */
+
+/* Return the backend name of the global current_base. */
+const char *
+event_get_method(void)
+{
+	return (current_base->evsel->name);
+}
diff --git a/libevent/event.h b/libevent/event.h
new file mode 100644
index 00000000000..039e4f88bcb
--- /dev/null
+++ b/libevent/event.h
@@ -0,0 +1,1175 @@
+/*
+ * Copyright (c) 2000-2007 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _EVENT_H_
+#define _EVENT_H_
+
+/** @mainpage
+
+ @section intro Introduction
+
+ libevent is an event notification library for developing scalable network
+ servers. The libevent API provides a mechanism to execute a callback
+ function when a specific event occurs on a file descriptor or after a
+ timeout has been reached. Furthermore, libevent also supports callbacks due
+ to signals or regular timeouts.
+
+ libevent is meant to replace the event loop found in event driven network
+ servers. An application just needs to call event_dispatch() and then add or
+ remove events dynamically without having to change the event loop.
+
+ Currently, libevent supports /dev/poll, kqueue(2), select(2), poll(2) and
+ epoll(4). It also has experimental support for real-time signals. The
+ internal event mechanism is completely independent of the exposed event API,
+ and a simple update of libevent can provide new functionality without having
+ to redesign the applications. As a result, Libevent allows for portable
+ application development and provides the most scalable event notification
+ mechanism available on an operating system. Libevent can also be used for
+ multi-threaded applications; see Steven Grimm's explanation. Libevent should
+ compile on Linux, *BSD, Mac OS X, Solaris and Windows.
+
+ @section usage Standard usage
+
+ Every program that uses libevent must include the <event.h> header, and pass
+ the -levent flag to the linker. Before using any of the functions in the
+ library, you must call event_init() or event_base_new() to perform one-time
+ initialization of the libevent library.
+
+ @section event Event notification
+
+ For each file descriptor that you wish to monitor, you must declare an event
+ structure and call event_set() to initialize the members of the structure.
+ To enable notification, you add the structure to the list of monitored
+ events by calling event_add(). The event structure must remain allocated as
+ long as it is active, so it should be allocated on the heap. Finally, you
+ call event_dispatch() to loop and dispatch events.
+
+ @section bufferevent I/O Buffers
+
+ libevent provides an abstraction on top of the regular event callbacks. This
+ abstraction is called a buffered event. A buffered event provides input and
+ output buffers that get filled and drained automatically. The user of a
+ buffered event no longer deals directly with the I/O, but instead is reading
+ from input and writing to output buffers.
+
+ Once initialized via bufferevent_new(), the bufferevent structure can be
+ used repeatedly with bufferevent_enable() and bufferevent_disable().
+ Instead of reading and writing directly to a socket, you would call
+ bufferevent_read() and bufferevent_write().
+
+ When read enabled the bufferevent will try to read from the file descriptor
+ and call the read callback. The write callback is executed whenever the
+ output buffer is drained below the write low watermark, which is 0 by
+ default.
+
+ @section timers Timers
+
+ libevent can also be used to create timers that invoke a callback after a
+ certain amount of time has expired. The evtimer_set() function prepares an
+ event struct to be used as a timer. To activate the timer, call
+ evtimer_add(). Timers can be deactivated by calling evtimer_del().
+
+ @section timeouts Timeouts
+
+ In addition to simple timers, libevent can assign timeout events to file
+ descriptors that are triggered whenever a certain amount of time has passed
+ with no activity on a file descriptor. The timeout_set() function
+ initializes an event struct for use as a timeout. Once initialized, the
+ event must be activated by using timeout_add(). To cancel the timeout, call
+ timeout_del().
+
+ @section evdns Asynchronous DNS resolution
+
+ libevent provides an asynchronous DNS resolver that should be used instead
+ of the standard DNS resolver functions. These functions can be imported by
+ including the <evdns.h> header in your program. Before using any of the
+ resolver functions, you must call evdns_init() to initialize the library. To
+ convert a hostname to an IP address, you call the evdns_resolve_ipv4()
+ function. To perform a reverse lookup, you would call the
+ evdns_resolve_reverse() function. All of these functions use callbacks to
+ avoid blocking while the lookup is performed.
+
+ @section evhttp Event-driven HTTP servers
+
+ libevent provides a very simple event-driven HTTP server that can be
+ embedded in your program and used to service HTTP requests.
+
+ To use this capability, you need to include the <evhttp.h> header in your
+ program. You create the server by calling evhttp_new(). Add addresses and
+ ports to listen on with evhttp_bind_socket(). You then register one or more
+ callbacks to handle incoming requests. Each URI can be assigned a callback
+ via the evhttp_set_cb() function. A generic callback function can also be
+ registered via evhttp_set_gencb(); this callback will be invoked if no other
+ callbacks have been registered for a given URI.
+
+ @section evrpc A framework for RPC servers and clients
+
+ libevent provides a framework for creating RPC servers and clients. It
+ takes care of marshaling and unmarshaling all data structures.
+
+ @section api API Reference
+
+ To browse the complete documentation of the libevent API, click on any of
+ the following links.
+
+ event.h
+ The primary libevent header
+
+ evdns.h
+ Asynchronous DNS resolution
+
+ evhttp.h
+ An embedded libevent-based HTTP server
+
+ evrpc.h
+ A framework for creating RPC servers and clients
+
+ */
+
+/** @file event.h
+
+ A library for writing event-driven network servers
+
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <config.h>
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#ifdef HAVE_STDINT_H
+#include <stdint.h>
+#endif
+#include <stdarg.h>
+
+/* For int types. */
+#include <evutil.h>
+
+#ifdef WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#undef WIN32_LEAN_AND_MEAN
+typedef unsigned char u_char;
+typedef unsigned short u_short;
+#endif
+
+/* EVLIST_* — which internal queues/states an event occupies (ev_flags). */
+#define EVLIST_TIMEOUT	0x01
+#define EVLIST_INSERTED	0x02
+#define EVLIST_SIGNAL	0x04
+#define EVLIST_ACTIVE	0x08
+#define EVLIST_INTERNAL	0x10
+#define EVLIST_INIT	0x80
+
+/* EVLIST_X_ Private space: 0x1000-0xf000 */
+#define EVLIST_ALL	(0xf000 | 0x9f)
+
+/* EV_* — event types a caller can request / receive in callbacks. */
+#define EV_TIMEOUT	0x01
+#define EV_READ		0x02
+#define EV_WRITE	0x04
+#define EV_SIGNAL	0x08
+#define EV_PERSIST	0x10	/* Persistent event */
+
+/* Fix so that ppl dont have to run with <sys/queue.h> */
+#ifndef TAILQ_ENTRY
+#define _EVENT_DEFINED_TQENTRY
+#define TAILQ_ENTRY(type) \
+struct { \
+ struct type *tqe_next; /* next element */ \
+ struct type **tqe_prev; /* address of previous next element */ \
+}
+#endif /* !TAILQ_ENTRY */
+
+struct event_base;
+/* A single monitored event: links into the base's inserted, active and
+ * signal queues, plus timeout-heap bookkeeping.  Must stay allocated
+ * while registered. */
+struct event {
+	TAILQ_ENTRY (event) ev_next;
+	TAILQ_ENTRY (event) ev_active_next;
+	TAILQ_ENTRY (event) ev_signal_next;
+	unsigned int min_heap_idx;	/* for managing timeouts */
+
+	struct event_base *ev_base;
+
+	int ev_fd;
+	short ev_events;		/* EV_* interest mask */
+	short ev_ncalls;
+	short *ev_pncalls;	/* Allows deletes in callback */
+
+	struct timeval ev_timeout;
+
+	int ev_pri;		/* smaller numbers are higher priority */
+
+	void (*ev_callback)(int, short, void *arg);
+	void *ev_arg;
+
+	int ev_res;		/* result passed to event callback */
+	int ev_flags;		/* EVLIST_* queue-membership bits */
+};
+
+#define EVENT_SIGNAL(ev) (int)(ev)->ev_fd
+#define EVENT_FD(ev) (int)(ev)->ev_fd
+
+/*
+ * Key-Value pairs. Can be used for HTTP headers but also for
+ * query argument parsing.
+ */
+struct evkeyval {
+ TAILQ_ENTRY(evkeyval) next; /* linkage in the containing evkeyvalq list */
+
+ char *key; /* NUL-terminated key string */
+ char *value; /* NUL-terminated value string */
+};
+
+#ifdef _EVENT_DEFINED_TQENTRY
+#undef TAILQ_ENTRY
+struct event_list;
+struct evkeyvalq;
+#undef _EVENT_DEFINED_TQENTRY
+#else
+TAILQ_HEAD (event_list, event);
+TAILQ_HEAD (evkeyvalq, evkeyval);
+#endif /* _EVENT_DEFINED_TQENTRY */
+
+/**
+ Initialize the event API.
+
+ event_base_new() initializes a new event base but does not set
+ the current_base global. If only event_base_new() is used, each event
+ added must have an event base assigned with event_base_set().
+
+ @see event_base_set(), event_base_free(), event_init()
+ */
+struct event_base *event_base_new(void);
+
+/**
+ Initialize the event API.
+
+ The event API needs to be initialized with event_init() before it can be
+ used. Sets the current_base global representing the default base for
+ events that have no base associated with them.
+
+ @see event_base_set(), event_base_new()
+ */
+struct event_base *event_init(void);
+
+/**
+ Reinitialize the event base after a fork
+
+ Some event mechanisms do not survive across fork. The event base needs
+ to be reinitialized with the event_reinit() function.
+
+ @param base the event base that needs to be re-initialized
+ @return 0 if successful, or -1 if some events could not be re-added.
+ @see event_base_new(), event_init()
+*/
+int event_reinit(struct event_base *base);
+
+/**
+ Loop to process events.
+
+ In order to process events, an application needs to call
+ event_dispatch(). This function only returns on error, and should
+ replace the event core of the application program.
+
+ @see event_base_dispatch()
+ */
+int event_dispatch(void);
+
+
+/**
+ Threadsafe event dispatching loop.
+
+ @param eb the event_base structure returned by event_init()
+ @see event_init(), event_dispatch()
+ */
+int event_base_dispatch(struct event_base *);
+
+
+/**
+ Get the kernel event notification mechanism used by libevent.
+
+ @param eb the event_base structure returned by event_base_new()
+ @return a string identifying the kernel event mechanism (kqueue, epoll, etc.)
+ */
+const char *event_base_get_method(struct event_base *);
+
+
+/**
+ Deallocate all memory associated with an event_base, and free the base.
+
+ Note that this function will not close any fds or free any memory passed
+ to event_set as the argument to callback.
+
+ @param eb an event_base to be freed
+ */
+void event_base_free(struct event_base *);
+
+
+#define _EVENT_LOG_DEBUG 0
+#define _EVENT_LOG_MSG 1
+#define _EVENT_LOG_WARN 2
+#define _EVENT_LOG_ERR 3
+typedef void (*event_log_cb)(int severity, const char *msg);
+/**
+ Redirect libevent's log messages.
+
+ @param cb a function taking two arguments: an integer severity between
+ _EVENT_LOG_DEBUG and _EVENT_LOG_ERR, and a string. If cb is NULL,
+ then the default log is used.
+ */
+void event_set_log_callback(event_log_cb cb);
+
+/**
+ Associate a different event base with an event.
+
+ @param eb the event base
+ @param ev the event
+ */
+int event_base_set(struct event_base *, struct event *);
+
+/**
+ event_loop() flags
+ */
+/*@{*/
+#define EVLOOP_ONCE 0x01 /**< Block at most once. */
+#define EVLOOP_NONBLOCK 0x02 /**< Do not block. */
+/*@}*/
+
+/**
+ Handle events.
+
+ This is a more flexible version of event_dispatch().
+
+ @param flags any combination of EVLOOP_ONCE | EVLOOP_NONBLOCK
+ @return 0 if successful, -1 if an error occurred, or 1 if no events were
+ registered.
+ @see event_loopexit(), event_base_loop()
+*/
+int event_loop(int);
+
+/**
+ Handle events (threadsafe version).
+
+ This is a more flexible version of event_base_dispatch().
+
+ @param eb the event_base structure returned by event_init()
+ @param flags any combination of EVLOOP_ONCE | EVLOOP_NONBLOCK
+ @return 0 if successful, -1 if an error occurred, or 1 if no events were
+ registered.
+ @see event_loopexit(), event_base_loop()
+ */
+int event_base_loop(struct event_base *, int);
+
+/**
+ Exit the event loop after the specified time.
+
+ The next event_loop() iteration after the given timer expires will
+ complete normally (handling all queued events) then exit without
+ blocking for events again.
+
+ Subsequent invocations of event_loop() will proceed normally.
+
+ @param tv the amount of time after which the loop should terminate.
+ @return 0 if successful, or -1 if an error occurred
+ @see event_loop(), event_base_loop(), event_base_loopexit()
+ */
+int event_loopexit(const struct timeval *);
+
+
+/**
+ Exit the event loop after the specified time (threadsafe variant).
+
+ The next event_base_loop() iteration after the given timer expires will
+ complete normally (handling all queued events) then exit without
+ blocking for events again.
+
+ Subsequent invocations of event_base_loop() will proceed normally.
+
+ @param eb the event_base structure returned by event_init()
+ @param tv the amount of time after which the loop should terminate.
+ @return 0 if successful, or -1 if an error occurred
+ @see event_loopexit()
+ */
+int event_base_loopexit(struct event_base *, const struct timeval *);
+
+/**
+ Abort the active event_loop() immediately.
+
+ event_loop() will abort the loop after the next event is completed;
+ event_loopbreak() is typically invoked from this event's callback.
+ This behavior is analogous to the "break;" statement.
+
+ Subsequent invocations of event_loop() will proceed normally.
+
+ @return 0 if successful, or -1 if an error occurred
+ @see event_base_loopbreak(), event_loopexit()
+ */
+int event_loopbreak(void);
+
+/**
+ Abort the active event_base_loop() immediately.
+
+ event_base_loop() will abort the loop after the next event is completed;
+ event_base_loopbreak() is typically invoked from this event's callback.
+ This behavior is analogous to the "break;" statement.
+
+ Subsequent invocations of event_base_loop() will proceed normally.
+
+ @param eb the event_base structure returned by event_init()
+ @return 0 if successful, or -1 if an error occurred
+ @see event_base_loopexit
+ */
+int event_base_loopbreak(struct event_base *);
+
+
+/**
+ Add a timer event.
+
+ @param ev the event struct
+ @param tv timeval struct
+ */
+#define evtimer_add(ev, tv) event_add(ev, tv)
+
+
+/**
+ Define a timer event.
+
+ @param ev event struct to be modified
+ @param cb callback function
+ @param arg argument that will be passed to the callback function
+ */
+#define evtimer_set(ev, cb, arg) event_set(ev, -1, 0, cb, arg)
+
+
+/**
+ * Delete a timer event.
+ *
+ * @param ev the event struct to be disabled
+ */
+#define evtimer_del(ev) event_del(ev)
+#define evtimer_pending(ev, tv) event_pending(ev, EV_TIMEOUT, tv)
+#define evtimer_initialized(ev) ((ev)->ev_flags & EVLIST_INIT)
+
+/**
+ * Add a timeout event.
+ *
+ * @param ev the event struct to be disabled
+ * @param tv the timeout value, in seconds
+ */
+#define timeout_add(ev, tv) event_add(ev, tv)
+
+
+/**
+ * Define a timeout event.
+ *
+ * @param ev the event struct to be defined
+ * @param cb the callback to be invoked when the timeout expires
+ * @param arg the argument to be passed to the callback
+ */
+#define timeout_set(ev, cb, arg) event_set(ev, -1, 0, cb, arg)
+
+
+/**
+ * Disable a timeout event.
+ *
+ * @param ev the timeout event to be disabled
+ */
+#define timeout_del(ev) event_del(ev)
+
+#define timeout_pending(ev, tv) event_pending(ev, EV_TIMEOUT, tv)
+#define timeout_initialized(ev) ((ev)->ev_flags & EVLIST_INIT)
+
+/*
+ * Convenience wrappers for signal events.  signal_set() configures ev to
+ * monitor signal number x as a persistent event (EV_SIGNAL|EV_PERSIST);
+ * the remaining macros mirror the generic event_add/del/pending calls.
+ */
+#define signal_add(ev, tv) event_add(ev, tv)
+#define signal_set(ev, x, cb, arg) \
+ event_set(ev, x, EV_SIGNAL|EV_PERSIST, cb, arg)
+#define signal_del(ev) event_del(ev)
+#define signal_pending(ev, tv) event_pending(ev, EV_SIGNAL, tv)
+#define signal_initialized(ev) ((ev)->ev_flags & EVLIST_INIT)
+
+/**
+ Prepare an event structure to be added.
+
+ The function event_set() prepares the event structure ev to be used in
+ future calls to event_add() and event_del(). The event will be prepared to
+ call the function specified by the fn argument with an int argument
+ indicating the file descriptor, a short argument indicating the type of
+ event, and a void * argument given in the arg argument. The fd indicates
+ the file descriptor that should be monitored for events. The events can be
+ either EV_READ, EV_WRITE, or both, indicating that an application can read
+ or write from the file descriptor respectively without blocking.
+
+ The function fn will be called with the file descriptor that triggered the
+ event and the type of event which will be either EV_TIMEOUT, EV_SIGNAL,
+ EV_READ, or EV_WRITE. The additional flag EV_PERSIST makes an event_add()
+ persistent until event_del() has been called.
+
+ @param ev an event struct to be modified
+ @param fd the file descriptor to be monitored
+ @param event desired events to monitor; can be EV_READ and/or EV_WRITE
+ @param fn callback function to be invoked when the event occurs
+ @param arg an argument to be passed to the callback function
+
+ @see event_add(), event_del(), event_once()
+
+ */
+void event_set(struct event *, int, short, void (*)(int, short, void *), void *);
+
+/**
+ Schedule a one-time event to occur.
+
+ The function event_once() is similar to event_set(). However, it schedules
+ a callback to be called exactly once and does not require the caller to
+ prepare an event structure.
+
+ @param fd a file descriptor to monitor
+ @param events event(s) to monitor; can be any of EV_TIMEOUT | EV_READ |
+ EV_WRITE
+ @param callback callback function to be invoked when the event occurs
+ @param arg an argument to be passed to the callback function
+ @param timeout the maximum amount of time to wait for the event, or NULL
+ to wait forever
+ @return 0 if successful, or -1 if an error occurred
+ @see event_set()
+
+ */
+int event_once(int, short, void (*)(int, short, void *), void *,
+ const struct timeval *);
+
+
+/**
+ Schedule a one-time event (threadsafe variant)
+
+ The function event_base_once() is similar to event_set(). However, it
+ schedules a callback to be called exactly once and does not require the
+ caller to prepare an event structure.
+
+ @param base an event_base returned by event_init()
+ @param fd a file descriptor to monitor
+ @param events event(s) to monitor; can be any of EV_TIMEOUT | EV_READ |
+ EV_WRITE
+ @param callback callback function to be invoked when the event occurs
+ @param arg an argument to be passed to the callback function
+ @param timeout the maximum amount of time to wait for the event, or NULL
+ to wait forever
+ @return 0 if successful, or -1 if an error occurred
+ @see event_once()
+ */
+int event_base_once(struct event_base *base, int fd, short events,
+ void (*callback)(int, short, void *), void *arg,
+ const struct timeval *timeout);
+
+
+/**
+ Add an event to the set of monitored events.
+
+ The function event_add() schedules the execution of the ev event when the
+ event specified in event_set() occurs or in at least the time specified in
+ the tv. If tv is NULL, no timeout occurs and the function will only be
+ called if a matching event occurs on the file descriptor. The event in the
+ ev argument must be already initialized by event_set() and may not be used
+ in calls to event_set() until it has timed out or been removed with
+ event_del(). If the event in the ev argument already has a scheduled
+ timeout, the old timeout will be replaced by the new one.
+
+ @param ev an event struct initialized via event_set()
+ @param timeout the maximum amount of time to wait for the event, or NULL
+ to wait forever
+ @return 0 if successful, or -1 if an error occurred
+ @see event_del(), event_set()
+ */
+int event_add(struct event *ev, const struct timeval *timeout);
+
+
+/**
+ Remove an event from the set of monitored events.
+
+ The function event_del() will cancel the event in the argument ev. If the
+ event has already executed or has never been added the call will have no
+ effect.
+
+ @param ev an event struct to be removed from the working set
+ @return 0 if successful, or -1 if an error occurred
+ @see event_add()
+ */
+int event_del(struct event *);
+
+void event_active(struct event *, int, short);
+
+
+/**
+ Checks if a specific event is pending or scheduled.
+
+ @param ev an event struct previously passed to event_add()
+ @param event the requested event type; any of EV_TIMEOUT|EV_READ|
+ EV_WRITE|EV_SIGNAL
+ @param tv an alternate timeout (FIXME - is this true?)
+
+ @return 1 if the event is pending, or 0 if the event has not occurred
+
+ */
+int event_pending(struct event *ev, short event, struct timeval *tv);
+
+
+/**
+ Test if an event structure has been initialized.
+
+ The event_initialized() macro can be used to check if an event has been
+ initialized.
+
+ @param ev an event structure to be tested
+ @return 1 if the structure has been initialized, or 0 if it has not been
+ initialized
+ */
+#ifdef WIN32
+#define event_initialized(ev) ((ev)->ev_flags & EVLIST_INIT && (ev)->ev_fd != (int)INVALID_HANDLE_VALUE)
+#else
+#define event_initialized(ev) ((ev)->ev_flags & EVLIST_INIT)
+#endif
+
+
+/**
+ Get the libevent version number.
+
+ @return a string containing the version number of libevent
+ */
+const char *event_get_version(void);
+
+
+/**
+ Get the kernel event notification mechanism used by libevent.
+
+ @return a string identifying the kernel event mechanism (kqueue, epoll, etc.)
+ */
+const char *event_get_method(void);
+
+
+/**
+ Set the number of different event priorities.
+
+ By default libevent schedules all active events with the same priority.
+ However, sometimes it is desirable to process some events with a higher
+ priority than others. For that reason, libevent supports strict priority
+ queues. Active events with a lower priority are always processed before
+ events with a higher priority.
+
+ The number of different priorities can be set initially with the
+ event_priority_init() function. This function should be called before the
+ first call to event_dispatch(). The event_priority_set() function can be
+ used to assign a priority to an event. By default, libevent assigns the
+ middle priority to all events unless their priority is explicitly set.
+
+ @param npriorities the maximum number of priorities
+ @return 0 if successful, or -1 if an error occurred
+ @see event_base_priority_init(), event_priority_set()
+
+ */
+int event_priority_init(int);
+
+
+/**
+ Set the number of different event priorities (threadsafe variant).
+
+ See the description of event_priority_init() for more information.
+
+ @param eb the event_base structure returned by event_init()
+ @param npriorities the maximum number of priorities
+ @return 0 if successful, or -1 if an error occurred
+ @see event_priority_init(), event_priority_set()
+ */
+int event_base_priority_init(struct event_base *, int);
+
+
+/**
+ Assign a priority to an event.
+
+ @param ev an event struct
+ @param priority the new priority to be assigned
+ @return 0 if successful, or -1 if an error occurred
+ @see event_priority_init()
+ */
+int event_priority_set(struct event *, int);
+
+
+/* These functions deal with buffering input and output */
+
+/*
+ * A growable byte buffer.
+ *
+ * `buffer` points at the first valid byte (EVBUFFER_DATA) and `off` is
+ * the number of valid bytes (EVBUFFER_LENGTH).  `orig_buffer` is the
+ * start of the underlying allocation; `misalign` presumably counts bytes
+ * drained from the front (buffer - orig_buffer) and `totallen` the total
+ * allocated size -- confirm against buffer.c.  `cb`, if set via
+ * evbuffer_setcb(), is invoked whenever the buffer is modified.
+ */
+struct evbuffer {
+ u_char *buffer; /* first byte of valid data */
+ u_char *orig_buffer; /* start of the allocation */
+
+ size_t misalign; /* bytes skipped at the front -- TODO confirm */
+ size_t totallen; /* allocated capacity -- TODO confirm */
+ size_t off; /* number of valid bytes stored */
+
+ void (*cb)(struct evbuffer *, size_t, size_t, void *); /* modification callback; see evbuffer_setcb() */
+ void *cbarg; /* opaque argument for cb */
+};
+
+/* Just for error reporting - use other constants otherwise */
+#define EVBUFFER_READ 0x01
+#define EVBUFFER_WRITE 0x02
+#define EVBUFFER_EOF 0x10
+#define EVBUFFER_ERROR 0x20
+#define EVBUFFER_TIMEOUT 0x40
+
+struct bufferevent;
+typedef void (*evbuffercb)(struct bufferevent *, void *);
+typedef void (*everrorcb)(struct bufferevent *, short what, void *);
+
+/* Read/write watermarks for a bufferevent; see bufferevent_setwatermark(). */
+struct event_watermark {
+ size_t low; /* low watermark, in bytes */
+ size_t high; /* high watermark, in bytes */
+};
+
+/*
+ * A buffered event: wraps a file descriptor with automatically managed
+ * input and output evbuffers plus read/write/error callbacks.  Create
+ * with bufferevent_new(); see the function documentation below for the
+ * callback and watermark semantics.
+ */
+struct bufferevent {
+ struct event_base *ev_base; /* base set via bufferevent_base_set() */
+
+ struct event ev_read; /* underlying read event on the fd */
+ struct event ev_write; /* underlying write event on the fd */
+
+ struct evbuffer *input; /* data read from the fd (EVBUFFER_INPUT) */
+ struct evbuffer *output; /* data queued to write (EVBUFFER_OUTPUT) */
+
+ struct event_watermark wm_read; /* read watermarks; see bufferevent_setwatermark() */
+ struct event_watermark wm_write; /* write watermarks */
+
+ evbuffercb readcb; /* invoked when input data is available */
+ evbuffercb writecb; /* invoked when the output buffer drains below the low watermark */
+ everrorcb errorcb; /* invoked on error on the fd */
+ void *cbarg; /* opaque argument passed to all three callbacks */
+
+ int timeout_read; /* in seconds */
+ int timeout_write; /* in seconds */
+
+ short enabled; /* events that are currently enabled */
+};
+
+
+/**
+ Create a new bufferevent.
+
+ libevent provides an abstraction on top of the regular event callbacks.
+ This abstraction is called a buffered event. A buffered event provides
+ input and output buffers that get filled and drained automatically. The
+ user of a buffered event no longer deals directly with the I/O, but
+ instead is reading from input and writing to output buffers.
+
+ Once initialized, the bufferevent structure can be used repeatedly with
+ bufferevent_enable() and bufferevent_disable().
+
+ When read enabled the bufferevent will try to read from the file descriptor
+ and call the read callback. The write callback is executed whenever the
+ output buffer is drained below the write low watermark, which is 0 by
+ default.
+
+ If multiple bases are in use, bufferevent_base_set() must be called before
+ enabling the bufferevent for the first time.
+
+ @param fd the file descriptor from which data is read and written to.
+ This file descriptor is not allowed to be a pipe(2).
+ @param readcb callback to invoke when there is data to be read, or NULL if
+ no callback is desired
+ @param writecb callback to invoke when the file descriptor is ready for
+ writing, or NULL if no callback is desired
+ @param errorcb callback to invoke when there is an error on the file
+ descriptor
+ @param cbarg an argument that will be supplied to each of the callbacks
+ (readcb, writecb, and errorcb)
+ @return a pointer to a newly allocated bufferevent struct, or NULL if an
+ error occurred
+ @see bufferevent_base_set(), bufferevent_free()
+ */
+struct bufferevent *bufferevent_new(int fd,
+ evbuffercb readcb, evbuffercb writecb, everrorcb errorcb, void *cbarg);
+
+
+/**
+ Assign a bufferevent to a specific event_base.
+
+ @param base an event_base returned by event_init()
+ @param bufev a bufferevent struct returned by bufferevent_new()
+ @return 0 if successful, or -1 if an error occurred
+ @see bufferevent_new()
+ */
+int bufferevent_base_set(struct event_base *base, struct bufferevent *bufev);
+
+
+/**
+ Assign a priority to a bufferevent.
+
+ @param bufev a bufferevent struct
+ @param pri the priority to be assigned
+ @return 0 if successful, or -1 if an error occurred
+ */
+int bufferevent_priority_set(struct bufferevent *bufev, int pri);
+
+
+/**
+ Deallocate the storage associated with a bufferevent structure.
+
+ @param bufev the bufferevent structure to be freed.
+ */
+void bufferevent_free(struct bufferevent *bufev);
+
+
+/**
+ Changes the callbacks for a bufferevent.
+
+ @param bufev the bufferevent object for which to change callbacks
+ @param readcb callback to invoke when there is data to be read, or NULL if
+ no callback is desired
+ @param writecb callback to invoke when the file descriptor is ready for
+ writing, or NULL if no callback is desired
+ @param errorcb callback to invoke when there is an error on the file
+ descriptor
+ @param cbarg an argument that will be supplied to each of the callbacks
+ (readcb, writecb, and errorcb)
+ @see bufferevent_new()
+ */
+void bufferevent_setcb(struct bufferevent *bufev,
+ evbuffercb readcb, evbuffercb writecb, everrorcb errorcb, void *cbarg);
+
+/**
+ Changes the file descriptor on which the bufferevent operates.
+
+ @param bufev the bufferevent object for which to change the file descriptor
+ @param fd the file descriptor to operate on
+*/
+void bufferevent_setfd(struct bufferevent *bufev, int fd);
+
+/**
+ Write data to a bufferevent buffer.
+
+ The bufferevent_write() function can be used to write data to the file
+ descriptor. The data is appended to the output buffer and written to the
+ descriptor automatically as it becomes available for writing.
+
+ @param bufev the bufferevent to be written to
+ @param data a pointer to the data to be written
+ @param size the length of the data, in bytes
+ @return 0 if successful, or -1 if an error occurred
+ @see bufferevent_write_buffer()
+ */
+int bufferevent_write(struct bufferevent *bufev,
+ const void *data, size_t size);
+
+
+/**
+ Write data from an evbuffer to a bufferevent buffer. The evbuffer is
+ being drained as a result.
+
+ @param bufev the bufferevent to be written to
+ @param buf the evbuffer to be written
+ @return 0 if successful, or -1 if an error occurred
+ @see bufferevent_write()
+ */
+int bufferevent_write_buffer(struct bufferevent *bufev, struct evbuffer *buf);
+
+
+/**
+ Read data from a bufferevent buffer.
+
+ The bufferevent_read() function is used to read data from the input buffer.
+
+ @param bufev the bufferevent to be read from
+ @param data pointer to a buffer that will store the data
+ @param size the size of the data buffer, in bytes
+ @return the amount of data read, in bytes.
+ */
+size_t bufferevent_read(struct bufferevent *bufev, void *data, size_t size);
+
+/**
+ Enable a bufferevent.
+
+ @param bufev the bufferevent to be enabled
+ @param event any combination of EV_READ | EV_WRITE.
+ @return 0 if successful, or -1 if an error occurred
+ @see bufferevent_disable()
+ */
+int bufferevent_enable(struct bufferevent *bufev, short event);
+
+
+/**
+ Disable a bufferevent.
+
+ @param bufev the bufferevent to be disabled
+ @param event any combination of EV_READ | EV_WRITE.
+ @return 0 if successful, or -1 if an error occurred
+ @see bufferevent_enable()
+ */
+int bufferevent_disable(struct bufferevent *bufev, short event);
+
+
+/**
+ Set the read and write timeout for a buffered event.
+
+ @param bufev the bufferevent to be modified
+ @param timeout_read the read timeout
+ @param timeout_write the write timeout
+ */
+void bufferevent_settimeout(struct bufferevent *bufev,
+ int timeout_read, int timeout_write);
+
+
+/**
+ Sets the watermarks for read and write events.
+
+ On input, a bufferevent does not invoke the user read callback unless
+ there is at least low watermark data in the buffer. If the read buffer
+ is beyond the high watermark, the bufferevent stops reading from the network.
+
+ On output, the user write callback is invoked whenever the buffered data
+ falls below the low watermark.
+
+ @param bufev the bufferevent to be modified
+ @param events EV_READ, EV_WRITE or both
+ @param lowmark the lower watermark to set
+ @param highmark the high watermark to set
+*/
+
+void bufferevent_setwatermark(struct bufferevent *bufev, short events,
+ size_t lowmark, size_t highmark);
+
+/* Accessors for evbuffer / bufferevent internals. */
+#define EVBUFFER_LENGTH(x) (x)->off /* number of bytes stored in the evbuffer */
+#define EVBUFFER_DATA(x) (x)->buffer /* pointer to the first stored byte */
+#define EVBUFFER_INPUT(x) (x)->input /* a bufferevent's input buffer */
+#define EVBUFFER_OUTPUT(x) (x)->output /* a bufferevent's output buffer */
+
+
+/**
+ Allocate storage for a new evbuffer.
+
+ @return a pointer to a newly allocated evbuffer struct, or NULL if an error
+ occurred
+ */
+struct evbuffer *evbuffer_new(void);
+
+
+/**
+ Deallocate storage for an evbuffer.
+
+ @param pointer to the evbuffer to be freed
+ */
+void evbuffer_free(struct evbuffer *);
+
+
+/**
+ Expands the available space in an event buffer.
+
+ Expands the available space in the event buffer to at least datlen
+
+ @param buf the event buffer to be expanded
+ @param datlen the new minimum length requirement
+ @return 0 if successful, or -1 if an error occurred
+*/
+int evbuffer_expand(struct evbuffer *, size_t);
+
+
+/**
+ Append data to the end of an evbuffer.
+
+ @param buf the event buffer to be appended to
+ @param data pointer to the beginning of the data buffer
+ @param datlen the number of bytes to be copied from the data buffer
+ */
+int evbuffer_add(struct evbuffer *, const void *, size_t);
+
+
+
+/**
+ Read data from an event buffer and drain the bytes read.
+
+ @param buf the event buffer to be read from
+ @param data the destination buffer to store the result
+ @param datlen the maximum size of the destination buffer
+ @return the number of bytes read
+ */
+int evbuffer_remove(struct evbuffer *, void *, size_t);
+
+
+/**
+ * Read a single line from an event buffer.
+ *
+ * Reads a line terminated by either '\r\n', '\n\r' or '\r' or '\n'.
+ * The returned buffer needs to be freed by the caller.
+ *
+ * @param buffer the evbuffer to read from
+ * @return pointer to a single line, or NULL if an error occurred
+ */
+char *evbuffer_readline(struct evbuffer *);
+
+
+/**
+ Move data from one evbuffer into another evbuffer.
+
+ This is a destructive add. The data from one buffer moves into
+ the other buffer. The destination buffer is expanded as needed.
+
+ @param outbuf the output buffer
+ @param inbuf the input buffer
+ @return 0 if successful, or -1 if an error occurred
+ */
+int evbuffer_add_buffer(struct evbuffer *, struct evbuffer *);
+
+
+/**
+ Append a formatted string to the end of an evbuffer.
+
+ @param buf the evbuffer that will be appended to
+ @param fmt a format string
+ @param ... arguments that will be passed to printf(3)
+ @return The number of bytes added if successful, or -1 if an error occurred.
+ */
+int evbuffer_add_printf(struct evbuffer *, const char *fmt, ...)
+#ifdef __GNUC__
+ __attribute__((format(printf, 2, 3)))
+#endif
+;
+
+
+/**
+ Append a va_list formatted string to the end of an evbuffer.
+
+ @param buf the evbuffer that will be appended to
+ @param fmt a format string
+ @param ap a varargs va_list argument array that will be passed to vprintf(3)
+ @return The number of bytes added if successful, or -1 if an error occurred.
+ */
+int evbuffer_add_vprintf(struct evbuffer *, const char *fmt, va_list ap);
+
+
+/**
+ Remove a specified number of bytes data from the beginning of an evbuffer.
+
+ @param buf the evbuffer to be drained
+ @param len the number of bytes to drain from the beginning of the buffer
+ */
+void evbuffer_drain(struct evbuffer *, size_t);
+
+
+/**
+ Write the contents of an evbuffer to a file descriptor.
+
+ The evbuffer will be drained after the bytes have been successfully written.
+
+ @param buffer the evbuffer to be written and drained
+ @param fd the file descriptor to be written to
+ @return the number of bytes written, or -1 if an error occurred
+ @see evbuffer_read()
+ */
+int evbuffer_write(struct evbuffer *, int);
+
+
+/**
+ Read from a file descriptor and store the result in an evbuffer.
+
+ @param buf the evbuffer to store the result
+ @param fd the file descriptor to read from
+ @param howmuch the number of bytes to be read
+ @return the number of bytes read, or -1 if an error occurred
+ @see evbuffer_write()
+ */
+int evbuffer_read(struct evbuffer *, int, int);
+
+
+/**
+ Find a string within an evbuffer.
+
+ @param buffer the evbuffer to be searched
+ @param what the string to be searched for
+ @param len the length of the search string
+ @return a pointer to the beginning of the search string, or NULL if the search failed.
+ */
+u_char *evbuffer_find(struct evbuffer *, const u_char *, size_t);
+
+/**
+ Set a callback to invoke when the evbuffer is modified.
+
+ @param buffer the evbuffer to be monitored
+ @param cb the callback function to invoke when the evbuffer is modified
+ @param cbarg an argument to be provided to the callback function
+ */
+void evbuffer_setcb(struct evbuffer *, void (*)(struct evbuffer *, size_t, size_t, void *), void *);
+
+/*
+ * Marshaling tagged data - We assume that all tags are inserted in their
+ * numeric order - so that unknown tags will always be higher than the
+ * known ones - and we can just ignore the end of an event buffer.
+ */
+
+void evtag_init(void);
+
+void evtag_marshal(struct evbuffer *evbuf, ev_uint32_t tag, const void *data,
+ ev_uint32_t len);
+
+/**
+ Encode an integer and store it in an evbuffer.
+
+ We encode integers by nibbles; the first nibble contains the number
+ of significant nibbles - 1; this allows us to encode up to 64-bit
+ integers. This function is byte-order independent.
+
+ @param evbuf evbuffer to store the encoded number
+ @param number a 32-bit integer
+ */
+void encode_int(struct evbuffer *evbuf, ev_uint32_t number);
+
+void evtag_marshal_int(struct evbuffer *evbuf, ev_uint32_t tag,
+ ev_uint32_t integer);
+
+void evtag_marshal_string(struct evbuffer *buf, ev_uint32_t tag,
+ const char *string);
+
+void evtag_marshal_timeval(struct evbuffer *evbuf, ev_uint32_t tag,
+ struct timeval *tv);
+
+int evtag_unmarshal(struct evbuffer *src, ev_uint32_t *ptag,
+ struct evbuffer *dst);
+int evtag_peek(struct evbuffer *evbuf, ev_uint32_t *ptag);
+int evtag_peek_length(struct evbuffer *evbuf, ev_uint32_t *plength);
+int evtag_payload_length(struct evbuffer *evbuf, ev_uint32_t *plength);
+int evtag_consume(struct evbuffer *evbuf);
+
+int evtag_unmarshal_int(struct evbuffer *evbuf, ev_uint32_t need_tag,
+ ev_uint32_t *pinteger);
+
+int evtag_unmarshal_fixed(struct evbuffer *src, ev_uint32_t need_tag,
+ void *data, size_t len);
+
+int evtag_unmarshal_string(struct evbuffer *evbuf, ev_uint32_t need_tag,
+ char **pstring);
+
+int evtag_unmarshal_timeval(struct evbuffer *evbuf, ev_uint32_t need_tag,
+ struct timeval *ptv);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _EVENT_H_ */
diff --git a/libevent/event_rpcgen.py b/libevent/event_rpcgen.py
new file mode 100644
index 00000000000..5503ff8a5c3
--- /dev/null
+++ b/libevent/event_rpcgen.py
@@ -0,0 +1,1417 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2005 Niels Provos <provos@citi.umich.edu>
+# All rights reserved.
+#
+# Generates marshaling code based on libevent.
+
import sys
import re

# Script identity; kept for reference/version reporting.
_NAME = "event_rpcgen.py"
_VERSION = "0.1"
# Regex fragment matching a legal struct name in the input language.
_STRUCT_RE = '[a-z][a-z_0-9]*'

# Globals
# Current input line number, used in parser error messages.
line_count = 0

# Matches a run of LEADING whitespace only (anchored at ^); used by NormalizeLine.
white = re.compile(r'^\s+')
# Matches a C++-style // comment through end of line.
cppcomment = re.compile(r'\/\/.*$')
# Directives copied verbatim into the generated header (#define lines).
headerdirect = []
# Directives copied verbatim into the generated C file (#include / #if lines).
cppdirect = []
+
+# Holds everything that makes a struct
+class Struct:
+ def __init__(self, name):
+ self._name = name
+ self._entries = []
+ self._tags = {}
+ print >>sys.stderr, ' Created struct: %s' % name
+
+ def AddEntry(self, entry):
+ if self._tags.has_key(entry.Tag()):
+ print >>sys.stderr, ( 'Entry "%s" duplicates tag number '
+ '%d from "%s" around line %d' ) % (
+ entry.Name(), entry.Tag(),
+ self._tags[entry.Tag()], line_count)
+ sys.exit(1)
+ self._entries.append(entry)
+ self._tags[entry.Tag()] = entry.Name()
+ print >>sys.stderr, ' Added entry: %s' % entry.Name()
+
+ def Name(self):
+ return self._name
+
+ def EntryTagName(self, entry):
+ """Creates the name inside an enumeration for distinguishing data
+ types."""
+ name = "%s_%s" % (self._name, entry.Name())
+ return name.upper()
+
+ def PrintIdented(self, file, ident, code):
+ """Takes an array, add indentation to each entry and prints it."""
+ for entry in code:
+ print >>file, '%s%s' % (ident, entry)
+
+ def PrintTags(self, file):
+ """Prints the tag definitions for a structure."""
+ print >>file, '/* Tag definition for %s */' % self._name
+ print >>file, 'enum %s_ {' % self._name.lower()
+ for entry in self._entries:
+ print >>file, ' %s=%d,' % (self.EntryTagName(entry),
+ entry.Tag())
+ print >>file, ' %s_MAX_TAGS' % (self._name.upper())
+ print >>file, '};\n'
+
+ def PrintForwardDeclaration(self, file):
+ print >>file, 'struct %s;' % self._name
+
+ def PrintDeclaration(self, file):
+ print >>file, '/* Structure declaration for %s */' % self._name
+ print >>file, 'struct %s_access_ {' % self._name
+ for entry in self._entries:
+ dcl = entry.AssignDeclaration('(*%s_assign)' % entry.Name())
+ dcl.extend(
+ entry.GetDeclaration('(*%s_get)' % entry.Name()))
+ if entry.Array():
+ dcl.extend(
+ entry.AddDeclaration('(*%s_add)' % entry.Name()))
+ self.PrintIdented(file, ' ', dcl)
+ print >>file, '};\n'
+
+ print >>file, 'struct %s {' % self._name
+ print >>file, ' struct %s_access_ *base;\n' % self._name
+ for entry in self._entries:
+ dcl = entry.Declaration()
+ self.PrintIdented(file, ' ', dcl)
+ print >>file, ''
+ for entry in self._entries:
+ print >>file, ' uint8_t %s_set;' % entry.Name()
+ print >>file, '};\n'
+
+ print >>file, \
+"""struct %(name)s *%(name)s_new(void);
+void %(name)s_free(struct %(name)s *);
+void %(name)s_clear(struct %(name)s *);
+void %(name)s_marshal(struct evbuffer *, const struct %(name)s *);
+int %(name)s_unmarshal(struct %(name)s *, struct evbuffer *);
+int %(name)s_complete(struct %(name)s *);
+void evtag_marshal_%(name)s(struct evbuffer *, uint32_t,
+ const struct %(name)s *);
+int evtag_unmarshal_%(name)s(struct evbuffer *, uint32_t,
+ struct %(name)s *);""" % { 'name' : self._name }
+
+
+ # Write a setting function of every variable
+ for entry in self._entries:
+ self.PrintIdented(file, '', entry.AssignDeclaration(
+ entry.AssignFuncName()))
+ self.PrintIdented(file, '', entry.GetDeclaration(
+ entry.GetFuncName()))
+ if entry.Array():
+ self.PrintIdented(file, '', entry.AddDeclaration(
+ entry.AddFuncName()))
+
+ print >>file, '/* --- %s done --- */\n' % self._name
+
+ def PrintCode(self, file):
+ print >>file, ('/*\n'
+ ' * Implementation of %s\n'
+ ' */\n') % self._name
+
+ print >>file, \
+ 'static struct %(name)s_access_ __%(name)s_base = {' % \
+ { 'name' : self._name }
+ for entry in self._entries:
+ self.PrintIdented(file, ' ', entry.CodeBase())
+ print >>file, '};\n'
+
+ # Creation
+ print >>file, (
+ 'struct %(name)s *\n'
+ '%(name)s_new(void)\n'
+ '{\n'
+ ' struct %(name)s *tmp;\n'
+ ' if ((tmp = malloc(sizeof(struct %(name)s))) == NULL) {\n'
+ ' event_warn("%%s: malloc", __func__);\n'
+ ' return (NULL);\n'
+ ' }\n'
+ ' tmp->base = &__%(name)s_base;\n') % { 'name' : self._name }
+
+ for entry in self._entries:
+ self.PrintIdented(file, ' ', entry.CodeNew('tmp'))
+ print >>file, ' tmp->%s_set = 0;\n' % entry.Name()
+
+ print >>file, (
+ ' return (tmp);\n'
+ '}\n')
+
+ # Adding
+ for entry in self._entries:
+ if entry.Array():
+ self.PrintIdented(file, '', entry.CodeAdd())
+ print >>file, ''
+
+ # Assigning
+ for entry in self._entries:
+ self.PrintIdented(file, '', entry.CodeAssign())
+ print >>file, ''
+
+ # Getting
+ for entry in self._entries:
+ self.PrintIdented(file, '', entry.CodeGet())
+ print >>file, ''
+
+ # Clearing
+ print >>file, ( 'void\n'
+ '%(name)s_clear(struct %(name)s *tmp)\n'
+ '{'
+ ) % { 'name' : self._name }
+ for entry in self._entries:
+ self.PrintIdented(file, ' ', entry.CodeClear('tmp'))
+
+ print >>file, '}\n'
+
+ # Freeing
+ print >>file, ( 'void\n'
+ '%(name)s_free(struct %(name)s *tmp)\n'
+ '{'
+ ) % { 'name' : self._name }
+
+ for entry in self._entries:
+ self.PrintIdented(file, ' ', entry.CodeFree('tmp'))
+
+ print >>file, (' free(tmp);\n'
+ '}\n')
+
+ # Marshaling
+ print >>file, ('void\n'
+ '%(name)s_marshal(struct evbuffer *evbuf, '
+ 'const struct %(name)s *tmp)'
+ '{') % { 'name' : self._name }
+ for entry in self._entries:
+ indent = ' '
+ # Optional entries do not have to be set
+ if entry.Optional():
+ indent += ' '
+ print >>file, ' if (tmp->%s_set) {' % entry.Name()
+ self.PrintIdented(
+ file, indent,
+ entry.CodeMarshal('evbuf', self.EntryTagName(entry), 'tmp'))
+ if entry.Optional():
+ print >>file, ' }'
+
+ print >>file, '}\n'
+
+ # Unmarshaling
+ print >>file, ('int\n'
+ '%(name)s_unmarshal(struct %(name)s *tmp, '
+ ' struct evbuffer *evbuf)\n'
+ '{\n'
+ ' uint32_t tag;\n'
+ ' while (EVBUFFER_LENGTH(evbuf) > 0) {\n'
+ ' if (evtag_peek(evbuf, &tag) == -1)\n'
+ ' return (-1);\n'
+ ' switch (tag) {\n'
+ ) % { 'name' : self._name }
+ for entry in self._entries:
+ print >>file, ' case %s:\n' % self.EntryTagName(entry)
+ if not entry.Array():
+ print >>file, (
+ ' if (tmp->%s_set)\n'
+ ' return (-1);'
+ ) % (entry.Name())
+
+ self.PrintIdented(
+ file, ' ',
+ entry.CodeUnmarshal('evbuf',
+ self.EntryTagName(entry), 'tmp'))
+
+ print >>file, ( ' tmp->%s_set = 1;\n' % entry.Name() +
+ ' break;\n' )
+ print >>file, ( ' default:\n'
+ ' return -1;\n'
+ ' }\n'
+ ' }\n' )
+ # Check if it was decoded completely
+ print >>file, ( ' if (%(name)s_complete(tmp) == -1)\n'
+ ' return (-1);'
+ ) % { 'name' : self._name }
+
+ # Successfully decoded
+ print >>file, ( ' return (0);\n'
+ '}\n')
+
+ # Checking if a structure has all the required data
+ print >>file, (
+ 'int\n'
+ '%(name)s_complete(struct %(name)s *msg)\n'
+ '{' ) % { 'name' : self._name }
+ for entry in self._entries:
+ self.PrintIdented(
+ file, ' ',
+ entry.CodeComplete('msg'))
+ print >>file, (
+ ' return (0);\n'
+ '}\n' )
+
+ # Complete message unmarshaling
+ print >>file, (
+ 'int\n'
+ 'evtag_unmarshal_%(name)s(struct evbuffer *evbuf, '
+ 'uint32_t need_tag, struct %(name)s *msg)\n'
+ '{\n'
+ ' uint32_t tag;\n'
+ ' int res = -1;\n'
+ '\n'
+ ' struct evbuffer *tmp = evbuffer_new();\n'
+ '\n'
+ ' if (evtag_unmarshal(evbuf, &tag, tmp) == -1'
+ ' || tag != need_tag)\n'
+ ' goto error;\n'
+ '\n'
+ ' if (%(name)s_unmarshal(msg, tmp) == -1)\n'
+ ' goto error;\n'
+ '\n'
+ ' res = 0;\n'
+ '\n'
+ ' error:\n'
+ ' evbuffer_free(tmp);\n'
+ ' return (res);\n'
+ '}\n' ) % { 'name' : self._name }
+
+ # Complete message marshaling
+ print >>file, (
+ 'void\n'
+ 'evtag_marshal_%(name)s(struct evbuffer *evbuf, uint32_t tag, '
+ 'const struct %(name)s *msg)\n'
+ '{\n'
+ ' struct evbuffer *_buf = evbuffer_new();\n'
+ ' assert(_buf != NULL);\n'
+ ' evbuffer_drain(_buf, -1);\n'
+ ' %(name)s_marshal(_buf, msg);\n'
+ ' evtag_marshal(evbuf, tag, EVBUFFER_DATA(_buf), '
+ 'EVBUFFER_LENGTH(_buf));\n'
+ ' evbuffer_free(_buf);\n'
+ '}\n' ) % { 'name' : self._name }
+
class Entry:
    """Base class for one struct member.

    Subclasses specialize per member type (int, string, bytes, struct,
    array) and generate the C code fragments for declaring, assigning,
    getting, clearing, freeing and (un)marshaling that member.
    """

    def __init__(self, type, name, tag):
        self._type = type
        self._name = name
        self._tag = int(tag)
        self._ctype = type        # C type of the member; subclasses override
        self._optional = 0
        self._can_be_array = 0    # only subclasses that allow arrays set this
        self._array = 0
        self._line_count = -1     # input line, set via SetLineCount()
        self._struct = None       # owning Struct, set via SetStruct()
        self._refname = None      # referenced struct name (struct members only)

    def GetTranslation(self):
        # Substitution dict used by the %-templates in the code generators.
        return { "parent_name" : self._struct.Name(),
                 "name" : self._name,
                 "ctype" : self._ctype,
                 "refname" : self._refname
                 }

    def SetStruct(self, struct):
        self._struct = struct

    def LineCount(self):
        # Must only be queried after SetLineCount() has been called.
        assert self._line_count != -1
        return self._line_count

    def SetLineCount(self, number):
        self._line_count = number

    def Array(self):
        return self._array

    def Optional(self):
        return self._optional

    def Tag(self):
        return self._tag

    def Name(self):
        return self._name

    def Type(self):
        return self._type

    def MakeArray(self, yes=1):
        self._array = yes

    def MakeOptional(self):
        self._optional = 1

    def GetFuncName(self):
        return '%s_%s_get' % (self._struct.Name(), self._name)

    def GetDeclaration(self, funcname):
        """Prototype of the generated getter."""
        code = [ 'int %s(struct %s *, %s *);' % (
            funcname, self._struct.Name(), self._ctype ) ]
        return code

    def CodeGet(self):
        """C body of the getter: fails unless the member has been set."""
        code = (
            'int',
            '%(parent_name)s_%(name)s_get(struct %(parent_name)s *msg, '
            '%(ctype)s *value)',
            '{',
            ' if (msg->%(name)s_set != 1)',
            ' return (-1);',
            ' *value = msg->%(name)s_data;',
            ' return (0);',
            '}' )
        code = '\n'.join(code)
        code = code % self.GetTranslation()
        return code.split('\n')

    def AssignFuncName(self):
        return '%s_%s_assign' % (self._struct.Name(), self._name)

    def AddFuncName(self):
        return '%s_%s_add' % (self._struct.Name(), self._name)

    def AssignDeclaration(self, funcname):
        """Prototype of the generated setter."""
        code = [ 'int %s(struct %s *, const %s);' % (
            funcname, self._struct.Name(), self._ctype ) ]
        return code

    def CodeAssign(self):
        """C body of the setter: stores the value and marks the member set."""
        code = [ 'int',
                 '%(parent_name)s_%(name)s_assign(struct %(parent_name)s *msg,'
                 ' const %(ctype)s value)',
                 '{',
                 ' msg->%(name)s_set = 1;',
                 ' msg->%(name)s_data = value;',
                 ' return (0);',
                 '}' ]
        code = '\n'.join(code)
        code = code % self.GetTranslation()
        return code.split('\n')

    def CodeClear(self, structname):
        # Default clear: only reset the _set flag; subclasses free storage.
        code = [ '%s->%s_set = 0;' % (structname, self.Name()) ]

        return code

    def CodeComplete(self, structname):
        # Optional members never block completeness.
        if self.Optional():
            return []

        code = [ 'if (!%s->%s_set)' % (structname, self.Name()),
                 ' return (-1);' ]

        return code

    def CodeFree(self, name):
        # Default: nothing to free; subclasses with heap storage override.
        return []

    def CodeBase(self):
        """Initializer lines for this member in the accessor-table struct."""
        code = [
            '%(parent_name)s_%(name)s_assign,',
            '%(parent_name)s_%(name)s_get,'
            ]
        if self.Array():
            code.append('%(parent_name)s_%(name)s_add,')

        code = '\n'.join(code)
        code = code % self.GetTranslation()
        return code.split('\n')

    def Verify(self):
        """Sanity-checks the parsed entry; exits the program on violation."""
        if self.Array() and not self._can_be_array:
            print >>sys.stderr, (
                'Entry "%s" cannot be created as an array '
                'around line %d' ) % (self._name, self.LineCount())
            sys.exit(1)
        if not self._struct:
            print >>sys.stderr, (
                'Entry "%s" does not know which struct it belongs to '
                'around line %d' ) % (self._name, self.LineCount())
            sys.exit(1)
        if self._optional and self._array:
            print >>sys.stderr, ( 'Entry "%s" has illegal combination of '
                                  'optional and array around line %d' ) % (
                self._name, self.LineCount() )
            sys.exit(1)
+
class EntryBytes(Entry):
    """Fixed-length byte-array member (declared with an explicit length)."""

    def __init__(self, type, name, tag, length):
        # Init base class
        Entry.__init__(self, type, name, tag)

        self._length = length
        self._ctype = 'uint8_t'

    def GetDeclaration(self, funcname):
        # Getter exposes a pointer into the in-struct array: uint8_t **.
        code = [ 'int %s(struct %s *, %s **);' % (
            funcname, self._struct.Name(), self._ctype ) ]
        return code

    def AssignDeclaration(self, funcname):
        code = [ 'int %s(struct %s *, const %s *);' % (
            funcname, self._struct.Name(), self._ctype ) ]
        return code

    def Declaration(self):
        dcl = ['uint8_t %s_data[%s];' % (self._name, self._length)]

        return dcl

    def CodeGet(self):
        name = self._name
        code = [ 'int',
                 '%s_%s_get(struct %s *msg, %s **value)' % (
                     self._struct.Name(), name,
                     self._struct.Name(), self._ctype),
                 '{',
                 ' if (msg->%s_set != 1)' % name,
                 ' return (-1);',
                 ' *value = msg->%s_data;' % name,
                 ' return (0);',
                 '}' ]
        return code

    def CodeAssign(self):
        # Copies exactly _length bytes into the fixed array.
        name = self._name
        code = [ 'int',
                 '%s_%s_assign(struct %s *msg, const %s *value)' % (
                     self._struct.Name(), name,
                     self._struct.Name(), self._ctype),
                 '{',
                 ' msg->%s_set = 1;' % name,
                 ' memcpy(msg->%s_data, value, %s);' % (
                     name, self._length),
                 ' return (0);',
                 '}' ]
        return code

    def CodeUnmarshal(self, buf, tag_name, var_name):
        code = [ 'if (evtag_unmarshal_fixed(%s, %s, ' % (buf, tag_name) +
                 '%s->%s_data, ' % (var_name, self._name) +
                 'sizeof(%s->%s_data)) == -1) {' % (
                     var_name, self._name),
                 ' event_warnx("%%s: failed to unmarshal %s", __func__);' % (
                     self._name ),
                 ' return (-1);',
                 '}'
                 ]
        return code

    def CodeMarshal(self, buf, tag_name, var_name):
        code = ['evtag_marshal(%s, %s, %s->%s_data, sizeof(%s->%s_data));' % (
            buf, tag_name, var_name, self._name, var_name, self._name )]
        return code

    def CodeClear(self, structname):
        # Also zero the byte array, not just the _set flag.
        code = [ '%s->%s_set = 0;' % (structname, self.Name()),
                 'memset(%s->%s_data, 0, sizeof(%s->%s_data));' % (
                     structname, self._name, structname, self._name)]

        return code

    def CodeNew(self, name):
        code = ['memset(%s->%s_data, 0, sizeof(%s->%s_data));' % (
            name, self._name, name, self._name)]
        return code

    def Verify(self):
        # Fixed-length byte entries must carry a length in the input.
        if not self._length:
            print >>sys.stderr, 'Entry "%s" needs a length around line %d' % (
                self._name, self.LineCount() )
            sys.exit(1)

        Entry.Verify(self)
+
class EntryInt(Entry):
    """32-bit unsigned integer member."""

    def __init__(self, type, name, tag):
        # Init base class
        Entry.__init__(self, type, name, tag)

        self._ctype = 'uint32_t'

    def CodeUnmarshal(self, buf, tag_name, var_name):
        code = ['if (evtag_unmarshal_int(%s, %s, &%s->%s_data) == -1) {' % (
            buf, tag_name, var_name, self._name),
            ' event_warnx("%%s: failed to unmarshal %s", __func__);' % (
                self._name ),
            ' return (-1);',
            '}' ]
        return code

    def CodeMarshal(self, buf, tag_name, var_name):
        code = ['evtag_marshal_int(%s, %s, %s->%s_data);' % (
            buf, tag_name, var_name, self._name)]
        return code

    def Declaration(self):
        dcl = ['uint32_t %s_data;' % self._name]

        return dcl

    def CodeNew(self, name):
        code = ['%s->%s_data = 0;' % (name, self._name)]
        return code
+
class EntryString(Entry):
    """NUL-terminated string member; storage is heap-allocated via strdup."""

    def __init__(self, type, name, tag):
        # Init base class
        Entry.__init__(self, type, name, tag)

        self._ctype = 'char *'

    def CodeAssign(self):
        # Frees any previous value before duplicating the new one.
        name = self._name
        code = """int
%(parent_name)s_%(name)s_assign(struct %(parent_name)s *msg,
 const %(ctype)s value)
{
 if (msg->%(name)s_data != NULL)
 free(msg->%(name)s_data);
 if ((msg->%(name)s_data = strdup(value)) == NULL)
 return (-1);
 msg->%(name)s_set = 1;
 return (0);
}""" % self.GetTranslation()

        return code.split('\n')

    def CodeUnmarshal(self, buf, tag_name, var_name):
        code = ['if (evtag_unmarshal_string(%s, %s, &%s->%s_data) == -1) {' % (
            buf, tag_name, var_name, self._name),
            ' event_warnx("%%s: failed to unmarshal %s", __func__);' % (
                self._name ),
            ' return (-1);',
            '}'
            ]
        return code

    def CodeMarshal(self, buf, tag_name, var_name):
        code = ['evtag_marshal_string(%s, %s, %s->%s_data);' % (
            buf, tag_name, var_name, self._name)]
        return code

    def CodeClear(self, structname):
        # Releases the string and resets pointer + flag.
        code = [ 'if (%s->%s_set == 1) {' % (structname, self.Name()),
                 ' free (%s->%s_data);' % (structname, self.Name()),
                 ' %s->%s_data = NULL;' % (structname, self.Name()),
                 ' %s->%s_set = 0;' % (structname, self.Name()),
                 '}'
                 ]

        return code

    def CodeNew(self, name):
        code = ['%s->%s_data = NULL;' % (name, self._name)]
        return code

    def CodeFree(self, name):
        code = ['if (%s->%s_data != NULL)' % (name, self._name),
                ' free (%s->%s_data); ' % (name, self._name)]

        return code

    def Declaration(self):
        dcl = ['char *%s_data;' % self._name]

        return dcl
+
class EntryStruct(Entry):
    """Member that embeds another generated struct (by reference).

    The only entry type that may also be an array (_can_be_array).
    """

    def __init__(self, type, name, tag, refname):
        # Init base class
        Entry.__init__(self, type, name, tag)

        self._can_be_array = 1
        self._refname = refname
        self._ctype = 'struct %s*' % refname

    def CodeGet(self):
        # Lazily allocates the sub-struct on first access.
        name = self._name
        code = [ 'int',
                 '%s_%s_get(struct %s *msg, %s *value)' % (
                     self._struct.Name(), name,
                     self._struct.Name(), self._ctype),
                 '{',
                 ' if (msg->%s_set != 1) {' % name,
                 ' msg->%s_data = %s_new();' % (name, self._refname),
                 ' if (msg->%s_data == NULL)' % name,
                 ' return (-1);',
                 ' msg->%s_set = 1;' % name,
                 ' }',
                 ' *value = msg->%s_data;' % name,
                 ' return (0);',
                 '}' ]
        return code

    def CodeAssign(self):
        # Deep-copies 'value' by marshaling it into a temporary evbuffer
        # and unmarshaling into our own sub-struct.
        name = self._name
        code = """int
%(parent_name)s_%(name)s_assign(struct %(parent_name)s *msg,
 const %(ctype)s value)
{
 struct evbuffer *tmp = NULL;
 if (msg->%(name)s_set) {
 %(refname)s_clear(msg->%(name)s_data);
 msg->%(name)s_set = 0;
 } else {
 msg->%(name)s_data = %(refname)s_new();
 if (msg->%(name)s_data == NULL) {
 event_warn("%%s: %(refname)s_new()", __func__);
 goto error;
 }
 }
 if ((tmp = evbuffer_new()) == NULL) {
 event_warn("%%s: evbuffer_new()", __func__);
 goto error;
 }
 %(refname)s_marshal(tmp, value);
 if (%(refname)s_unmarshal(msg->%(name)s_data, tmp) == -1) {
 event_warnx("%%s: %(refname)s_unmarshal", __func__);
 goto error;
 }
 msg->%(name)s_set = 1;
 evbuffer_free(tmp);
 return (0);
 error:
 if (tmp != NULL)
 evbuffer_free(tmp);
 if (msg->%(name)s_data != NULL) {
 %(refname)s_free(msg->%(name)s_data);
 msg->%(name)s_data = NULL;
 }
 return (-1);
}""" % self.GetTranslation()
        return code.split('\n')

    def CodeComplete(self, structname):
        # Recursively checks completeness of the sub-struct.
        if self.Optional():
            code = [ 'if (%s->%s_set && %s_complete(%s->%s_data) == -1)' % (
                structname, self.Name(),
                self._refname, structname, self.Name()),
                ' return (-1);' ]
        else:
            code = [ 'if (%s_complete(%s->%s_data) == -1)' % (
                self._refname, structname, self.Name()),
                ' return (-1);' ]

        return code

    def CodeUnmarshal(self, buf, tag_name, var_name):
        code = ['%s->%s_data = %s_new();' % (
            var_name, self._name, self._refname),
            'if (%s->%s_data == NULL)' % (var_name, self._name),
            ' return (-1);',
            'if (evtag_unmarshal_%s(%s, %s, %s->%s_data) == -1) {' % (
                self._refname, buf, tag_name, var_name, self._name),
            ' event_warnx("%%s: failed to unmarshal %s", __func__);' % (
                self._name ),
            ' return (-1);',
            '}'
            ]
        return code

    def CodeMarshal(self, buf, tag_name, var_name):
        code = ['evtag_marshal_%s(%s, %s, %s->%s_data);' % (
            self._refname, buf, tag_name, var_name, self._name)]
        return code

    def CodeClear(self, structname):
        code = [ 'if (%s->%s_set == 1) {' % (structname, self.Name()),
                 ' %s_free(%s->%s_data);' % (
                     self._refname, structname, self.Name()),
                 ' %s->%s_data = NULL;' % (structname, self.Name()),
                 ' %s->%s_set = 0;' % (structname, self.Name()),
                 '}'
                 ]

        return code

    def CodeNew(self, name):
        code = ['%s->%s_data = NULL;' % (name, self._name)]
        return code

    def CodeFree(self, name):
        code = ['if (%s->%s_data != NULL)' % (name, self._name),
                ' %s_free(%s->%s_data); ' % (
                    self._refname, name, self._name)]

        return code

    def Declaration(self):
        dcl = ['%s %s_data;' % (self._ctype, self._name)]

        return dcl
+
class EntryVarBytes(Entry):
    """Variable-length byte buffer member; tracks a separate _length field."""

    def __init__(self, type, name, tag):
        # Init base class
        Entry.__init__(self, type, name, tag)

        self._ctype = 'uint8_t *'

    def GetDeclaration(self, funcname):
        # Getter also returns the current length through an out parameter.
        code = [ 'int %s(struct %s *, %s *, uint32_t *);' % (
            funcname, self._struct.Name(), self._ctype ) ]
        return code

    def AssignDeclaration(self, funcname):
        code = [ 'int %s(struct %s *, const %s, uint32_t);' % (
            funcname, self._struct.Name(), self._ctype ) ]
        return code

    def CodeAssign(self):
        # Replaces any previous buffer with a fresh heap copy of 'value'.
        name = self._name
        code = [ 'int',
                 '%s_%s_assign(struct %s *msg, '
                 'const %s value, uint32_t len)' % (
                     self._struct.Name(), name,
                     self._struct.Name(), self._ctype),
                 '{',
                 ' if (msg->%s_data != NULL)' % name,
                 ' free (msg->%s_data);' % name,
                 ' msg->%s_data = malloc(len);' % name,
                 ' if (msg->%s_data == NULL)' % name,
                 ' return (-1);',
                 ' msg->%s_set = 1;' % name,
                 ' msg->%s_length = len;' % name,
                 ' memcpy(msg->%s_data, value, len);' % name,
                 ' return (0);',
                 '}' ]
        return code

    def CodeGet(self):
        name = self._name
        code = [ 'int',
                 '%s_%s_get(struct %s *msg, %s *value, uint32_t *plen)' % (
                     self._struct.Name(), name,
                     self._struct.Name(), self._ctype),
                 '{',
                 ' if (msg->%s_set != 1)' % name,
                 ' return (-1);',
                 ' *value = msg->%s_data;' % name,
                 ' *plen = msg->%s_length;' % name,
                 ' return (0);',
                 '}' ]
        return code

    def CodeUnmarshal(self, buf, tag_name, var_name):
        code = ['if (evtag_payload_length(%s, &%s->%s_length) == -1)' % (
            buf, var_name, self._name),
            ' return (-1);',
            # We do not want DoS opportunities
            'if (%s->%s_length > EVBUFFER_LENGTH(%s))' % (
                var_name, self._name, buf),
            ' return (-1);',
            'if ((%s->%s_data = malloc(%s->%s_length)) == NULL)' % (
                var_name, self._name, var_name, self._name),
            ' return (-1);',
            'if (evtag_unmarshal_fixed(%s, %s, %s->%s_data, '
            '%s->%s_length) == -1) {' % (
                buf, tag_name, var_name, self._name, var_name, self._name),
            ' event_warnx("%%s: failed to unmarshal %s", __func__);' % (
                self._name ),
            ' return (-1);',
            '}'
            ]
        return code

    def CodeMarshal(self, buf, tag_name, var_name):
        code = ['evtag_marshal(%s, %s, %s->%s_data, %s->%s_length);' % (
            buf, tag_name, var_name, self._name, var_name, self._name)]
        return code

    def CodeClear(self, structname):
        code = [ 'if (%s->%s_set == 1) {' % (structname, self.Name()),
                 ' free (%s->%s_data);' % (structname, self.Name()),
                 ' %s->%s_data = NULL;' % (structname, self.Name()),
                 ' %s->%s_length = 0;' % (structname, self.Name()),
                 ' %s->%s_set = 0;' % (structname, self.Name()),
                 '}'
                 ]

        return code

    def CodeNew(self, name):
        code = ['%s->%s_data = NULL;' % (name, self._name),
                '%s->%s_length = 0;' % (name, self._name) ]
        return code

    def CodeFree(self, name):
        code = ['if (%s->%s_data != NULL)' % (name, self._name),
                ' free (%s->%s_data); ' % (name, self._name)]

        return code

    def Declaration(self):
        dcl = ['uint8_t *%s_data;' % self._name,
               'uint32_t %s_length;' % self._name]

        return dcl
+
class EntryArray(Entry):
    """Wrapper turning another (struct-typed) entry into a growable array.

    Storage is a heap array of pointers plus _length and _num_allocated
    counters; capacity doubles on growth.
    """

    def __init__(self, entry):
        # Init base class
        Entry.__init__(self, entry._type, entry._name, entry._tag)

        self._entry = entry          # the wrapped per-element entry
        self._refname = entry._refname
        self._ctype = 'struct %s *' % self._refname

    def GetDeclaration(self, funcname):
        """Allows direct access to elements of the array."""
        translate = self.GetTranslation()
        translate["funcname"] = funcname
        code = [
            'int %(funcname)s(struct %(parent_name)s *, int, %(ctype)s *);' %
            translate ]
        return code

    def AssignDeclaration(self, funcname):
        code = [ 'int %s(struct %s *, int, const %s);' % (
            funcname, self._struct.Name(), self._ctype ) ]
        return code

    def AddDeclaration(self, funcname):
        # _add returns the freshly appended element (or NULL).
        code = [ '%s %s(struct %s *);' % (
            self._ctype, funcname, self._struct.Name() ) ]
        return code

    def CodeGet(self):
        # Bounds-checked element read.
        code = """int
%(parent_name)s_%(name)s_get(struct %(parent_name)s *msg, int offset,
 %(ctype)s *value)
{
 if (!msg->%(name)s_set || offset < 0 || offset >= msg->%(name)s_length)
 return (-1);
 *value = msg->%(name)s_data[offset];
 return (0);
}""" % self.GetTranslation()

        return code.split('\n')

    def CodeAssign(self):
        # Deep-copies 'value' into an existing slot via marshal/unmarshal.
        code = """int
%(parent_name)s_%(name)s_assign(struct %(parent_name)s *msg, int off,
 const %(ctype)s value)
{
 struct evbuffer *tmp = NULL;
 if (!msg->%(name)s_set || off < 0 || off >= msg->%(name)s_length)
 return (-1);
 %(refname)s_clear(msg->%(name)s_data[off]);
 if ((tmp = evbuffer_new()) == NULL) {
 event_warn("%%s: evbuffer_new()", __func__);
 goto error;
 }
 %(refname)s_marshal(tmp, value);
 if (%(refname)s_unmarshal(msg->%(name)s_data[off], tmp) == -1) {
 event_warnx("%%s: %(refname)s_unmarshal", __func__);
 goto error;
 }
 evbuffer_free(tmp);
 return (0);
error:
 if (tmp != NULL)
 evbuffer_free(tmp);
 %(refname)s_clear(msg->%(name)s_data[off]);
 return (-1);
}""" % self.GetTranslation()

        return code.split('\n')

    def CodeAdd(self):
        # Appends a new element, doubling capacity when needed; on failure
        # the length increment is rolled back.
        code = \
"""%(ctype)s
%(parent_name)s_%(name)s_add(struct %(parent_name)s *msg)
{
 if (++msg->%(name)s_length >= msg->%(name)s_num_allocated) {
 int tobe_allocated = msg->%(name)s_num_allocated;
 %(ctype)s* new_data = NULL;
 tobe_allocated = !tobe_allocated ? 1 : tobe_allocated << 1;
 new_data = (%(ctype)s*) realloc(msg->%(name)s_data,
 tobe_allocated * sizeof(%(ctype)s));
 if (new_data == NULL)
 goto error;
 msg->%(name)s_data = new_data;
 msg->%(name)s_num_allocated = tobe_allocated;
 }
 msg->%(name)s_data[msg->%(name)s_length - 1] = %(refname)s_new();
 if (msg->%(name)s_data[msg->%(name)s_length - 1] == NULL)
 goto error;
 msg->%(name)s_set = 1;
 return (msg->%(name)s_data[msg->%(name)s_length - 1]);
error:
 --msg->%(name)s_length;
 return (NULL);
}
 """ % self.GetTranslation()

        return code.split('\n')

    def CodeComplete(self, structname):
        # Every element must itself be complete.
        code = []
        translate = self.GetTranslation()

        if self.Optional():
            code.append( 'if (%(structname)s->%(name)s_set)' % translate)

        translate["structname"] = structname
        tmp = """{
 int i;
 for (i = 0; i < %(structname)s->%(name)s_length; ++i) {
 if (%(refname)s_complete(%(structname)s->%(name)s_data[i]) == -1)
 return (-1);
 }
}""" % translate
        code.extend(tmp.split('\n'))

        return code

    def CodeUnmarshal(self, buf, tag_name, var_name):
        # Appends a slot via _add, then unmarshals into it; rolls back the
        # length on failure.
        translate = self.GetTranslation()
        translate["var_name"] = var_name
        translate["buf"] = buf
        translate["tag_name"] = tag_name
        code = """if (%(parent_name)s_%(name)s_add(%(var_name)s) == NULL)
 return (-1);
if (evtag_unmarshal_%(refname)s(%(buf)s, %(tag_name)s,
 %(var_name)s->%(name)s_data[%(var_name)s->%(name)s_length - 1]) == -1) {
 --%(var_name)s->%(name)s_length;
 event_warnx("%%s: failed to unmarshal %(name)s", __func__);
 return (-1);
}""" % translate

        return code.split('\n')

    def CodeMarshal(self, buf, tag_name, var_name):
        # Each element is marshaled with the same tag, in order.
        code = ['{',
                ' int i;',
                ' for (i = 0; i < %s->%s_length; ++i) {' % (
                    var_name, self._name),
                ' evtag_marshal_%s(%s, %s, %s->%s_data[i]);' % (
                    self._refname, buf, tag_name, var_name, self._name),
                ' }',
                '}'
                ]
        return code

    def CodeClear(self, structname):
        code = [ 'if (%s->%s_set == 1) {' % (structname, self.Name()),
                 ' int i;',
                 ' for (i = 0; i < %s->%s_length; ++i) {' % (
                     structname, self.Name()),
                 ' %s_free(%s->%s_data[i]);' % (
                     self._refname, structname, self.Name()),
                 ' }',
                 ' free(%s->%s_data);' % (structname, self.Name()),
                 ' %s->%s_data = NULL;' % (structname, self.Name()),
                 ' %s->%s_set = 0;' % (structname, self.Name()),
                 ' %s->%s_length = 0;' % (structname, self.Name()),
                 ' %s->%s_num_allocated = 0;' % (structname, self.Name()),
                 '}'
                 ]

        return code

    def CodeNew(self, name):
        code = ['%s->%s_data = NULL;' % (name, self._name),
                '%s->%s_length = 0;' % (name, self._name),
                '%s->%s_num_allocated = 0;' % (name, self._name)]
        return code

    def CodeFree(self, name):
        code = ['if (%s->%s_data != NULL) {' % (name, self._name),
                ' int i;',
                ' for (i = 0; i < %s->%s_length; ++i) {' % (
                    name, self._name),
                ' %s_free(%s->%s_data[i]); ' % (
                    self._refname, name, self._name),
                ' %s->%s_data[i] = NULL;' % (name, self._name),
                ' }',
                ' free(%s->%s_data);' % (name, self._name),
                ' %s->%s_data = NULL;' % (name, self._name),
                ' %s->%s_length = 0;' % (name, self._name),
                ' %s->%s_num_allocated = 0;' % (name, self._name),
                '}'
                ]

        return code

    def Declaration(self):
        dcl = ['struct %s **%s_data;' % (self._refname, self._name),
               'int %s_length;' % self._name,
               'int %s_num_allocated;' % self._name ]

        return dcl
+
def NormalizeLine(line):
    """Normalizes one input line for parsing.

    Removes a C++-style // comment, trims surrounding whitespace, and
    squeezes a run of leading whitespace to a single space.  Uses the same
    patterns as the module-level 'cppcomment' and 'white' regexes, applied
    inline.  Note the leading-whitespace pattern is anchored at ^, so after
    strip() it is effectively a no-op — interior whitespace is preserved,
    matching the original behavior.
    """
    without_comment = re.sub(r'\/\/.*$', '', line)
    trimmed = without_comment.strip()
    return re.sub(r'^\s+', ' ', trimmed)
+
def ProcessOneEntry(newstruct, entry):
    """Parses one member declaration ('[optional] [array] type name[len] = tag')
    and adds the resulting Entry to 'newstruct'.

    Exits the program on any syntax error.  Returns a list of additional
    structs created while processing.
    NOTE(review): the returned list is always empty — 'structs' is created
    below and never populated; callers still extend() with it.
    """
    optional = 0
    array = 0
    entry_type = ''
    name = ''
    tag = ''
    tag_set = None
    separator = ''
    fixed_length = ''

    # Consume tokens left to right; each clause claims the next field.
    tokens = entry.split(' ')
    while tokens:
        token = tokens[0]
        tokens = tokens[1:]

        if not entry_type:
            # Modifiers may precede the type keyword.
            if not optional and token == 'optional':
                optional = 1
                continue

            if not array and token == 'array':
                array = 1
                continue

        if not entry_type:
            entry_type = token
            continue

        if not name:
            # Name with an optional fixed length suffix, e.g. 'digest[16]'.
            res = re.match(r'^([^\[\]]+)(\[.*\])?$', token)
            if not res:
                print >>sys.stderr, 'Cannot parse name: \"%s\" around %d' % (
                    entry, line_count)
                sys.exit(1)
            name = res.group(1)
            fixed_length = res.group(2)
            if fixed_length:
                fixed_length = fixed_length[1:-1]
            continue

        if not separator:
            separator = token
            if separator != '=':
                print >>sys.stderr, 'Expected "=" after name \"%s\" got %s' % (
                    name, token)
                sys.exit(1)
            continue

        if not tag_set:
            tag_set = 1
            # Tag may be decimal or 0x-prefixed hex (base 0 auto-detects).
            if not re.match(r'^(0x)?[0-9]+$', token):
                print >>sys.stderr, 'Expected tag number: \"%s\"' % entry
                sys.exit(1)
            tag = int(token, 0)
            continue

        print >>sys.stderr, 'Cannot parse \"%s\"' % entry
        sys.exit(1)

    if not tag_set:
        print >>sys.stderr, 'Need tag number: \"%s\"' % entry
        sys.exit(1)

    # Create the right entry
    if entry_type == 'bytes':
        # 'bytes name[N]' is fixed length; bare 'bytes name' is variable.
        if fixed_length:
            newentry = EntryBytes(entry_type, name, tag, fixed_length)
        else:
            newentry = EntryVarBytes(entry_type, name, tag)
    elif entry_type == 'int' and not fixed_length:
        newentry = EntryInt(entry_type, name, tag)
    elif entry_type == 'string' and not fixed_length:
        newentry = EntryString(entry_type, name, tag)
    else:
        res = re.match(r'^struct\[(%s)\]$' % _STRUCT_RE,
                       entry_type, re.IGNORECASE)
        if res:
            # References another struct defined in our file
            newentry = EntryStruct(entry_type, name, tag, res.group(1))
        else:
            print >>sys.stderr, 'Bad type: "%s" in "%s"' % (entry_type, entry)
            sys.exit(1)

    structs = []

    if optional:
        newentry.MakeOptional()
    if array:
        newentry.MakeArray()

    newentry.SetStruct(newstruct)
    newentry.SetLineCount(line_count)
    newentry.Verify()

    if array:
        # We need to encapsulate this entry into a struct
        # NOTE(review): 'newname' is computed but never used — dead code.
        newname = newentry.Name()+ '_array'

        # Now borgify the new entry.
        newentry = EntryArray(newentry)
        newentry.SetStruct(newstruct)
        newentry.SetLineCount(line_count)
        newentry.MakeArray()

    newstruct.AddEntry(newentry)

    return structs
+
def ProcessStruct(data):
    """Parses one normalized, single-line struct body into Struct objects.

    'data' has the shape 'struct <name> { <entries> }' with entries
    separated by ';'.  Returns the list of created structs (the new struct
    last).
    """
    tokens = data.split(' ')

    # First three tokens are: 'struct' 'name' '{'
    newstruct = Struct(tokens[1])

    # Everything between the braces, re-joined, then split per member.
    inside = ' '.join(tokens[3:-1])

    tokens = inside.split(';')

    structs = []

    for entry in tokens:
        entry = NormalizeLine(entry)
        if not entry:
            continue

        # It's possible that new structs get defined in here
        structs.extend(ProcessOneEntry(newstruct, entry))

    structs.append(newstruct)
    return structs
+
def GetNextStruct(file):
    """Reads lines until one complete struct body has been collected.

    Strips C and C++ comments, routes preprocessor-style lines outside of
    structs into the 'cppdirect'/'headerdirect' pass-through lists, and
    returns the struct as a single normalized string ('' at end of input).
    Exits the program on malformed input.
    """
    global line_count
    global cppdirect

    got_struct = 0

    # NOTE(review): 'processed_lines' is never used after this — dead code.
    processed_lines = []

    have_c_comment = 0
    data = ''
    while 1:
        line = file.readline()
        if not line:
            break

        line_count += 1
        line = line[:-1]          # drop the trailing newline

        # Strip C-style /* ... */ comments, possibly spanning lines.
        if not have_c_comment and re.search(r'/\*', line):
            if re.search(r'/\*.*\*/', line):
                line = re.sub(r'/\*.*\*/', '', line)
            else:
                line = re.sub(r'/\*.*$', '', line)
                have_c_comment = 1

        if have_c_comment:
            if not re.search(r'\*/', line):
                continue
            have_c_comment = 0
            line = re.sub(r'^.*\*/', '', line)

        line = NormalizeLine(line)

        if not line:
            continue

        if not got_struct:
            # Outside a struct: collect pass-through directives or expect
            # the 'struct name {' opener.
            if re.match(r'#include ["<].*[>"]', line):
                cppdirect.append(line)
                continue

            if re.match(r'^#(if( |def)|endif)', line):
                cppdirect.append(line)
                continue

            if re.match(r'^#define', line):
                headerdirect.append(line)
                continue

            if not re.match(r'^struct %s {$' % _STRUCT_RE,
                            line, re.IGNORECASE):
                print >>sys.stderr, 'Missing struct on line %d: %s' % (
                    line_count, line)
                sys.exit(1)
            else:
                got_struct = 1
                data += line
            continue

        # We are inside the struct
        tokens = line.split('}')
        if len(tokens) == 1:
            data += ' ' + line
            continue

        if len(tokens[1]):
            print >>sys.stderr, 'Trailing garbage after struct on line %d' % (
                line_count )
            sys.exit(1)

        # We found the end of the struct
        data += ' %s}' % tokens[0]
        break

    # Remove any comments, that might be in there
    data = re.sub(r'/\*.*\*/', '', data)

    return data
+
+
+def Parse(file):
+ """
+ Parses the input file and returns C code and corresponding header file.
+ """
+
+ entities = []
+
+ while 1:
+ # Just gets the whole struct nicely formatted
+ data = GetNextStruct(file)
+
+ if not data:
+ break
+
+ entities.extend(ProcessStruct(data))
+
+ return entities
+
+def GuardName(name):
+ name = '_'.join(name.split('.'))
+ name = '_'.join(name.split('/'))
+ guard = '_'+name.upper()+'_'
+
+ return guard
+
+def HeaderPreamble(name):
+ guard = GuardName(name)
+ pre = (
+ '/*\n'
+ ' * Automatically generated from %s\n'
+ ' */\n\n'
+ '#ifndef %s\n'
+ '#define %s\n\n' ) % (
+ name, guard, guard)
+
+ # insert stdint.h - let's hope everyone has it
+ pre += (
+ '#include <event-config.h>\n'
+ '#ifdef _EVENT_HAVE_STDINT_H\n'
+ '#include <stdint.h>\n'
+ '#endif\n' )
+
+ for statement in headerdirect:
+ pre += '%s\n' % statement
+ if headerdirect:
+ pre += '\n'
+
+ pre += (
+ '#define EVTAG_HAS(msg, member) ((msg)->member##_set == 1)\n'
+ '#ifdef __GNUC__\n'
+ '#define EVTAG_ASSIGN(msg, member, args...) '
+ '(*(msg)->base->member##_assign)(msg, ## args)\n'
+ '#define EVTAG_GET(msg, member, args...) '
+ '(*(msg)->base->member##_get)(msg, ## args)\n'
+ '#else\n'
+ '#define EVTAG_ASSIGN(msg, member, ...) '
+ '(*(msg)->base->member##_assign)(msg, ## __VA_ARGS__)\n'
+ '#define EVTAG_GET(msg, member, ...) '
+ '(*(msg)->base->member##_get)(msg, ## __VA_ARGS__)\n'
+ '#endif\n'
+ '#define EVTAG_ADD(msg, member) (*(msg)->base->member##_add)(msg)\n'
+ '#define EVTAG_LEN(msg, member) ((msg)->member##_length)\n'
+ )
+
+ return pre
+
+
+def HeaderPostamble(name):
+ guard = GuardName(name)
+ return '#endif /* %s */' % guard
+
+def BodyPreamble(name):
+ global _NAME
+ global _VERSION
+
+ header_file = '.'.join(name.split('.')[:-1]) + '.gen.h'
+
+ pre = ( '/*\n'
+ ' * Automatically generated from %s\n'
+ ' * by %s/%s. DO NOT EDIT THIS FILE.\n'
+ ' */\n\n' ) % (name, _NAME, _VERSION)
+ pre += ( '#include <sys/types.h>\n'
+ '#include <sys/time.h>\n'
+ '#include <stdlib.h>\n'
+ '#include <string.h>\n'
+ '#include <assert.h>\n'
+ '#include <event.h>\n\n' )
+
+ for statement in cppdirect:
+ pre += '%s\n' % statement
+
+ pre += '\n#include "%s"\n\n' % header_file
+
+ pre += 'void event_err(int eval, const char *fmt, ...);\n'
+ pre += 'void event_warn(const char *fmt, ...);\n'
+ pre += 'void event_errx(int eval, const char *fmt, ...);\n'
+ pre += 'void event_warnx(const char *fmt, ...);\n\n'
+
+ return pre
+
+def main(argv):
+ if len(argv) < 2 or not argv[1]:
+ print >>sys.stderr, 'Need RPC description file as first argument.'
+ sys.exit(1)
+
+ filename = argv[1]
+
+ ext = filename.split('.')[-1]
+ if ext != 'rpc':
+ print >>sys.stderr, 'Unrecognized file extension: %s' % ext
+ sys.exit(1)
+
+ print >>sys.stderr, 'Reading \"%s\"' % filename
+
+ fp = open(filename, 'r')
+ entities = Parse(fp)
+ fp.close()
+
+ header_file = '.'.join(filename.split('.')[:-1]) + '.gen.h'
+ impl_file = '.'.join(filename.split('.')[:-1]) + '.gen.c'
+
+ print >>sys.stderr, '... creating "%s"' % header_file
+ header_fp = open(header_file, 'w')
+ print >>header_fp, HeaderPreamble(filename)
+
+ # Create forward declarations: allows other structs to reference
+ # each other
+ for entry in entities:
+ entry.PrintForwardDeclaration(header_fp)
+ print >>header_fp, ''
+
+ for entry in entities:
+ entry.PrintTags(header_fp)
+ entry.PrintDeclaration(header_fp)
+ print >>header_fp, HeaderPostamble(filename)
+ header_fp.close()
+
+ print >>sys.stderr, '... creating "%s"' % impl_file
+ impl_fp = open(impl_file, 'w')
+ print >>impl_fp, BodyPreamble(filename)
+ for entry in entities:
+ entry.PrintCode(impl_fp)
+ impl_fp.close()
+
+if __name__ == '__main__':
+ main(sys.argv)
diff --git a/libevent/event_tagging.c b/libevent/event_tagging.c
new file mode 100644
index 00000000000..d436e3fd65b
--- /dev/null
+++ b/libevent/event_tagging.c
@@ -0,0 +1,443 @@
+/*
+ * Copyright (c) 2003, 2004 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+#ifdef HAVE_SYS_PARAM_H
+#include <sys/param.h>
+#endif
+
+#ifdef WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <winsock2.h>
+#include <windows.h>
+#undef WIN32_LEAN_AND_MEAN
+#else
+#include <sys/ioctl.h>
+#endif
+
+#include <sys/queue.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#ifndef WIN32
+#include <syslog.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "event.h"
+#include "evutil.h"
+#include "log.h"
+
+int evtag_decode_int(ev_uint32_t *pnumber, struct evbuffer *evbuf);
+int evtag_encode_tag(struct evbuffer *evbuf, ev_uint32_t tag);
+int evtag_decode_tag(ev_uint32_t *ptag, struct evbuffer *evbuf);
+
+static struct evbuffer *_buf; /* not thread safe */
+
+void
+evtag_init(void)
+{
+ if (_buf != NULL)
+ return;
+
+ if ((_buf = evbuffer_new()) == NULL)
+ event_err(1, "%s: malloc", __func__);
+}
+
+/*
+ * We encode integers by nibbles; the first nibble contains the number
+ * of significant nibbles - 1; this allows us to encode up to 64-bit
+ * integers. This function is byte-order independent.
+ */
+
+void
+encode_int(struct evbuffer *evbuf, ev_uint32_t number)
+{
+ int off = 1, nibbles = 0;
+ ev_uint8_t data[5];
+
+ memset(data, 0, sizeof(ev_uint32_t)+1);
+ while (number) {
+ if (off & 0x1)
+ data[off/2] = (data[off/2] & 0xf0) | (number & 0x0f);
+ else
+ data[off/2] = (data[off/2] & 0x0f) |
+ ((number & 0x0f) << 4);
+ number >>= 4;
+ off++;
+ }
+
+ if (off > 2)
+ nibbles = off - 2;
+
+ /* Off - 1 is the number of encoded nibbles */
+ data[0] = (data[0] & 0x0f) | ((nibbles & 0x0f) << 4);
+
+ evbuffer_add(evbuf, data, (off + 1) / 2);
+}
+
+/*
+ * Support variable length encoding of tags; we use the high bit in each
+ * octet as a continuation signal.
+ */
+
+int
+evtag_encode_tag(struct evbuffer *evbuf, ev_uint32_t tag)
+{
+ int bytes = 0;
+ ev_uint8_t data[5];
+
+ memset(data, 0, sizeof(data));
+ do {
+ ev_uint8_t lower = tag & 0x7f;
+ tag >>= 7;
+
+ if (tag)
+ lower |= 0x80;
+
+ data[bytes++] = lower;
+ } while (tag);
+
+ if (evbuf != NULL)
+ evbuffer_add(evbuf, data, bytes);
+
+ return (bytes);
+}
+
+static int
+decode_tag_internal(ev_uint32_t *ptag, struct evbuffer *evbuf, int dodrain)
+{
+ ev_uint32_t number = 0;
+ ev_uint8_t *data = EVBUFFER_DATA(evbuf);
+ int len = EVBUFFER_LENGTH(evbuf);
+ int count = 0, shift = 0, done = 0;
+
+ while (count++ < len) {
+ ev_uint8_t lower = *data++;
+ number |= (lower & 0x7f) << shift;
+ shift += 7;
+
+ if (!(lower & 0x80)) {
+ done = 1;
+ break;
+ }
+ }
+
+ if (!done)
+ return (-1);
+
+ if (dodrain)
+ evbuffer_drain(evbuf, count);
+
+ if (ptag != NULL)
+ *ptag = number;
+
+ return (count);
+}
+
+int
+evtag_decode_tag(ev_uint32_t *ptag, struct evbuffer *evbuf)
+{
+ return (decode_tag_internal(ptag, evbuf, 1 /* dodrain */));
+}
+
+/*
+ * Marshal a data type, the general format is as follows:
+ *
+ * tag number: one byte; length: var bytes; payload: var bytes
+ */
+
+void
+evtag_marshal(struct evbuffer *evbuf, ev_uint32_t tag,
+ const void *data, ev_uint32_t len)
+{
+ evtag_encode_tag(evbuf, tag);
+ encode_int(evbuf, len);
+ evbuffer_add(evbuf, (void *)data, len);
+}
+
+/* Marshaling for integers */
+void
+evtag_marshal_int(struct evbuffer *evbuf, ev_uint32_t tag, ev_uint32_t integer)
+{
+ evbuffer_drain(_buf, EVBUFFER_LENGTH(_buf));
+ encode_int(_buf, integer);
+
+ evtag_encode_tag(evbuf, tag);
+ encode_int(evbuf, EVBUFFER_LENGTH(_buf));
+ evbuffer_add_buffer(evbuf, _buf);
+}
+
+void
+evtag_marshal_string(struct evbuffer *buf, ev_uint32_t tag, const char *string)
+{
+ evtag_marshal(buf, tag, string, strlen(string));
+}
+
+void
+evtag_marshal_timeval(struct evbuffer *evbuf, ev_uint32_t tag, struct timeval *tv)
+{
+ evbuffer_drain(_buf, EVBUFFER_LENGTH(_buf));
+
+ encode_int(_buf, tv->tv_sec);
+ encode_int(_buf, tv->tv_usec);
+
+ evtag_marshal(evbuf, tag, EVBUFFER_DATA(_buf),
+ EVBUFFER_LENGTH(_buf));
+}
+
+static int
+decode_int_internal(ev_uint32_t *pnumber, struct evbuffer *evbuf, int dodrain)
+{
+ ev_uint32_t number = 0;
+ ev_uint8_t *data = EVBUFFER_DATA(evbuf);
+ int len = EVBUFFER_LENGTH(evbuf);
+ int nibbles = 0;
+
+ if (!len)
+ return (-1);
+
+ nibbles = ((data[0] & 0xf0) >> 4) + 1;
+ if (nibbles > 8 || (nibbles >> 1) + 1 > len)
+ return (-1);
+ len = (nibbles >> 1) + 1;
+
+ while (nibbles > 0) {
+ number <<= 4;
+ if (nibbles & 0x1)
+ number |= data[nibbles >> 1] & 0x0f;
+ else
+ number |= (data[nibbles >> 1] & 0xf0) >> 4;
+ nibbles--;
+ }
+
+ if (dodrain)
+ evbuffer_drain(evbuf, len);
+
+ *pnumber = number;
+
+ return (len);
+}
+
+int
+evtag_decode_int(ev_uint32_t *pnumber, struct evbuffer *evbuf)
+{
+ return (decode_int_internal(pnumber, evbuf, 1) == -1 ? -1 : 0);
+}
+
+int
+evtag_peek(struct evbuffer *evbuf, ev_uint32_t *ptag)
+{
+ return (decode_tag_internal(ptag, evbuf, 0 /* dodrain */));
+}
+
+int
+evtag_peek_length(struct evbuffer *evbuf, ev_uint32_t *plength)
+{
+ struct evbuffer tmp;
+ int res, len;
+
+ len = decode_tag_internal(NULL, evbuf, 0 /* dodrain */);
+ if (len == -1)
+ return (-1);
+
+ tmp = *evbuf;
+ tmp.buffer += len;
+ tmp.off -= len;
+
+ res = decode_int_internal(plength, &tmp, 0);
+ if (res == -1)
+ return (-1);
+
+ *plength += res + len;
+
+ return (0);
+}
+
+int
+evtag_payload_length(struct evbuffer *evbuf, ev_uint32_t *plength)
+{
+ struct evbuffer tmp;
+ int res, len;
+
+ len = decode_tag_internal(NULL, evbuf, 0 /* dodrain */);
+ if (len == -1)
+ return (-1);
+
+ tmp = *evbuf;
+ tmp.buffer += len;
+ tmp.off -= len;
+
+ res = decode_int_internal(plength, &tmp, 0);
+ if (res == -1)
+ return (-1);
+
+ return (0);
+}
+
+int
+evtag_consume(struct evbuffer *evbuf)
+{
+ ev_uint32_t len;
+ if (decode_tag_internal(NULL, evbuf, 1 /* dodrain */) == -1)
+ return (-1);
+ if (evtag_decode_int(&len, evbuf) == -1)
+ return (-1);
+ evbuffer_drain(evbuf, len);
+
+ return (0);
+}
+
+/* Reads the data type from an event buffer */
+
+int
+evtag_unmarshal(struct evbuffer *src, ev_uint32_t *ptag, struct evbuffer *dst)
+{
+ ev_uint32_t len;
+ ev_uint32_t integer;
+
+ if (decode_tag_internal(ptag, src, 1 /* dodrain */) == -1)
+ return (-1);
+ if (evtag_decode_int(&integer, src) == -1)
+ return (-1);
+ len = integer;
+
+ if (EVBUFFER_LENGTH(src) < len)
+ return (-1);
+
+ if (evbuffer_add(dst, EVBUFFER_DATA(src), len) == -1)
+ return (-1);
+
+ evbuffer_drain(src, len);
+
+ return (len);
+}
+
+/* Marshaling for integers */
+
+int
+evtag_unmarshal_int(struct evbuffer *evbuf, ev_uint32_t need_tag,
+ ev_uint32_t *pinteger)
+{
+ ev_uint32_t tag;
+ ev_uint32_t len;
+ ev_uint32_t integer;
+
+ if (decode_tag_internal(&tag, evbuf, 1 /* dodrain */) == -1)
+ return (-1);
+ if (need_tag != tag)
+ return (-1);
+ if (evtag_decode_int(&integer, evbuf) == -1)
+ return (-1);
+ len = integer;
+
+ if (EVBUFFER_LENGTH(evbuf) < len)
+ return (-1);
+
+ evbuffer_drain(_buf, EVBUFFER_LENGTH(_buf));
+ if (evbuffer_add(_buf, EVBUFFER_DATA(evbuf), len) == -1)
+ return (-1);
+
+ evbuffer_drain(evbuf, len);
+
+ return (evtag_decode_int(pinteger, _buf));
+}
+
+/* Unmarshal a fixed length tag */
+
+int
+evtag_unmarshal_fixed(struct evbuffer *src, ev_uint32_t need_tag, void *data,
+ size_t len)
+{
+ ev_uint32_t tag;
+
+ /* Initialize this event buffer so that we can read into it */
+ evbuffer_drain(_buf, EVBUFFER_LENGTH(_buf));
+
+ /* Now unmarshal a tag and check that it matches the tag we want */
+ if (evtag_unmarshal(src, &tag, _buf) == -1 || tag != need_tag)
+ return (-1);
+
+ if (EVBUFFER_LENGTH(_buf) != len)
+ return (-1);
+
+ memcpy(data, EVBUFFER_DATA(_buf), len);
+ return (0);
+}
+
+int
+evtag_unmarshal_string(struct evbuffer *evbuf, ev_uint32_t need_tag,
+ char **pstring)
+{
+ ev_uint32_t tag;
+
+ evbuffer_drain(_buf, EVBUFFER_LENGTH(_buf));
+
+ if (evtag_unmarshal(evbuf, &tag, _buf) == -1 || tag != need_tag)
+ return (-1);
+
+ *pstring = calloc(EVBUFFER_LENGTH(_buf) + 1, 1);
+ if (*pstring == NULL)
+ event_err(1, "%s: calloc", __func__);
+ evbuffer_remove(_buf, *pstring, EVBUFFER_LENGTH(_buf));
+
+ return (0);
+}
+
+int
+evtag_unmarshal_timeval(struct evbuffer *evbuf, ev_uint32_t need_tag,
+ struct timeval *ptv)
+{
+ ev_uint32_t tag;
+ ev_uint32_t integer;
+
+ evbuffer_drain(_buf, EVBUFFER_LENGTH(_buf));
+ if (evtag_unmarshal(evbuf, &tag, _buf) == -1 || tag != need_tag)
+ return (-1);
+
+ if (evtag_decode_int(&integer, _buf) == -1)
+ return (-1);
+ ptv->tv_sec = integer;
+ if (evtag_decode_int(&integer, _buf) == -1)
+ return (-1);
+ ptv->tv_usec = integer;
+
+ return (0);
+}
diff --git a/libevent/evhttp.h b/libevent/evhttp.h
new file mode 100644
index 00000000000..99d16a2f47a
--- /dev/null
+++ b/libevent/evhttp.h
@@ -0,0 +1,371 @@
+/*
+ * Copyright (c) 2000-2004 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _EVHTTP_H_
+#define _EVHTTP_H_
+
+#include <event.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <winsock2.h>
+#include <windows.h>
+#undef WIN32_LEAN_AND_MEAN
+#endif
+
+/** @file evhttp.h
+ *
+ * Basic support for HTTP serving.
+ *
+ * As libevent is a library for dealing with event notification and most
+ * interesting applications are networked today, I have often found the
+ * need to write HTTP code. The following prototypes and definitions provide
+ * an application with a minimal interface for making HTTP requests and for
+ * creating a very simple HTTP server.
+ */
+
+/* Response codes */
+#define HTTP_OK 200
+#define HTTP_NOCONTENT 204
+#define HTTP_MOVEPERM 301
+#define HTTP_MOVETEMP 302
+#define HTTP_NOTMODIFIED 304
+#define HTTP_BADREQUEST 400
+#define HTTP_NOTFOUND 404
+#define HTTP_SERVUNAVAIL 503
+
+struct evhttp;
+struct evhttp_request;
+struct evkeyvalq;
+
+/** Create a new HTTP server
+ *
+ * @param base (optional) the event base to receive the HTTP events
+ * @return a pointer to a newly initialized evhttp server structure
+ */
+struct evhttp *evhttp_new(struct event_base *base);
+
+/**
+ * Binds an HTTP server on the specified address and port.
+ *
+ * Can be called multiple times to bind the same http server
+ * to multiple different ports.
+ *
+ * @param http a pointer to an evhttp object
+ * @param address a string containing the IP address to listen(2) on
+ * @param port the port number to listen on
+ * @return 0 on success, -1 on failure.
+ * @see evhttp_free()
+ */
+int evhttp_bind_socket(struct evhttp *http, const char *address, u_short port);
+
+/**
+ * Makes an HTTP server accept connections on the specified socket
+ *
+ * This may be useful to create a socket and then fork multiple instances
+ * of an http server, or when a socket has been communicated via file
+ * descriptor passing in situations where an http server does not have
+ * permissions to bind to a low-numbered port.
+ *
+ * Can be called multiple times to have the http server listen to
+ * multiple different sockets.
+ *
+ * @param http a pointer to an evhttp object
+ * @param fd a socket fd that is ready for accepting connections
+ * @return 0 on success, -1 on failure.
+ * @see evhttp_free(), evhttp_bind_socket()
+ */
+int evhttp_accept_socket(struct evhttp *http, int fd);
+
+/**
+ * Free the previously created HTTP server.
+ *
+ * Works only if no requests are currently being served.
+ *
+ * @param http the evhttp server object to be freed
+ * @see evhttp_start()
+ */
+void evhttp_free(struct evhttp* http);
+
+/** Set a callback for a specified URI */
+void evhttp_set_cb(struct evhttp *, const char *,
+ void (*)(struct evhttp_request *, void *), void *);
+
+/** Removes the callback for a specified URI */
+int evhttp_del_cb(struct evhttp *, const char *);
+
+/** Set a callback for all requests that are not caught by specific callbacks
+ */
+void evhttp_set_gencb(struct evhttp *,
+ void (*)(struct evhttp_request *, void *), void *);
+
+/**
+ * Set the timeout for an HTTP request.
+ *
+ * @param http an evhttp object
+ * @param timeout_in_secs the timeout, in seconds
+ */
+void evhttp_set_timeout(struct evhttp *, int timeout_in_secs);
+
+/* Request/Response functionality */
+
+/**
+ * Send an HTML error message to the client.
+ *
+ * @param req a request object
+ * @param error the HTTP error code
+ * @param reason a brief explanation of the error
+ */
+void evhttp_send_error(struct evhttp_request *req, int error,
+ const char *reason);
+
+/**
+ * Send an HTML reply to the client.
+ *
+ * @param req a request object
+ * @param code the HTTP response code to send
+ * @param reason a brief message to send with the response code
+ * @param databuf the body of the response
+ */
+void evhttp_send_reply(struct evhttp_request *req, int code,
+ const char *reason, struct evbuffer *databuf);
+
+/* Low-level response interface, for streaming/chunked replies */
+void evhttp_send_reply_start(struct evhttp_request *, int, const char *);
+void evhttp_send_reply_chunk(struct evhttp_request *, struct evbuffer *);
+void evhttp_send_reply_end(struct evhttp_request *);
+
+/**
+ * Start an HTTP server on the specified address and port
+ *
+ * DEPRECATED: it does not allow an event base to be specified
+ *
+ * @param address the address to which the HTTP server should be bound
+ * @param port the port number on which the HTTP server should listen
+ * @return a struct evhttp object
+ */
+struct evhttp *evhttp_start(const char *address, u_short port);
+
+/*
+ * Interfaces for making requests
+ */
+enum evhttp_cmd_type { EVHTTP_REQ_GET, EVHTTP_REQ_POST, EVHTTP_REQ_HEAD };
+
+enum evhttp_request_kind { EVHTTP_REQUEST, EVHTTP_RESPONSE };
+
+/**
+ * the request structure that a server receives.
+ * WARNING: expect this structure to change. I will try to provide
+ * reasonable accessors.
+ */
+struct evhttp_request {
+#if defined(TAILQ_ENTRY)
+ TAILQ_ENTRY(evhttp_request) next;
+#else
+struct {
+ struct evhttp_request *tqe_next;
+ struct evhttp_request **tqe_prev;
+} next;
+#endif
+
+ /* the connection object that this request belongs to */
+ struct evhttp_connection *evcon;
+ int flags;
+#define EVHTTP_REQ_OWN_CONNECTION 0x0001
+#define EVHTTP_PROXY_REQUEST 0x0002
+
+ struct evkeyvalq *input_headers;
+ struct evkeyvalq *output_headers;
+
+ /* address of the remote host and the port connection came from */
+ char *remote_host;
+ u_short remote_port;
+
+ enum evhttp_request_kind kind;
+ enum evhttp_cmd_type type;
+
+ char *uri; /* uri after HTTP request was parsed */
+
+ char major; /* HTTP Major number */
+ char minor; /* HTTP Minor number */
+
+ int response_code; /* HTTP Response code */
+ char *response_code_line; /* Readable response */
+
+ struct evbuffer *input_buffer; /* read data */
+ ev_int64_t ntoread;
+ int chunked;
+
+ struct evbuffer *output_buffer; /* outgoing post or data */
+
+ /* Callback */
+ void (*cb)(struct evhttp_request *, void *);
+ void *cb_arg;
+
+ /*
+ * Chunked data callback - call for each completed chunk if
+ * specified. If not specified, all the data is delivered via
+ * the regular callback.
+ */
+ void (*chunk_cb)(struct evhttp_request *, void *);
+};
+
+/**
+ * Creates a new request object that needs to be filled in with the request
+ * parameters. The callback is executed when the request completed or an
+ * error occurred.
+ */
+struct evhttp_request *evhttp_request_new(
+ void (*cb)(struct evhttp_request *, void *), void *arg);
+
+/** enable delivery of chunks to requestor */
+void evhttp_request_set_chunked_cb(struct evhttp_request *,
+ void (*cb)(struct evhttp_request *, void *));
+
+/** Frees the request object and removes associated events. */
+void evhttp_request_free(struct evhttp_request *req);
+
+/**
+ * A connection object that can be used to for making HTTP requests. The
+ * connection object tries to establish the connection when it is given an
+ * http request object.
+ */
+struct evhttp_connection *evhttp_connection_new(
+ const char *address, unsigned short port);
+
+/** Frees an http connection */
+void evhttp_connection_free(struct evhttp_connection *evcon);
+
+/** sets the ip address from which http connections are made */
+void evhttp_connection_set_local_address(struct evhttp_connection *evcon,
+ const char *address);
+
+/** sets the local port from which http connections are made */
+void evhttp_connection_set_local_port(struct evhttp_connection *evcon,
+ unsigned short port);
+
+/** Sets the timeout for events related to this connection */
+void evhttp_connection_set_timeout(struct evhttp_connection *evcon,
+ int timeout_in_secs);
+
+/** Sets the retry limit for this connection - -1 repeats indefinitely */
+void evhttp_connection_set_retries(struct evhttp_connection *evcon,
+ int retry_max);
+
+/** Set a callback for connection close. */
+void evhttp_connection_set_closecb(struct evhttp_connection *evcon,
+ void (*)(struct evhttp_connection *, void *), void *);
+
+/**
+ * Associates an event base with the connection - can only be called
+ * on a freshly created connection object that has not been used yet.
+ */
+void evhttp_connection_set_base(struct evhttp_connection *evcon,
+ struct event_base *base);
+
+/** Get the remote address and port associated with this connection. */
+void evhttp_connection_get_peer(struct evhttp_connection *evcon,
+ char **address, u_short *port);
+
+/** The connection gets ownership of the request */
+int evhttp_make_request(struct evhttp_connection *evcon,
+ struct evhttp_request *req,
+ enum evhttp_cmd_type type, const char *uri);
+
+const char *evhttp_request_uri(struct evhttp_request *req);
+
+/* Interfaces for dealing with HTTP headers */
+
+const char *evhttp_find_header(const struct evkeyvalq *, const char *);
+int evhttp_remove_header(struct evkeyvalq *, const char *);
+int evhttp_add_header(struct evkeyvalq *, const char *, const char *);
+void evhttp_clear_headers(struct evkeyvalq *);
+
+/* Miscellaneous utility functions */
+
+
+/**
+ Helper function to encode a URI.
+
+ The returned string must be freed by the caller.
+
+ @param uri an unencoded URI
+ @return a newly allocated URI-encoded string
+ */
+char *evhttp_encode_uri(const char *uri);
+
+
+/**
+ Helper function to decode a URI.
+
+ The returned string must be freed by the caller.
+
+ @param uri an encoded URI
+ @return a newly allocated unencoded URI
+ */
+char *evhttp_decode_uri(const char *uri);
+
+
+/**
+ * Helper function to parse out arguments in a query.
+ *
+ * Parsing a uri like
+ *
+ * http://foo.com/?q=test&s=some+thing
+ *
+ * will result in two entries in the key value queue.
+ *
+ * The first entry is: key="q", value="test"
+ * The second entry is: key="s", value="some thing"
+ *
+ * @param uri the request URI
+ * @param headers the head of the evkeyval queue
+ */
+void evhttp_parse_query(const char *uri, struct evkeyvalq *headers);
+
+
+/**
+ * Escape HTML character entities in a string.
+ *
+ * Replaces <, >, ", ' and & with &lt;, &gt;, &quot;,
+ * &#039; and &amp; correspondingly.
+ *
+ * The returned string needs to be freed by the caller.
+ *
+ * @param html an unescaped HTML string
+ * @return an escaped HTML string
+ */
+char *evhttp_htmlescape(const char *html);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _EVHTTP_H_ */
diff --git a/libevent/evport.c b/libevent/evport.c
new file mode 100644
index 00000000000..dae6900cc10
--- /dev/null
+++ b/libevent/evport.c
@@ -0,0 +1,513 @@
+/*
+ * Submitted by David Pacheco (dp.spambait@gmail.com)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY SUN MICROSYSTEMS, INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL SUN MICROSYSTEMS, INC. BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2007 Sun Microsystems. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * evport.c: event backend using Solaris 10 event ports. See port_create(3C).
+ * This implementation is loosely modeled after the one used for select(2) (in
+ * select.c).
+ *
+ * The outstanding events are tracked in a data structure called evport_data.
+ * Each entry in the ed_fds array corresponds to a file descriptor, and contains
+ * pointers to the read and write events that correspond to that fd. (That is,
+ * when the file is readable, the "read" event should handle it, etc.)
+ *
+ * evport_add and evport_del update this data structure. evport_dispatch uses it
+ * to determine where to callback when an event occurs (which it gets from
+ * port_getn).
+ *
+ * Helper functions are used: grow() grows the file descriptor array as
+ * necessary when large fd's come in. reassociate() takes care of maintaining
+ * the proper file-descriptor/event-port associations.
+ *
+ * As in the select(2) implementation, signals are handled by evsignal.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/time.h>
+#include <assert.h>
+#include <sys/queue.h>
+#include <errno.h>
+#include <poll.h>
+#include <port.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#ifdef CHECK_INVARIANTS
+#include <assert.h>
+#endif
+
+#include "event.h"
+#include "event-internal.h"
+#include "log.h"
+#include "evsignal.h"
+
+
+/*
+ * Default value for ed_nevents, which is the maximum file descriptor number we
+ * can handle. If an event comes in for a file descriptor F > nevents, we will
+ * grow the array of file descriptors, doubling its size.
+ */
+#define DEFAULT_NFDS 16
+
+
+/*
+ * EVENTS_PER_GETN is the maximum number of events to retrieve from port_getn on
+ * any particular call. You can speed things up by increasing this, but it will
+ * (obviously) require more memory.
+ */
+#define EVENTS_PER_GETN 8
+
+/*
+ * Per-file-descriptor information about what events we're subscribed to. These
+ * fields are NULL if no event is subscribed to either of them.
+ */
+
+struct fd_info {
+	struct event* fdi_revt; /* the event responsible for the "read" */
+	struct event* fdi_wevt; /* the event responsible for the "write" */
+	/* both may be set at once: one fd can carry a read and a write event */
+};
+
+#define FDI_HAS_READ(fdi) ((fdi)->fdi_revt != NULL)
+#define FDI_HAS_WRITE(fdi) ((fdi)->fdi_wevt != NULL)
+#define FDI_HAS_EVENTS(fdi) (FDI_HAS_READ(fdi) || FDI_HAS_WRITE(fdi))
+#define FDI_TO_SYSEVENTS(fdi) (FDI_HAS_READ(fdi) ? POLLIN : 0) | \
+ (FDI_HAS_WRITE(fdi) ? POLLOUT : 0)
+
+struct evport_data {
+	int ed_port; /* event port for system events */
+	int ed_nevents; /* number of allocated fdi's */
+	struct fd_info *ed_fds; /* allocated fdi table */
+	/* fdi's that we need to reassoc */
+	int ed_pending[EVENTS_PER_GETN]; /* fd's with pending events; -1 = unused slot */
+};
+
+static void* evport_init (struct event_base *);
+static int evport_add (void *, struct event *);
+static int evport_del (void *, struct event *);
+static int evport_dispatch (struct event_base *, void *, struct timeval *);
+static void evport_dealloc (struct event_base *, void *);
+
+/* Backend vtable; field order presumably matches struct eventop in
+ * event-internal.h — name, init, add, del, dispatch, dealloc, need_reinit. */
+const struct eventop evportops = {
+	"evport",
+	evport_init,
+	evport_add,
+	evport_del,
+	evport_dispatch,
+	evport_dealloc,
+	1 /* need reinit */
+};
+
+/*
+ * Initialize the event port implementation.
+ */
+
+static void*
+evport_init(struct event_base *base)
+{
+	struct evport_data *evpd;
+	int i;
+	/*
+	 * Disable event ports when this environment variable is set
+	 */
+	if (getenv("EVENT_NOEVPORT"))
+		return (NULL);
+
+	/* a NULL return disables this backend */
+	if (!(evpd = calloc(1, sizeof(struct evport_data))))
+		return (NULL);
+
+	if ((evpd->ed_port = port_create()) == -1) {
+		free(evpd);
+		return (NULL);
+	}
+
+	/*
+	 * Initialize file descriptor structure
+	 */
+	evpd->ed_fds = calloc(DEFAULT_NFDS, sizeof(struct fd_info));
+	if (evpd->ed_fds == NULL) {
+		close(evpd->ed_port);
+		free(evpd);
+		return (NULL);
+	}
+	evpd->ed_nevents = DEFAULT_NFDS;
+	/* -1 marks an unused slot in the pending-reassociation list */
+	for (i = 0; i < EVENTS_PER_GETN; i++)
+		evpd->ed_pending[i] = -1;
+
+	evsignal_init(base);
+
+	return (evpd);
+}
+
+#ifdef CHECK_INVARIANTS
+/*
+ * Checks some basic properties about the evport_data structure. Because it
+ * checks all file descriptors, this function can be expensive when the maximum
+ * file descriptor ever used is rather large.
+ */
+
+static void
+check_evportop(struct evport_data *evpd)
+{
+	assert(evpd);
+	assert(evpd->ed_nevents > 0);
+	assert(evpd->ed_port > 0);
+	/* NOTE(review): pointer compared against 0 — effectively a non-NULL check */
+	assert(evpd->ed_fds > 0);
+
+	/*
+	 * Verify the integrity of the fd_info struct as well as the events to
+	 * which it points (at least, that they're valid references and correct
+	 * for their position in the structure).
+	 */
+	int i;
+	for (i = 0; i < evpd->ed_nevents; ++i) {
+		struct event *ev;
+		struct fd_info *fdi;
+
+		fdi = &evpd->ed_fds[i];
+		if ((ev = fdi->fdi_revt) != NULL) {
+			assert(ev->ev_fd == i);
+		}
+		if ((ev = fdi->fdi_wevt) != NULL) {
+			assert(ev->ev_fd == i);
+		}
+	}
+}
+
+/*
+ * Verifies very basic integrity of a given port_event.
+ */
+static void
+check_event(port_event_t* pevt)
+{
+	/*
+	 * We've only registered for PORT_SOURCE_FD events. The only
+	 * other thing we can legitimately receive is PORT_SOURCE_ALERT,
+	 * but since we're not using port_alert either, we can assume
+	 * PORT_SOURCE_FD.
+	 */
+	assert(pevt->portev_source == PORT_SOURCE_FD);
+	/* reassociate() always passes NULL as the port_associate user cookie */
+	assert(pevt->portev_user == NULL);
+}
+
+#else
+#define check_evportop(epop)
+#define check_event(pevt)
+#endif /* CHECK_INVARIANTS */
+
+/*
+ * Doubles the size of the allocated file descriptor array.
+ */
+static int
+grow(struct evport_data *epdp, int factor)
+{
+	struct fd_info *tmp;
+	int oldsize = epdp->ed_nevents;
+	int newsize = factor * oldsize;
+	assert(factor > 1);
+
+	check_evportop(epdp);
+
+	/* realloc into tmp first so ed_fds stays valid if it fails */
+	tmp = realloc(epdp->ed_fds, sizeof(struct fd_info) * newsize);
+	if (NULL == tmp)
+		return -1;
+	epdp->ed_fds = tmp;
+	/* zero only the newly added tail of the table */
+	memset((char*) (epdp->ed_fds + oldsize), 0,
+	    (newsize - oldsize)*sizeof(struct fd_info));
+	epdp->ed_nevents = newsize;
+
+	check_evportop(epdp);
+
+	return 0;
+}
+
+
+/*
+ * (Re)associates the given file descriptor with the event port. The OS events
+ * are specified (implicitly) from the fd_info struct.
+ */
+static int
+reassociate(struct evport_data *epdp, struct fd_info *fdip, int fd)
+{
+	int sysevents = FDI_TO_SYSEVENTS(fdip);
+
+	/* nothing to associate when neither read nor write is wanted */
+	if (sysevents != 0) {
+		if (port_associate(epdp->ed_port, PORT_SOURCE_FD,
+			    fd, sysevents, NULL) == -1) {
+			event_warn("port_associate");
+			return (-1);
+		}
+	}
+
+	check_evportop(epdp);
+
+	return (0);
+}
+
+/*
+ * Main event loop - polls port_getn for some number of events, and processes
+ * them.
+ */
+
+static int
+evport_dispatch(struct event_base *base, void *arg, struct timeval *tv)
+{
+	int i, res;
+	struct evport_data *epdp = arg;
+	port_event_t pevtlist[EVENTS_PER_GETN];
+
+	/*
+	 * port_getn will block until it has at least nevents events. It will
+	 * also return how many it's given us (which may be more than we asked
+	 * for, as long as it's less than our maximum (EVENTS_PER_GETN)) in
+	 * nevents.
+	 */
+	int nevents = 1;
+
+	/*
+	 * We have to convert a struct timeval to a struct timespec
+	 * (only difference is nanoseconds vs. microseconds). If no time-based
+	 * events are active, we should wait for I/O (and tv == NULL).
+	 */
+	struct timespec ts;
+	struct timespec *ts_p = NULL;
+	if (tv != NULL) {
+		ts.tv_sec = tv->tv_sec;
+		ts.tv_nsec = tv->tv_usec * 1000;
+		ts_p = &ts;
+	}
+
+	/*
+	 * Before doing anything else, we need to reassociate the events we hit
+	 * last time which need reassociation. See comment at the end of the
+	 * loop below.
+	 */
+	for (i = 0; i < EVENTS_PER_GETN; ++i) {
+		struct fd_info *fdi = NULL;
+		if (epdp->ed_pending[i] != -1) {
+			fdi = &(epdp->ed_fds[epdp->ed_pending[i]]);
+		}
+
+		if (fdi != NULL && FDI_HAS_EVENTS(fdi)) {
+			int fd = FDI_HAS_READ(fdi) ? fdi->fdi_revt->ev_fd :
+			    fdi->fdi_wevt->ev_fd;
+			reassociate(epdp, fdi, fd);
+			epdp->ed_pending[i] = -1;
+		}
+	}
+
+	/* NOTE(review): nevents is int but port_getn takes a uint_t *;
+	 * the cast assumes matching width — confirm on target platforms */
+	if ((res = port_getn(epdp->ed_port, pevtlist, EVENTS_PER_GETN,
+		    (unsigned int *) &nevents, ts_p)) == -1) {
+		if (errno == EINTR || errno == EAGAIN) {
+			evsignal_process(base);
+			return (0);
+		} else if (errno == ETIME) {
+			/* timed out; events already fetched (nevents > 0)
+			 * still fall through and get processed below */
+			if (nevents == 0)
+				return (0);
+		} else {
+			event_warn("port_getn");
+			return (-1);
+		}
+	} else if (base->sig.evsignal_caught) {
+		evsignal_process(base);
+	}
+
+	event_debug(("%s: port_getn reports %d events", __func__, nevents));
+
+	for (i = 0; i < nevents; ++i) {
+		struct event *ev;
+		struct fd_info *fdi;
+		port_event_t *pevt = &pevtlist[i];
+		int fd = (int) pevt->portev_object;
+
+		check_evportop(epdp);
+		check_event(pevt);
+		/* remember the fd: the next dispatch pass reassociates it,
+		 * since port events are one-shot */
+		epdp->ed_pending[i] = fd;
+
+		/*
+		 * Figure out what kind of event it was
+		 * (because we have to pass this to the callback)
+		 */
+		res = 0;
+		if (pevt->portev_events & POLLIN)
+			res |= EV_READ;
+		if (pevt->portev_events & POLLOUT)
+			res |= EV_WRITE;
+
+		assert(epdp->ed_nevents > fd);
+		fdi = &(epdp->ed_fds[fd]);
+
+		/*
+		 * We now check for each of the possible events (READ
+		 * or WRITE). Then, we activate the event (which will
+		 * cause its callback to be executed).
+		 */
+
+		if ((res & EV_READ) && ((ev = fdi->fdi_revt) != NULL)) {
+			event_active(ev, res, 1);
+		}
+
+		if ((res & EV_WRITE) && ((ev = fdi->fdi_wevt) != NULL)) {
+			event_active(ev, res, 1);
+		}
+	} /* end of all events gotten */
+
+	check_evportop(epdp);
+
+	return (0);
+}
+
+
+/*
+ * Adds the given event (so that you will be notified when it happens via
+ * the callback function).
+ */
+
+static int
+evport_add(void *arg, struct event *ev)
+{
+	struct evport_data *evpd = arg;
+	struct fd_info *fdi;
+	int factor;
+
+	check_evportop(evpd);
+
+	/*
+	 * Delegate, if it's not ours to handle.
+	 */
+	if (ev->ev_events & EV_SIGNAL)
+		return (evsignal_add(ev));
+
+	/*
+	 * If necessary, grow the file descriptor info table
+	 */
+
+	/* factor is the total multiplier needed so ev_fd fits in the table */
+	factor = 1;
+	while (ev->ev_fd >= factor * evpd->ed_nevents)
+		factor *= 2;
+
+	if (factor > 1) {
+		if (-1 == grow(evpd, factor)) {
+			return (-1);
+		}
+	}
+
+	fdi = &evpd->ed_fds[ev->ev_fd];
+	if (ev->ev_events & EV_READ)
+		fdi->fdi_revt = ev;
+	if (ev->ev_events & EV_WRITE)
+		fdi->fdi_wevt = ev;
+
+	return reassociate(evpd, fdi, ev->ev_fd);
+}
+
+/*
+ * Removes the given event from the list of events to wait for.
+ */
+
+static int
+evport_del(void *arg, struct event *ev)
+{
+	struct evport_data *evpd = arg;
+	struct fd_info *fdi;
+	int i;
+	int associated = 1;
+
+	check_evportop(evpd);
+
+	/*
+	 * Delegate, if it's not ours to handle
+	 */
+	if (ev->ev_events & EV_SIGNAL) {
+		return (evsignal_del(ev));
+	}
+
+	/*
+	 * Valid slots are 0 .. ed_nevents-1.  The previous test
+	 * (ed_nevents < ev_fd) let ev_fd == ed_nevents through and
+	 * indexed one entry past the end of ed_fds below.
+	 */
+	if (ev->ev_fd >= evpd->ed_nevents) {
+		return (-1);
+	}
+
+	/* an fd still on the pending list is no longer associated */
+	for (i = 0; i < EVENTS_PER_GETN; ++i) {
+		if (evpd->ed_pending[i] == ev->ev_fd) {
+			associated = 0;
+			break;
+		}
+	}
+
+	fdi = &evpd->ed_fds[ev->ev_fd];
+	if (ev->ev_events & EV_READ)
+		fdi->fdi_revt = NULL;
+	if (ev->ev_events & EV_WRITE)
+		fdi->fdi_wevt = NULL;
+
+	if (associated) {
+		if (!FDI_HAS_EVENTS(fdi) &&
+		    port_dissociate(evpd->ed_port, PORT_SOURCE_FD,
+		    ev->ev_fd) == -1) {
+			/*
+			 * Ignore EBADFD: the fd could have been closed
+			 * before event_del() was called.
+			 */
+			if (errno != EBADFD) {
+				event_warn("port_dissociate");
+				return (-1);
+			}
+		} else {
+			if (FDI_HAS_EVENTS(fdi)) {
+				return (reassociate(evpd, fdi, ev->ev_fd));
+			}
+		}
+	} else {
+		/* both directions gone: drop the pending slot */
+		if (fdi->fdi_revt == NULL && fdi->fdi_wevt == NULL) {
+			evpd->ed_pending[i] = -1;
+		}
+	}
+	return 0;
+}
+
+
+static void
+evport_dealloc(struct event_base *base, void *arg)
+{
+	/* Tear down the backend: signal state, the port fd and the fdi table. */
+	struct evport_data *evpd = arg;
+
+	evsignal_dealloc(base);
+
+	close(evpd->ed_port);
+
+	if (evpd->ed_fds)
+		free(evpd->ed_fds);
+	free(evpd);
+}
diff --git a/libevent/evrpc-internal.h b/libevent/evrpc-internal.h
new file mode 100644
index 00000000000..c900f959f97
--- /dev/null
+++ b/libevent/evrpc-internal.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2006 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _EVRPC_INTERNAL_H_
+#define _EVRPC_INTERNAL_H_
+
+#include "http-internal.h"
+
+struct evrpc;
+
+#define EVRPC_URI_PREFIX "/.rpc."
+
+/* One registered input/output filter; lives on an evrpc_hook_list. */
+struct evrpc_hook {
+	TAILQ_ENTRY(evrpc_hook) (next);
+
+	/* returns -1; if the rpc should be aborted, is allowed to rewrite */
+	int (*process)(struct evhttp_request *, struct evbuffer *, void *);
+	void *process_arg;
+};
+
+TAILQ_HEAD(evrpc_hook_list, evrpc_hook);
+
+/*
+ * this is shared between the base and the pool, so that we can reuse
+ * the hook adding functions; we alias both evrpc_pool and evrpc_base
+ * to this common structure.
+ */
+/* Common first member of evrpc_base and evrpc_pool, so hook functions
+ * can operate on either via a void pointer. */
+struct _evrpc_hooks {
+	/* hooks for processing outbound and inbound rpcs */
+	struct evrpc_hook_list in_hooks;
+	struct evrpc_hook_list out_hooks;
+};
+
+#define input_hooks common.in_hooks
+#define output_hooks common.out_hooks
+
+/* Server-side RPC registry.  'common' MUST stay the first member: the
+ * hook functions alias this struct through struct _evrpc_hooks. */
+struct evrpc_base {
+	struct _evrpc_hooks common;
+
+	/* the HTTP server under which we register our RPC calls */
+	struct evhttp* http_server;
+
+	/* a list of all RPCs registered with us */
+	TAILQ_HEAD(evrpc_list, evrpc) registered_rpcs;
+};
+
+struct evrpc_req_generic;
+void evrpc_reqstate_free(struct evrpc_req_generic* rpc_state);
+
+/* A pool for holding evhttp_connection objects */
+/* A pool for holding evhttp_connection objects */
+struct evrpc_pool {
+	/* must remain the first member; aliased via struct _evrpc_hooks */
+	struct _evrpc_hooks common;
+
+	struct event_base *base;
+
+	struct evconq connections;
+
+	/* timeout in seconds applied to new connections; -1 = unset */
+	int timeout;
+
+	/* requests waiting for a free connection */
+	TAILQ_HEAD(evrpc_requestq, evrpc_request_wrapper) requests;
+};
+
+
+#endif /* _EVRPC_INTERNAL_H_ */
diff --git a/libevent/evrpc.c b/libevent/evrpc.c
new file mode 100644
index 00000000000..8b3b071d0bf
--- /dev/null
+++ b/libevent/evrpc.c
@@ -0,0 +1,661 @@
+/*
+ * Copyright (c) 2000-2004 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <winsock2.h>
+#include <windows.h>
+#undef WIN32_LEAN_AND_MEAN
+#endif
+
+#include <sys/types.h>
+#ifndef WIN32
+#include <sys/socket.h>
+#endif
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#include <sys/queue.h>
+#include <stdio.h>
+#include <stdlib.h>
+#ifndef WIN32
+#include <unistd.h>
+#endif
+#ifndef HAVE_TAILQFOREACH
+#include <event-internal.h>
+#endif
+
+#include <errno.h>
+#include <signal.h>
+#include <string.h>
+#include <assert.h>
+
+#include "event.h"
+#include "evrpc.h"
+#include "evrpc-internal.h"
+#include "evhttp.h"
+#include "evutil.h"
+#include "log.h"
+
+/* Create an RPC base bound to the given HTTP server; NULL on OOM. */
+struct evrpc_base *
+evrpc_init(struct evhttp *http_server)
+{
+	struct evrpc_base* base = calloc(1, sizeof(struct evrpc_base));
+	if (base == NULL)
+		return (NULL);
+
+	/* we rely on the tagging sub system */
+	evtag_init();
+
+	TAILQ_INIT(&base->registered_rpcs);
+	TAILQ_INIT(&base->input_hooks);
+	TAILQ_INIT(&base->output_hooks);
+	base->http_server = http_server;
+
+	return (base);
+}
+
+/* Free an RPC base: unregister every RPC and remove all hooks. */
+void
+evrpc_free(struct evrpc_base *base)
+{
+	struct evrpc *rpc;
+	struct evrpc_hook *hook;
+	int r;
+
+	/*
+	 * The teardown calls must not live inside assert(): under NDEBUG
+	 * the assert body is compiled out, leaving the lists populated
+	 * and these loops spinning forever.  Also note that
+	 * evrpc_unregister_rpc returns 0 on success, so the old
+	 * assert(evrpc_unregister_rpc(...)) fired on every *successful*
+	 * unregister in debug builds.
+	 */
+	while ((rpc = TAILQ_FIRST(&base->registered_rpcs)) != NULL) {
+		r = evrpc_unregister_rpc(base, rpc->uri);
+		assert(r == 0);
+	}
+	while ((hook = TAILQ_FIRST(&base->input_hooks)) != NULL) {
+		r = evrpc_remove_hook(base, EVRPC_INPUT, hook);
+		assert(r != 0);
+	}
+	while ((hook = TAILQ_FIRST(&base->output_hooks)) != NULL) {
+		r = evrpc_remove_hook(base, EVRPC_OUTPUT, hook);
+		assert(r != 0);
+	}
+	free(base);
+}
+
+/* Add an input or output hook to a base or pool; returns the handle
+ * to pass to evrpc_remove_hook. */
+void *
+evrpc_add_hook(void *vbase,
+    enum EVRPC_HOOK_TYPE hook_type,
+    int (*cb)(struct evhttp_request *, struct evbuffer *, void *),
+    void *cb_arg)
+{
+	struct _evrpc_hooks *base = vbase;
+	struct evrpc_hook_list *head = NULL;
+	struct evrpc_hook *hook = NULL;
+	switch (hook_type) {
+	case EVRPC_INPUT:
+		head = &base->in_hooks;
+		break;
+	case EVRPC_OUTPUT:
+		head = &base->out_hooks;
+		break;
+	default:
+		assert(hook_type == EVRPC_INPUT || hook_type == EVRPC_OUTPUT);
+	}
+
+	/* NOTE(review): allocation failure is handled by assert, not by
+	 * returning NULL — callers cannot recover from OOM here */
+	hook = calloc(1, sizeof(struct evrpc_hook));
+	assert(hook != NULL);
+
+	hook->process = cb;
+	hook->process_arg = cb_arg;
+	TAILQ_INSERT_TAIL(head, hook, next);
+
+	return (hook);
+}
+
+/* Remove and free the hook 'handle' from 'head'.
+ * Returns 1 if found and freed, 0 otherwise. */
+static int
+evrpc_remove_hook_internal(struct evrpc_hook_list *head, void *handle)
+{
+	struct evrpc_hook *hook = NULL;
+	TAILQ_FOREACH(hook, head, next) {
+		if (hook == handle) {
+			TAILQ_REMOVE(head, hook, next);
+			free(hook);
+			return (1);
+		}
+	}
+
+	return (0);
+}
+
+/*
+ * remove the hook specified by the handle
+ */
+
+/* returns 1 on success, 0 if the handle was not found in the list */
+int
+evrpc_remove_hook(void *vbase, enum EVRPC_HOOK_TYPE hook_type, void *handle)
+{
+	struct _evrpc_hooks *base = vbase;
+	struct evrpc_hook_list *head = NULL;
+	switch (hook_type) {
+	case EVRPC_INPUT:
+		head = &base->in_hooks;
+		break;
+	case EVRPC_OUTPUT:
+		head = &base->out_hooks;
+		break;
+	default:
+		assert(hook_type == EVRPC_INPUT || hook_type == EVRPC_OUTPUT);
+	}
+
+	return (evrpc_remove_hook_internal(head, handle));
+}
+
+/* Run every hook on (req, evbuf); the first hook returning -1 aborts
+ * processing and makes this function return -1. */
+static int
+evrpc_process_hooks(struct evrpc_hook_list *head,
+    struct evhttp_request *req, struct evbuffer *evbuf)
+{
+	struct evrpc_hook *hook;
+	TAILQ_FOREACH(hook, head, next) {
+		if (hook->process(req, evbuf, hook->process_arg) == -1)
+			return (-1);
+	}
+
+	return (0);
+}
+
+static void evrpc_pool_schedule(struct evrpc_pool *pool);
+static void evrpc_request_cb(struct evhttp_request *, void *);
+void evrpc_request_done(struct evrpc_req_generic*);
+
+/*
+ * Registers a new RPC with the HTTP server. The evrpc object is expected
+ * to have been filled in via the EVRPC_REGISTER_OBJECT macro which in turn
+ * calls this function.
+ */
+
+/* Return a newly allocated string EVRPC_URI_PREFIX + uri; the caller
+ * frees it.  On OOM this terminates the process via event_err. */
+static char *
+evrpc_construct_uri(const char *uri)
+{
+	char *constructed_uri;
+	int constructed_uri_len;
+
+	constructed_uri_len = strlen(EVRPC_URI_PREFIX) + strlen(uri) + 1;
+	if ((constructed_uri = malloc(constructed_uri_len)) == NULL)
+		event_err(1, "%s: failed to register rpc at %s",
+		    __func__, uri);
+	memcpy(constructed_uri, EVRPC_URI_PREFIX, strlen(EVRPC_URI_PREFIX));
+	memcpy(constructed_uri + strlen(EVRPC_URI_PREFIX), uri, strlen(uri));
+	constructed_uri[constructed_uri_len - 1] = '\0';
+
+	return (constructed_uri);
+}
+
+int
+evrpc_register_rpc(struct evrpc_base *base, struct evrpc *rpc,
+    void (*cb)(struct evrpc_req_generic *, void *), void *cb_arg)
+{
+	/* NOTE(review): rpc->uri is expected to be filled in by the caller
+	 * (the EVRPC_REGISTER machinery) before this runs — confirm */
+	char *constructed_uri = evrpc_construct_uri(rpc->uri);
+
+	rpc->base = base;
+	rpc->cb = cb;
+	rpc->cb_arg = cb_arg;
+
+	TAILQ_INSERT_TAIL(&base->registered_rpcs, rpc, next);
+
+	/* requests arriving at the prefixed URI dispatch to this rpc */
+	evhttp_set_cb(base->http_server,
+	    constructed_uri,
+	    evrpc_request_cb,
+	    rpc);
+
+	free(constructed_uri);
+
+	return (0);
+}
+
+/* Unregister the RPC called 'name'; returns 0 on success, -1 if no
+ * RPC with that name exists. */
+int
+evrpc_unregister_rpc(struct evrpc_base *base, const char *name)
+{
+	char *registered_uri = NULL;
+	struct evrpc *rpc;
+	int r;
+
+	/* find the right rpc; linear search might be slow */
+	TAILQ_FOREACH(rpc, &base->registered_rpcs, next) {
+		if (strcmp(rpc->uri, name) == 0)
+			break;
+	}
+	if (rpc == NULL) {
+		/* We did not find an RPC with this name */
+		return (-1);
+	}
+	TAILQ_REMOVE(&base->registered_rpcs, rpc, next);
+
+	/*
+	 * Construct the URI before freeing rpc->uri: callers such as
+	 * evrpc_free() pass rpc->uri itself as 'name', so building it
+	 * after the free below read freed memory.
+	 */
+	registered_uri = evrpc_construct_uri(name);
+
+	free((char *)rpc->uri);
+	free(rpc);
+
+	/*
+	 * Remove the http server callback.  The call must not sit inside
+	 * assert(): under NDEBUG it would be compiled out, leaving a
+	 * callback that points at the freed rpc.
+	 */
+	r = evhttp_del_cb(base->http_server, registered_uri);
+	assert(r == 0);
+
+	free(registered_uri);
+	return (0);
+}
+
+/* HTTP callback for a registered RPC URI: parse the POSTed request,
+ * build the reply object and hand both to the user callback. */
+static void
+evrpc_request_cb(struct evhttp_request *req, void *arg)
+{
+	struct evrpc *rpc = arg;
+	struct evrpc_req_generic *rpc_state = NULL;
+
+	/* let's verify the outside parameters */
+	if (req->type != EVHTTP_REQ_POST ||
+	    EVBUFFER_LENGTH(req->input_buffer) <= 0)
+		goto error;
+
+	/*
+	 * we might want to allow hooks to suspend the processing,
+	 * but at the moment, we assume that they just act as simple
+	 * filters.
+	 */
+	if (evrpc_process_hooks(&rpc->base->input_hooks,
+		req, req->input_buffer) == -1)
+		goto error;
+
+	rpc_state = calloc(1, sizeof(struct evrpc_req_generic));
+	if (rpc_state == NULL)
+		goto error;
+
+	/* let's check that we can parse the request */
+	rpc_state->request = rpc->request_new();
+	if (rpc_state->request == NULL)
+		goto error;
+
+	rpc_state->rpc = rpc;
+
+	if (rpc->request_unmarshal(
+		    rpc_state->request, req->input_buffer) == -1) {
+		/* we failed to parse the request; that's a bummer */
+		goto error;
+	}
+
+	/* at this point, we have a well formed request, prepare the reply */
+
+	rpc_state->reply = rpc->reply_new();
+	if (rpc_state->reply == NULL)
+		goto error;
+
+	rpc_state->http_req = req;
+	/* the user signals completion by calling rpc_state->done */
+	rpc_state->done = evrpc_request_done;
+
+	/* give the rpc to the user; they can deal with it */
+	rpc->cb(rpc_state, rpc->cb_arg);
+
+	return;
+
+error:
+	/* rpc_state may still be NULL here; evrpc_reqstate_free handles that */
+	evrpc_reqstate_free(rpc_state);
+	evhttp_send_error(req, HTTP_SERVUNAVAIL, "Service Error");
+	return;
+}
+
+/* Free an RPC request state and its request/reply objects.
+ * Safe to call with NULL. */
+void
+evrpc_reqstate_free(struct evrpc_req_generic* rpc_state)
+{
+	/* clean up all memory */
+	if (rpc_state != NULL) {
+		struct evrpc *rpc = rpc_state->rpc;
+
+		if (rpc_state->request != NULL)
+			rpc->request_free(rpc_state->request);
+		if (rpc_state->reply != NULL)
+			rpc->reply_free(rpc_state->reply);
+		free(rpc_state);
+	}
+}
+
+/* Called by the user when the reply is filled in: marshal it, run the
+ * output hooks and send the HTTP response. */
+void
+evrpc_request_done(struct evrpc_req_generic* rpc_state)
+{
+	struct evhttp_request *req = rpc_state->http_req;
+	struct evrpc *rpc = rpc_state->rpc;
+	struct evbuffer* data = NULL;
+
+	if (rpc->reply_complete(rpc_state->reply) == -1) {
+		/* the reply was not completely filled in. error out */
+		goto error;
+	}
+
+	if ((data = evbuffer_new()) == NULL) {
+		/* out of memory */
+		goto error;
+	}
+
+	/* serialize the reply */
+	rpc->reply_marshal(data, rpc_state->reply);
+
+	/* do hook based tweaks to the request */
+	if (evrpc_process_hooks(&rpc->base->output_hooks,
+		req, data) == -1)
+		goto error;
+
+	/* on success, we are going to transmit marshaled binary data */
+	if (evhttp_find_header(req->output_headers, "Content-Type") == NULL) {
+		evhttp_add_header(req->output_headers,
+		    "Content-Type", "application/octet-stream");
+	}
+
+	evhttp_send_reply(req, HTTP_OK, "OK", data);
+
+	/* data is freed on both the success and the error path */
+	evbuffer_free(data);
+
+	evrpc_reqstate_free(rpc_state);
+
+	return;
+
+error:
+	if (data != NULL)
+		evbuffer_free(data);
+	evrpc_reqstate_free(rpc_state);
+	evhttp_send_error(req, HTTP_SERVUNAVAIL, "Service Error");
+	return;
+}
+
+/* Client implementation of RPC site */
+
+static int evrpc_schedule_request(struct evhttp_connection *connection,
+ struct evrpc_request_wrapper *ctx);
+
+/* Create a connection pool for client RPCs; NULL on OOM.
+ * 'base' may be NULL — connections then keep their own event base
+ * (see evrpc_pool_add_connection). */
+struct evrpc_pool *
+evrpc_pool_new(struct event_base *base)
+{
+	struct evrpc_pool *pool = calloc(1, sizeof(struct evrpc_pool));
+	if (pool == NULL)
+		return (NULL);
+
+	TAILQ_INIT(&pool->connections);
+	TAILQ_INIT(&pool->requests);
+
+	TAILQ_INIT(&pool->input_hooks);
+	TAILQ_INIT(&pool->output_hooks);
+
+	pool->base = base;
+	/* -1 means "no pool-level timeout set" */
+	pool->timeout = -1;
+
+	return (pool);
+}
+
+/* Free a request wrapper.  Only the name and the wrapper itself are
+ * released; the request/reply objects remain owned by the caller. */
+static void
+evrpc_request_wrapper_free(struct evrpc_request_wrapper *request)
+{
+	free(request->name);
+	free(request);
+}
+
+/* Free an RPC pool: drop queued requests, free all connections and
+ * remove any remaining hooks. */
+void
+evrpc_pool_free(struct evrpc_pool *pool)
+{
+	struct evhttp_connection *connection;
+	struct evrpc_request_wrapper *request;
+	struct evrpc_hook *hook;
+	int r;
+
+	while ((request = TAILQ_FIRST(&pool->requests)) != NULL) {
+		TAILQ_REMOVE(&pool->requests, request, next);
+		/* if this gets more complicated we need our own function */
+		evrpc_request_wrapper_free(request);
+	}
+
+	while ((connection = TAILQ_FIRST(&pool->connections)) != NULL) {
+		TAILQ_REMOVE(&pool->connections, connection, next);
+		evhttp_connection_free(connection);
+	}
+
+	/*
+	 * The hook removal must not happen inside assert(): under NDEBUG
+	 * the call would be compiled out and these loops would never
+	 * terminate.  evrpc_remove_hook returns 1 on success.
+	 */
+	while ((hook = TAILQ_FIRST(&pool->input_hooks)) != NULL) {
+		r = evrpc_remove_hook(pool, EVRPC_INPUT, hook);
+		assert(r != 0);
+	}
+
+	while ((hook = TAILQ_FIRST(&pool->output_hooks)) != NULL) {
+		r = evrpc_remove_hook(pool, EVRPC_OUTPUT, hook);
+		assert(r != 0);
+	}
+
+	free(pool);
+}
+
+/*
+ * Add a connection to the RPC pool. A request scheduled on the pool
+ * may use any available connection.
+ */
+
+void
+evrpc_pool_add_connection(struct evrpc_pool *pool,
+    struct evhttp_connection *connection) {
+	/* only client connections may be pooled */
+	assert(connection->http_server == NULL);
+	TAILQ_INSERT_TAIL(&pool->connections, connection, next);
+
+	/*
+	 * associate an event base with this connection
+	 */
+	if (pool->base != NULL)
+		evhttp_connection_set_base(connection, pool->base);
+
+	/*
+	 * unless a timeout was specifically set for a connection,
+	 * the connection inherits the timeout from the pool.
+	 */
+	if (connection->timeout == -1)
+		connection->timeout = pool->timeout;
+
+	/*
+	 * if we have any requests pending, schedule them with the new
+	 * connections.
+	 */
+
+	/* only one queued request is started; the rest wait for idle
+	 * connections via evrpc_pool_schedule */
+	if (TAILQ_FIRST(&pool->requests) != NULL) {
+		struct evrpc_request_wrapper *request =
+		    TAILQ_FIRST(&pool->requests);
+		TAILQ_REMOVE(&pool->requests, request, next);
+		evrpc_schedule_request(connection, request);
+	}
+}
+
+/*
+ * Apply a new timeout (in seconds) to the pool and to every
+ * connection it currently holds.
+ */
+void
+evrpc_pool_set_timeout(struct evrpc_pool *pool, int timeout_in_secs)
+{
+	struct evhttp_connection *conn;
+
+	pool->timeout = timeout_in_secs;
+	TAILQ_FOREACH(conn, &pool->connections, next)
+		conn->timeout = timeout_in_secs;
+}
+
+
+static void evrpc_reply_done(struct evhttp_request *, void *);
+static void evrpc_request_timeout(int, short, void *);
+
+/*
+ * Finds a connection object associated with the pool that is currently
+ * idle and can be used to make a request.
+ */
+static struct evhttp_connection *
+evrpc_pool_find_connection(struct evrpc_pool *pool)
+{
+	/* a connection is idle when its request queue is empty */
+	struct evhttp_connection *idle = NULL;
+	struct evhttp_connection *conn;
+
+	TAILQ_FOREACH(conn, &pool->connections, next) {
+		if (TAILQ_FIRST(&conn->requests) == NULL) {
+			idle = conn;
+			break;
+		}
+	}
+
+	return (idle);
+}
+
+/*
+ * We assume that the ctx is no longer queued on the pool.
+ */
+static int
+evrpc_schedule_request(struct evhttp_connection *connection,
+    struct evrpc_request_wrapper *ctx)
+{
+	struct evhttp_request *req = NULL;
+	struct evrpc_pool *pool = ctx->pool;
+	struct evrpc_status status;
+	char *uri = NULL;
+	int res = 0;
+
+	if ((req = evhttp_request_new(evrpc_reply_done, ctx)) == NULL)
+		goto error;
+
+	/* serialize the request data into the output buffer */
+	ctx->request_marshal(req->output_buffer, ctx->request);
+
+	uri = evrpc_construct_uri(ctx->name);
+	if (uri == NULL)
+		goto error;
+
+	/* we need to know the connection that we might have to abort */
+	ctx->evcon = connection;
+
+	/* apply hooks to the outgoing request */
+	if (evrpc_process_hooks(&pool->output_hooks,
+		req, req->output_buffer) == -1)
+		goto error;
+
+	if (pool->timeout > 0) {
+		/*
+		 * a timeout after which the whole rpc is going to be aborted.
+		 */
+		struct timeval tv;
+		evutil_timerclear(&tv);
+		tv.tv_sec = pool->timeout;
+		evtimer_add(&ctx->ev_timeout, &tv);
+	}
+
+	/* start the request over the connection; on success the
+	 * connection takes ownership of req */
+	res = evhttp_make_request(connection, req, EVHTTP_REQ_POST, uri);
+	free(uri);
+	uri = NULL;
+
+	if (res == -1)
+		goto error;
+
+	return (0);
+
+error:
+	/* the old error path leaked 'uri' when a hook or the request
+	 * submission failed; free(NULL) is a no-op */
+	free(uri);
+	memset(&status, 0, sizeof(status));
+	status.error = EVRPC_STATUS_ERR_UNSTARTED;
+	(*ctx->cb)(&status, ctx->request, ctx->reply, ctx->cb_arg);
+	evrpc_request_wrapper_free(ctx);
+	return (-1);
+}
+
+int
+evrpc_make_request(struct evrpc_request_wrapper *ctx)
+{
+	struct evrpc_pool *pool = ctx->pool;
+
+	/* initialize the event structure for this rpc */
+	evtimer_set(&ctx->ev_timeout, evrpc_request_timeout, ctx);
+	if (pool->base != NULL)
+		event_base_set(pool->base, &ctx->ev_timeout);
+
+	/* we better have some available connections on the pool */
+	assert(TAILQ_FIRST(&pool->connections) != NULL);
+
+	/*
+	 * if no connection is available, we queue the request on the pool,
+	 * the next time a connection is empty, the rpc will be send on that.
+	 */
+	/* NOTE(review): the request is always queued first; evrpc_pool_schedule
+	 * dequeues and starts it immediately when an idle connection exists */
+	TAILQ_INSERT_TAIL(&pool->requests, ctx, next);
+
+	evrpc_pool_schedule(pool);
+
+	return (0);
+}
+
+/* HTTP completion callback for a client RPC: run input hooks,
+ * unmarshal the reply and invoke the user callback with the status. */
+static void
+evrpc_reply_done(struct evhttp_request *req, void *arg)
+{
+	struct evrpc_request_wrapper *ctx = arg;
+	struct evrpc_pool *pool = ctx->pool;
+	struct evrpc_status status;
+	int res = -1;
+
+	/* cancel any timeout we might have scheduled */
+	event_del(&ctx->ev_timeout);
+
+	memset(&status, 0, sizeof(status));
+	status.http_req = req;
+
+	/* we need to get the reply now */
+	if (req != NULL) {
+		/* apply hooks to the incoming request */
+		if (evrpc_process_hooks(&pool->input_hooks,
+			req, req->input_buffer) == -1) {
+			status.error = EVRPC_STATUS_ERR_HOOKABORTED;
+			res = -1;
+		} else {
+			res = ctx->reply_unmarshal(ctx->reply,
+			    req->input_buffer);
+			if (res == -1) {
+				status.error = EVRPC_STATUS_ERR_BADPAYLOAD;
+			}
+		}
+	} else {
+		/* req == NULL: the request never completed (treated as timeout) */
+		status.error = EVRPC_STATUS_ERR_TIMEOUT;
+	}
+
+	if (res == -1) {
+		/* clear everything that we might have written previously */
+		ctx->reply_clear(ctx->reply);
+	}
+
+	(*ctx->cb)(&status, ctx->request, ctx->reply, ctx->cb_arg);
+
+	evrpc_request_wrapper_free(ctx);
+
+	/* the http layer owns the request structure */
+
+	/* see if we can schedule another request */
+	evrpc_pool_schedule(pool);
+}
+
+/* Start the oldest queued request if an idle connection is available;
+ * otherwise the request stays queued. */
+static void
+evrpc_pool_schedule(struct evrpc_pool *pool)
+{
+	struct evrpc_request_wrapper *ctx = TAILQ_FIRST(&pool->requests);
+	struct evhttp_connection *evcon;
+
+	/* if no requests are pending, we have no work */
+	if (ctx == NULL)
+		return;
+
+	if ((evcon = evrpc_pool_find_connection(pool)) != NULL) {
+		TAILQ_REMOVE(&pool->requests, ctx, next);
+		evrpc_schedule_request(evcon, ctx);
+	}
+}
+
+/* Timer callback for an expired RPC: fail the connection, which
+ * presumably makes evrpc_reply_done run with req == NULL — confirm
+ * against evhttp_connection_fail in http.c. */
+static void
+evrpc_request_timeout(int fd, short what, void *arg)
+{
+	struct evrpc_request_wrapper *ctx = arg;
+	struct evhttp_connection *evcon = ctx->evcon;
+	assert(evcon != NULL);
+
+	evhttp_connection_fail(evcon, EVCON_HTTP_TIMEOUT);
+}
diff --git a/libevent/evrpc.h b/libevent/evrpc.h
new file mode 100644
index 00000000000..7c16b95c775
--- /dev/null
+++ b/libevent/evrpc.h
@@ -0,0 +1,486 @@
+/*
+ * Copyright (c) 2006 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _EVRPC_H_
+#define _EVRPC_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @file evrpc.h
+ *
+ * This header files provides basic support for an RPC server and client.
+ *
+ * To support RPCs in a server, every supported RPC command needs to be
+ * defined and registered.
+ *
+ * EVRPC_HEADER(SendCommand, Request, Reply);
+ *
+ * SendCommand is the name of the RPC command.
+ * Request is the name of a structure generated by event_rpcgen.py.
+ * It contains all parameters relating to the SendCommand RPC. The
+ * server needs to fill in the Reply structure.
+ * Reply is the name of a structure generated by event_rpcgen.py. It
+ * contains the answer to the RPC.
+ *
+ * To register an RPC with an HTTP server, you need to first create an RPC
+ * base with:
+ *
+ * struct evrpc_base *base = evrpc_init(http);
+ *
+ * A specific RPC can then be registered with
+ *
+ * EVRPC_REGISTER(base, SendCommand, Request, Reply, FunctionCB, arg);
+ *
+ * when the server receives an appropriately formatted RPC, the user callback
+ * is invokved. The callback needs to fill in the reply structure.
+ *
+ * void FunctionCB(EVRPC_STRUCT(SendCommand)* rpc, void *arg);
+ *
+ * To send the reply, call EVRPC_REQUEST_DONE(rpc);
+ *
+ * See the regression test for an example.
+ */
+
+struct evbuffer;
+struct event_base;
+struct evrpc_req_generic;
+
+/* Encapsulates a registered server-side RPC: the URI it is served
+ * under, the marshaling callbacks (generated by event_rpcgen.py) and
+ * the user callback that implements it. */
+struct evrpc {
+ TAILQ_ENTRY(evrpc) next;
+
+ /* the URI at which the request handler lives */
+ const char* uri;
+
+ /* creates a new request structure */
+ void *(*request_new)(void);
+
+ /* frees the request structure */
+ void (*request_free)(void *);
+
+ /* unmarshals the buffer into the proper request structure */
+ int (*request_unmarshal)(void *, struct evbuffer *);
+
+ /* creates a new reply structure */
+ void *(*reply_new)(void);
+
+ /* frees the reply structure */
+ void (*reply_free)(void *);
+
+ /* verifies that the reply is valid */
+ int (*reply_complete)(void *);
+
+ /* marshals the reply into a buffer */
+ void (*reply_marshal)(struct evbuffer*, void *);
+
+ /* the callback invoked for each received rpc */
+ void (*cb)(struct evrpc_req_generic *, void *);
+ void *cb_arg;
+
+ /* reference for further configuration */
+ struct evrpc_base *base;
+};
+
+/** The type of a specific RPC Message
+ *
+ * @param rpcname the name of the RPC message
+ */
+#define EVRPC_STRUCT(rpcname) struct evrpc_req__##rpcname
+
+struct evhttp_request;
+struct evrpc_status;
+
+/* We alias the RPC specific structs to this voided one; its layout
+ * must stay in sync with the struct emitted by EVRPC_HEADER() so the
+ * two views of a request can be used interchangeably. */
+struct evrpc_req_generic {
+ /* the unmarshaled request object */
+ void *request;
+
+ /* the empty reply object that needs to be filled in */
+ void *reply;
+
+ /*
+ * the static structure for this rpc; that can be used to
+ * automatically unmarshal and marshal the http buffers.
+ */
+ struct evrpc *rpc;
+
+ /*
+ * the http request structure on which we need to answer.
+ */
+ struct evhttp_request* http_req;
+
+ /*
+ * callback to reply and finish answering this rpc
+ */
+ void (*done)(struct evrpc_req_generic* rpc);
+};
+
+/** Creates the definitions and prototypes for an RPC
+ *
+ * You need to use EVRPC_HEADER to create structures and function prototypes
+ * needed by the server and client implementation. The structures have to be
+ * defined in an .rpc file and converted to source code via event_rpcgen.py
+ *
+ * @param rpcname the name of the RPC
+ * @param reqstruct the name of the RPC request structure
+ * @param rplystruct the name of the RPC reply structure
+ * @see EVRPC_GENERATE()
+ */
+#define EVRPC_HEADER(rpcname, reqstruct, rplystruct) \
+EVRPC_STRUCT(rpcname) { \
+ struct reqstruct* request; \
+ struct rplystruct* reply; \
+ struct evrpc* rpc; \
+ struct evhttp_request* http_req; \
+ void (*done)(struct evrpc_status *, \
+ struct evrpc* rpc, void *request, void *reply); \
+}; \
+int evrpc_send_request_##rpcname(struct evrpc_pool *, \
+ struct reqstruct *, struct rplystruct *, \
+ void (*)(struct evrpc_status *, \
+ struct reqstruct *, struct rplystruct *, void *cbarg), \
+ void *);
+
+/** Generates the code for receiving and sending an RPC message
+ *
+ * EVRPC_GENERATE is used to create the code corresponding to sending
+ * and receiving a particular RPC message
+ *
+ * On allocation failure the user callback is invoked immediately with
+ * EVRPC_STATUS_ERR_UNSTARTED and -1 is returned; otherwise the context
+ * is handed to evrpc_make_request().
+ *
+ * @param rpcname the name of the RPC
+ * @param reqstruct the name of the RPC request structure
+ * @param rplystruct the name of the RPC reply structure
+ * @see EVRPC_HEADER()
+ */
+#define EVRPC_GENERATE(rpcname, reqstruct, rplystruct) \
+int evrpc_send_request_##rpcname(struct evrpc_pool *pool, \
+ struct reqstruct *request, struct rplystruct *reply, \
+ void (*cb)(struct evrpc_status *, \
+ struct reqstruct *, struct rplystruct *, void *cbarg), \
+ void *cbarg) { \
+ struct evrpc_status status; \
+ struct evrpc_request_wrapper *ctx; \
+ ctx = (struct evrpc_request_wrapper *) \
+ malloc(sizeof(struct evrpc_request_wrapper)); \
+ if (ctx == NULL) \
+ goto error; \
+ ctx->pool = pool; \
+ ctx->evcon = NULL; \
+ ctx->name = strdup(#rpcname); \
+ if (ctx->name == NULL) { \
+ free(ctx); \
+ goto error; \
+ } \
+ ctx->cb = (void (*)(struct evrpc_status *, \
+ void *, void *, void *))cb; \
+ ctx->cb_arg = cbarg; \
+ ctx->request = (void *)request; \
+ ctx->reply = (void *)reply; \
+ ctx->request_marshal = (void (*)(struct evbuffer *, void *))reqstruct##_marshal; \
+ ctx->reply_clear = (void (*)(void *))rplystruct##_clear; \
+ ctx->reply_unmarshal = (int (*)(void *, struct evbuffer *))rplystruct##_unmarshal; \
+ return (evrpc_make_request(ctx)); \
+error: \
+ memset(&status, 0, sizeof(status)); \
+ status.error = EVRPC_STATUS_ERR_UNSTARTED; \
+ (*(cb))(&status, request, reply, cbarg); \
+ return (-1); \
+}
+
+/** Provides access to the HTTP request object underlying an RPC
+ *
+ * Access to the underlying http object; can be used to look at headers or
+ * for getting the remote ip address
+ *
+ * @param rpc_req the rpc request structure provided to the server callback
+ * @return an struct evhttp_request object that can be inspected for
+ * HTTP headers or sender information.
+ */
+#define EVRPC_REQUEST_HTTP(rpc_req) (rpc_req)->http_req
+
+/** Creates the reply to an RPC request
+ *
+ * EVRPC_REQUEST_DONE is used to answer a request; the reply is expected
+ * to have been filled in. The request and reply pointers become invalid
+ * after this call has finished.
+ *
+ * @param rpc_req the rpc request structure provided to the server callback
+ */
+#define EVRPC_REQUEST_DONE(rpc_req) do { \
+ struct evrpc_req_generic *_req = (struct evrpc_req_generic *)(rpc_req); \
+ _req->done(_req); \
+} while (0)
+
+
+/* Takes a request object and fills it in with the right magic */
+#define EVRPC_REGISTER_OBJECT(rpc, name, request, reply) \
+ do { \
+ (rpc)->uri = strdup(#name); \
+ if ((rpc)->uri == NULL) { \
+ fprintf(stderr, "failed to register object\n"); \
+ exit(1); \
+ } \
+ (rpc)->request_new = (void *(*)(void))request##_new; \
+ (rpc)->request_free = (void (*)(void *))request##_free; \
+ (rpc)->request_unmarshal = (int (*)(void *, struct evbuffer *))request##_unmarshal; \
+ (rpc)->reply_new = (void *(*)(void))reply##_new; \
+ (rpc)->reply_free = (void (*)(void *))reply##_free; \
+ (rpc)->reply_complete = (int (*)(void *))reply##_complete; \
+ (rpc)->reply_marshal = (void (*)(struct evbuffer*, void *))reply##_marshal; \
+ } while (0)
+
+struct evrpc_base;
+struct evhttp;
+
+/* functions to start up the rpc system */
+
+/** Creates a new rpc base from which RPC requests can be received
+ *
+ * @param server a pointer to an existing HTTP server
+ * @return a newly allocated evrpc_base struct
+ * @see evrpc_free()
+ */
+struct evrpc_base *evrpc_init(struct evhttp *server);
+
+/**
+ * Frees the evrpc base
+ *
+ * For now, you are responsible for making sure that no rpcs are ongoing.
+ *
+ * @param base the evrpc_base object to be freed
+ * @see evrpc_init
+ */
+void evrpc_free(struct evrpc_base *base);
+
+/** register RPCs with the HTTP Server
+ *
+ * registers a new RPC with the HTTP server, each RPC needs to have
+ * a unique name under which it can be identified.
+ *
+ * @param base the evrpc_base structure in which the RPC should be
+ * registered.
+ * @param name the name of the RPC
+ * @param request the name of the RPC request structure
+ * @param reply the name of the RPC reply structure
+ * @param callback the callback that should be invoked when the RPC
+ * is received. The callback has the following prototype
+ * void (*callback)(EVRPC_STRUCT(Message)* rpc, void *arg)
+ * @param cbarg an additional parameter that can be passed to the callback.
+ * The parameter can be used to carry around state.
+ */
+#define EVRPC_REGISTER(base, name, request, reply, callback, cbarg) \
+ do { \
+ struct evrpc* rpc = (struct evrpc *)calloc(1, sizeof(struct evrpc)); \
+ EVRPC_REGISTER_OBJECT(rpc, name, request, reply); \
+ evrpc_register_rpc(base, rpc, \
+ (void (*)(struct evrpc_req_generic*, void *))callback, cbarg); \
+ } while (0)
+
+int evrpc_register_rpc(struct evrpc_base *, struct evrpc *,
+ void (*)(struct evrpc_req_generic*, void *), void *);
+
+/**
+ * Unregisters an already registered RPC
+ *
+ * @param base the evrpc_base object from which to unregister an RPC
+ * @param name the name of the rpc to unregister
+ * @return -1 on error or 0 when successful.
+ * @see EVRPC_REGISTER()
+ */
+#define EVRPC_UNREGISTER(base, name) evrpc_unregister_rpc(base, #name)
+
+int evrpc_unregister_rpc(struct evrpc_base *base, const char *name);
+
+/*
+ * Client-side RPC support
+ */
+
+struct evrpc_pool;
+struct evhttp_connection;
+
+/**
+ * provides information about the completed RPC request.
+ */
+struct evrpc_status {
+#define EVRPC_STATUS_ERR_NONE 0
+#define EVRPC_STATUS_ERR_TIMEOUT 1
+#define EVRPC_STATUS_ERR_BADPAYLOAD 2
+#define EVRPC_STATUS_ERR_UNSTARTED 3
+#define EVRPC_STATUS_ERR_HOOKABORTED 4
+ int error;
+
+ /* for looking at headers or other information */
+ struct evhttp_request *http_req;
+};
+
+/* Client-side state for one queued or in-flight RPC request. */
+struct evrpc_request_wrapper {
+ TAILQ_ENTRY(evrpc_request_wrapper) next;
+
+ /* pool on which this rpc request is being made */
+ struct evrpc_pool *pool;
+
+ /* connection on which the request is being sent */
+ struct evhttp_connection *evcon;
+
+ /* event for implementing request timeouts */
+ struct event ev_timeout;
+
+ /* the name of the rpc */
+ char *name;
+
+ /* callback */
+ void (*cb)(struct evrpc_status*, void *request, void *reply, void *arg);
+ void *cb_arg;
+
+ void *request;
+ void *reply;
+
+ /* marshals the request structure into a buffer */
+ void (*request_marshal)(struct evbuffer *, void *);
+
+ /* removes all stored state in the reply */
+ void (*reply_clear)(void *);
+
+ /* unmarshals the buffer into the proper reply structure */
+ int (*reply_unmarshal)(void *, struct evbuffer*);
+};
+
+/** launches an RPC and sends it to the server
+ *
+ * EVRPC_MAKE_REQUEST() is used by the client to send an RPC to the server.
+ *
+ * @param name the name of the RPC
+ * @param pool the evrpc_pool that contains the connection objects over which
+ * the request should be sent.
+ * @param request a pointer to the RPC request structure - it contains the
+ * data to be sent to the server.
+ * @param reply a pointer to the RPC reply structure. It is going to be filled
+ * if the request was answered successfully
+ * @param cb the callback to invoke when the RPC request has been answered
+ * @param cbarg an additional argument to be passed to the client
+ * @return 0 on success, -1 on failure
+ */
+#define EVRPC_MAKE_REQUEST(name, pool, request, reply, cb, cbarg) \
+ evrpc_send_request_##name(pool, request, reply, cb, cbarg)
+
+int evrpc_make_request(struct evrpc_request_wrapper *);
+
+/** creates an rpc connection pool
+ *
+ * a pool has a number of connections associated with it.
+ * rpc requests are always made via a pool.
+ *
+ * @param base a pointer to an struct event_based object; can be left NULL
+ * in singled-threaded applications
+ * @return a newly allocated struct evrpc_pool object
+ * @see evrpc_pool_free()
+ */
+struct evrpc_pool *evrpc_pool_new(struct event_base *base);
+/** frees an rpc connection pool
+ *
+ * @param pool a pointer to an evrpc_pool allocated via evrpc_pool_new()
+ * @see evrpc_pool_new()
+ */
+void evrpc_pool_free(struct evrpc_pool *pool);
+/*
+ * adds a connection over which rpc can be dispatched. the connection
+ * object must have been newly created.
+ */
+void evrpc_pool_add_connection(struct evrpc_pool *,
+ struct evhttp_connection *);
+
+/**
+ * Sets the timeout in secs after which a request has to complete. The
+ * RPC is completely aborted if it does not complete by then. Setting
+ * the timeout to 0 means that it never timeouts and can be used to
+ * implement callback type RPCs.
+ *
+ * Any connection already in the pool will be updated with the new
+ * timeout. Connections added to the pool after set_timeout has be
+ * called receive the pool timeout only if no timeout has been set
+ * for the connection itself.
+ *
+ * @param pool a pointer to a struct evrpc_pool object
+ * @param timeout_in_secs the number of seconds after which a request should
+ * timeout and a failure be returned to the callback.
+ */
+void evrpc_pool_set_timeout(struct evrpc_pool *pool, int timeout_in_secs);
+
+/**
+ * Hooks for changing the input and output of RPCs; this can be used to
+ * implement compression, authentication, encryption, ...
+ */
+
+enum EVRPC_HOOK_TYPE {
+ EVRPC_INPUT, /**< apply the function to an input hook */
+ EVRPC_OUTPUT /**< apply the function to an output hook */
+};
+
+#ifndef WIN32
+/** Deprecated alias for EVRPC_INPUT. Not available on windows, where it
+ * conflicts with platform headers. */
+#define INPUT EVRPC_INPUT
+/** Deprecated alias for EVRPC_OUTPUT. Not available on windows, where it
+ * conflicts with platform headers. */
+#define OUTPUT EVRPC_OUTPUT
+#endif
+
+/** adds a processing hook to either an rpc base or rpc pool
+ *
+ * If a hook returns -1, the processing is aborted.
+ *
+ * The add functions return handles that can be used for removing hooks.
+ *
+ * @param vbase a pointer to either struct evrpc_base or struct evrpc_pool
+ * @param hook_type either INPUT or OUTPUT
+ * @param cb the callback to call when the hook is activated
+ * @param cb_arg an additional argument for the callback
+ * @return a handle to the hook so it can be removed later
+ * @see evrpc_remove_hook()
+ */
+void *evrpc_add_hook(void *vbase,
+ enum EVRPC_HOOK_TYPE hook_type,
+ int (*cb)(struct evhttp_request *, struct evbuffer *, void *),
+ void *cb_arg);
+
+/** removes a previously added hook
+ *
+ * @param vbase a pointer to either struct evrpc_base or struct evrpc_pool
+ * @param hook_type either INPUT or OUTPUT
+ * @param handle a handle returned by evrpc_add_hook()
+ * @return 1 on success or 0 on failure
+ * @see evrpc_add_hook()
+ */
+int evrpc_remove_hook(void *vbase,
+ enum EVRPC_HOOK_TYPE hook_type,
+ void *handle);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _EVRPC_H_ */
diff --git a/libevent/evsignal.h b/libevent/evsignal.h
new file mode 100644
index 00000000000..9b0405eea09
--- /dev/null
+++ b/libevent/evsignal.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2000-2002 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _EVSIGNAL_H_
+#define _EVSIGNAL_H_
+
+#include <signal.h>
+
+/* plain signal(2)-style handler function pointer */
+typedef void (*ev_sighandler_t)(int);
+
+/* Per-event-base bookkeeping for signal handling.  evsignal_caught and
+ * evsigcaught are sig_atomic_t because they are written from signal
+ * handler context (see the implementation in signal.c). */
+struct evsignal_info {
+ struct event ev_signal;
+ /* NOTE(review): presumably a self-pipe used to wake the event
+  * loop from the signal handler — confirm against signal.c */
+ int ev_signal_pair[2];
+ int ev_signal_added;
+ volatile sig_atomic_t evsignal_caught;
+ struct event_list evsigevents[NSIG];
+ sig_atomic_t evsigcaught[NSIG];
+ /* previously installed handlers, indexed by signal number;
+  * sh_old_max tracks the array size */
+#ifdef HAVE_SIGACTION
+ struct sigaction **sh_old;
+#else
+ ev_sighandler_t **sh_old;
+#endif
+ int sh_old_max;
+};
+/* internal signal-handling entry points used by the event core */
+int evsignal_init(struct event_base *);
+void evsignal_process(struct event_base *);
+int evsignal_add(struct event *);
+int evsignal_del(struct event *);
+void evsignal_dealloc(struct event_base *);
+
+#endif /* _EVSIGNAL_H_ */
diff --git a/libevent/evutil.c b/libevent/evutil.c
new file mode 100644
index 00000000000..7d22d3eac16
--- /dev/null
+++ b/libevent/evutil.c
@@ -0,0 +1,245 @@
+/*
+ * Copyright (c) 2007 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef WIN32
+#include <winsock2.h>
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#undef WIN32_LEAN_AND_MEAN
+#endif
+
+#include <sys/types.h>
+#ifdef HAVE_SYS_SOCKET_H
+#include <sys/socket.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+#include <errno.h>
+#if defined WIN32 && !defined(HAVE_GETTIMEOFDAY_H)
+#include <sys/timeb.h>
+#endif
+#include <stdio.h>
+
+#include "evutil.h"
+#include "log.h"
+
+/*
+ * Portable socketpair(): on Unix simply forwards to socketpair(2); on
+ * win32 emulates it by connecting two TCP sockets through a loopback
+ * listener (code originally from Tor, used with permission).
+ *
+ * @param family must be AF_UNIX (where defined) on the win32 path
+ * @param type the socket type, e.g. SOCK_STREAM
+ * @param protocol must be 0 on the win32 path
+ * @param fd output array receiving the two connected descriptors
+ * @return 0 on success, -1 on failure (socket error code set)
+ */
+int
+evutil_socketpair(int family, int type, int protocol, int fd[2])
+{
+#ifndef WIN32
+ return socketpair(family, type, protocol, fd);
+#else
+ /* This code is originally from Tor. Used with permission. */
+
+ /* This socketpair does not work when localhost is down. So
+ * it's really not the same thing at all. But it's close enough
+ * for now, and really, when localhost is down sometimes, we
+ * have other problems too.
+ */
+ int listener = -1;
+ int connector = -1;
+ int acceptor = -1;
+ struct sockaddr_in listen_addr;
+ struct sockaddr_in connect_addr;
+ /* 'int' (not socklen_t) matches the win32 getsockname/accept
+  * prototypes; this branch is win32-only */
+ int size;
+ int saved_errno = -1; /* -1 means "no error captured yet" */
+
+ /* reject arguments the loopback emulation cannot honour */
+ if (protocol
+#ifdef AF_UNIX
+ || family != AF_UNIX
+#endif
+ ) {
+ EVUTIL_SET_SOCKET_ERROR(WSAEAFNOSUPPORT);
+ return -1;
+ }
+ if (!fd) {
+ EVUTIL_SET_SOCKET_ERROR(WSAEINVAL);
+ return -1;
+ }
+
+ listener = socket(AF_INET, type, 0);
+ if (listener < 0)
+ return -1;
+ memset(&listen_addr, 0, sizeof(listen_addr));
+ listen_addr.sin_family = AF_INET;
+ listen_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ listen_addr.sin_port = 0; /* kernel chooses port. */
+ if (bind(listener, (struct sockaddr *) &listen_addr, sizeof (listen_addr))
+ == -1)
+ goto tidy_up_and_fail;
+ if (listen(listener, 1) == -1)
+ goto tidy_up_and_fail;
+
+ connector = socket(AF_INET, type, 0);
+ if (connector < 0)
+ goto tidy_up_and_fail;
+ /* We want to find out the port number to connect to. */
+ size = sizeof(connect_addr);
+ if (getsockname(listener, (struct sockaddr *) &connect_addr, &size) == -1)
+ goto tidy_up_and_fail;
+ if (size != sizeof (connect_addr))
+ goto abort_tidy_up_and_fail;
+ if (connect(connector, (struct sockaddr *) &connect_addr,
+ sizeof(connect_addr)) == -1)
+ goto tidy_up_and_fail;
+
+ size = sizeof(listen_addr);
+ acceptor = accept(listener, (struct sockaddr *) &listen_addr, &size);
+ if (acceptor < 0)
+ goto tidy_up_and_fail;
+ if (size != sizeof(listen_addr))
+ goto abort_tidy_up_and_fail;
+ EVUTIL_CLOSESOCKET(listener);
+ /* Now check we are talking to ourself by matching port and host on the
+ two sockets. */
+ if (getsockname(connector, (struct sockaddr *) &connect_addr, &size) == -1)
+ goto tidy_up_and_fail;
+ if (size != sizeof (connect_addr)
+ || listen_addr.sin_family != connect_addr.sin_family
+ || listen_addr.sin_addr.s_addr != connect_addr.sin_addr.s_addr
+ || listen_addr.sin_port != connect_addr.sin_port)
+ goto abort_tidy_up_and_fail;
+ fd[0] = connector;
+ fd[1] = acceptor;
+
+ return 0;
+
+ abort_tidy_up_and_fail:
+ saved_errno = WSAECONNABORTED;
+ tidy_up_and_fail:
+ if (saved_errno < 0)
+ saved_errno = WSAGetLastError();
+ if (listener != -1)
+ EVUTIL_CLOSESOCKET(listener);
+ if (connector != -1)
+ EVUTIL_CLOSESOCKET(connector);
+ if (acceptor != -1)
+ EVUTIL_CLOSESOCKET(acceptor);
+
+ EVUTIL_SET_SOCKET_ERROR(saved_errno);
+ return -1;
+#endif
+}
+
+/*
+ * Puts a descriptor (socket on win32) into non-blocking mode.
+ *
+ * @param fd the descriptor to modify
+ * @return 0 on success, -1 on failure
+ */
+int
+evutil_make_socket_nonblocking(int fd)
+{
+#ifdef WIN32
+ {
+ unsigned long nonblocking = 1;
+ if (ioctlsocket(fd, FIONBIO, &nonblocking) == SOCKET_ERROR) {
+ event_warn("ioctlsocket(FIONBIO)");
+ return -1;
+ }
+ }
+#else
+ {
+ int flags;
+ /* fetch the current flags first so we do not clobber bits
+  * such as O_APPEND that may already be set on the fd */
+ if ((flags = fcntl(fd, F_GETFL, NULL)) == -1) {
+ event_warn("fcntl(F_GETFL)");
+ return -1;
+ }
+ if (!(flags & O_NONBLOCK)) {
+ if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) == -1) {
+ event_warn("fcntl(F_SETFL O_NONBLOCK)");
+ return -1;
+ }
+ }
+ }
+#endif
+ return 0;
+}
+
+/*
+ * Portable strtoll() replacement: uses strtoll() where available,
+ * strtol() on LP64 systems, and the _atoi64/_strtoi64 family on MSVC.
+ *
+ * On MSVC older than 13.00 only base 10 is supported (returns 0
+ * otherwise) and *endptr is approximated by skipping whitespace and
+ * digits only — no sign or prefix handling on that path.
+ */
+ev_int64_t
+evutil_strtoll(const char *s, char **endptr, int base)
+{
+#ifdef HAVE_STRTOLL
+ return (ev_int64_t)strtoll(s, endptr, base);
+#elif SIZEOF_LONG == 8
+ return (ev_int64_t)strtol(s, endptr, base);
+#elif defined(WIN32) && defined(_MSC_VER) && _MSC_VER < 1300
+ /* XXXX on old versions of MS APIs, we only support base
+ * 10. */
+ ev_int64_t r;
+ if (base != 10)
+ return 0;
+ r = (ev_int64_t) _atoi64(s);
+ while (isspace(*s))
+ ++s;
+ while (isdigit(*s))
+ ++s;
+ if (endptr)
+ *endptr = (char*) s;
+ return r;
+#elif defined(WIN32)
+ return (ev_int64_t) _strtoi64(s, endptr, base);
+#else
+#error "I don't know how to parse 64-bit integers."
+#endif
+}
+
+#ifndef HAVE_GETTIMEOFDAY
+/*
+ * gettimeofday() replacement built on _ftime() (win32).  Millisecond
+ * resolution only; the tz argument is ignored.
+ *
+ * @return 0 on success, -1 if tv is NULL
+ */
+int
+evutil_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+ struct _timeb tb;
+
+ if(tv == NULL)
+ return -1;
+
+ _ftime(&tb);
+ tv->tv_sec = (long) tb.time;
+ tv->tv_usec = ((int) tb.millitm) * 1000;
+ return 0;
+}
+#endif
+
+/*
+ * Portable snprintf(); collects the variadic arguments and delegates
+ * to evutil_vsnprintf(), which hides the platform differences.
+ */
+int
+evutil_snprintf(char *buf, size_t buflen, const char *format, ...)
+{
+ int r;
+ va_list ap;
+ va_start(ap, format);
+ r = evutil_vsnprintf(buf, buflen, format, ap);
+ va_end(ap);
+ return r;
+}
+
+/*
+ * Portable vsnprintf() replacement that always NUL-terminates the
+ * output when buflen > 0 and reports the full formatted length even on
+ * MSVC (whose _vsnprintf returns -1 on truncation).
+ *
+ * @param buf destination buffer
+ * @param buflen size of buf in bytes; may be 0 (nothing is written)
+ * @param format printf-style format string
+ * @param ap argument list
+ * @return the number of characters that would have been written had
+ * the buffer been large enough, as per C99 vsnprintf
+ */
+int
+evutil_vsnprintf(char *buf, size_t buflen, const char *format, va_list ap)
+{
+ int r;
+#ifdef _MSC_VER
+ r = _vsnprintf(buf, buflen, format, ap);
+ if (r < 0)
+ /* truncated: recover the required length for C99 semantics */
+ r = _vscprintf(format, ap);
+#else
+ r = vsnprintf(buf, buflen, format, ap);
+#endif
+ /* guard buflen == 0: buf[buflen-1] would index buf[SIZE_MAX] */
+ if (buflen > 0)
+ buf[buflen-1] = '\0';
+ return r;
+}
diff --git a/libevent/evutil.h b/libevent/evutil.h
new file mode 100644
index 00000000000..ea751ddf7b7
--- /dev/null
+++ b/libevent/evutil.h
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2007 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _EVUTIL_H_
+#define _EVUTIL_H_
+
+/** @file evutil.h
+
+ Common convenience functions for cross-platform portability and
+ related socket manipulations.
+
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <config.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#ifdef HAVE_STDINT_H
+#include <stdint.h>
+#elif defined(HAVE_INTTYPES_H)
+#include <inttypes.h>
+#endif
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+#include <stdarg.h>
+
+#ifdef HAVE_UINT64_T
+#define ev_uint64_t uint64_t
+#define ev_int64_t int64_t
+#elif defined(WIN32)
+#define ev_uint64_t unsigned __int64
+#define ev_int64_t signed __int64
+#elif SIZEOF_LONG_LONG == 8
+#define ev_uint64_t unsigned long long
+#define ev_int64_t long long
+#elif SIZEOF_LONG == 8
+#define ev_uint64_t unsigned long
+#define ev_int64_t long
+#else
+#error "No way to define ev_uint64_t"
+#endif
+
+#ifdef HAVE_UINT32_T
+#define ev_uint32_t uint32_t
+#elif defined(WIN32)
+#define ev_uint32_t unsigned int
+#elif SIZEOF_LONG == 4
+#define ev_uint32_t unsigned long
+#elif SIZEOF_INT == 4
+#define ev_uint32_t unsigned int
+#else
+#error "No way to define ev_uint32_t"
+#endif
+
+#ifdef HAVE_UINT16_T
+#define ev_uint16_t uint16_t
+#elif defined(WIN32)
+#define ev_uint16_t unsigned short
+#elif SIZEOF_INT == 2
+#define ev_uint16_t unsigned int
+#elif SIZEOF_SHORT == 2
+#define ev_uint16_t unsigned short
+#else
+#error "No way to define ev_uint16_t"
+#endif
+
+#ifdef HAVE_UINT8_T
+#define ev_uint8_t uint8_t
+#else
+#define ev_uint8_t unsigned char
+#endif
+
+int evutil_socketpair(int d, int type, int protocol, int sv[2]);
+int evutil_make_socket_nonblocking(int sock);
+#ifdef WIN32
+#define EVUTIL_CLOSESOCKET(s) closesocket(s)
+#else
+#define EVUTIL_CLOSESOCKET(s) close(s)
+#endif
+
+#ifdef WIN32
+#define EVUTIL_SOCKET_ERROR() WSAGetLastError()
+#define EVUTIL_SET_SOCKET_ERROR(errcode) \
+ do { WSASetLastError(errcode); } while (0)
+#else
+#define EVUTIL_SOCKET_ERROR() (errno)
+#define EVUTIL_SET_SOCKET_ERROR(errcode) \
+ do { errno = (errcode); } while (0)
+#endif
+
+/*
+ * Manipulation functions for struct timeval
+ */
+#ifdef HAVE_TIMERADD
+#define evutil_timeradd(tvp, uvp, vvp) timeradd((tvp), (uvp), (vvp))
+#define evutil_timersub(tvp, uvp, vvp) timersub((tvp), (uvp), (vvp))
+#else
+/* vvp = tvp + uvp, normalizing tv_usec back into [0, 1000000) */
+#define evutil_timeradd(tvp, uvp, vvp) \
+ do { \
+ (vvp)->tv_sec = (tvp)->tv_sec + (uvp)->tv_sec; \
+ (vvp)->tv_usec = (tvp)->tv_usec + (uvp)->tv_usec; \
+ if ((vvp)->tv_usec >= 1000000) { \
+ (vvp)->tv_sec++; \
+ (vvp)->tv_usec -= 1000000; \
+ } \
+ } while (0)
+/* vvp = tvp - uvp; a single borrow suffices when inputs are normalized */
+#define evutil_timersub(tvp, uvp, vvp) \
+ do { \
+ (vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec; \
+ (vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec; \
+ if ((vvp)->tv_usec < 0) { \
+ (vvp)->tv_sec--; \
+ (vvp)->tv_usec += 1000000; \
+ } \
+ } while (0)
+#endif /* !HAVE_TIMERADD */
+
+#ifdef HAVE_TIMERCLEAR
+#define evutil_timerclear(tvp) timerclear(tvp)
+#else
+#define evutil_timerclear(tvp) (tvp)->tv_sec = (tvp)->tv_usec = 0
+#endif
+
+#define evutil_timercmp(tvp, uvp, cmp) \
+ (((tvp)->tv_sec == (uvp)->tv_sec) ? \
+ ((tvp)->tv_usec cmp (uvp)->tv_usec) : \
+ ((tvp)->tv_sec cmp (uvp)->tv_sec))
+
+#ifdef HAVE_TIMERISSET
+#define evutil_timerisset(tvp) timerisset(tvp)
+#else
+#define evutil_timerisset(tvp) ((tvp)->tv_sec || (tvp)->tv_usec)
+#endif
+
+
+/* big-int related functions */
+ev_int64_t evutil_strtoll(const char *s, char **endptr, int base);
+
+
+#ifdef HAVE_GETTIMEOFDAY
+#define evutil_gettimeofday(tv, tz) gettimeofday((tv), (tz))
+#else
+int evutil_gettimeofday(struct timeval *tv, struct timezone *tz);
+#endif
+
+int evutil_snprintf(char *buf, size_t buflen, const char *format, ...)
+#ifdef __GNUC__
+ __attribute__((format(printf, 3, 4)))
+#endif
+ ;
+int evutil_vsnprintf(char *buf, size_t buflen, const char *format, va_list ap);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _EVUTIL_H_ */
diff --git a/libevent/http-internal.h b/libevent/http-internal.h
new file mode 100644
index 00000000000..9cd03cdd2bc
--- /dev/null
+++ b/libevent/http-internal.h
@@ -0,0 +1,154 @@
+/*
+ * Copyright 2001 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * This header file contains definitions for dealing with HTTP requests
+ * that are internal to libevent. As user of the library, you should not
+ * need to know about these.
+ */
+
+#ifndef _HTTP_H_
+#define _HTTP_H_
+
+#define HTTP_CONNECT_TIMEOUT 45
+#define HTTP_WRITE_TIMEOUT 50
+#define HTTP_READ_TIMEOUT 50
+
+#define HTTP_PREFIX "http://"
+#define HTTP_DEFAULTPORT 80
+
+enum message_read_status {
+ ALL_DATA_READ = 1,
+ MORE_DATA_EXPECTED = 0,
+ DATA_CORRUPTED = -1,
+ REQUEST_CANCELED = -2
+};
+
+enum evhttp_connection_error {
+ EVCON_HTTP_TIMEOUT,
+ EVCON_HTTP_EOF,
+ EVCON_HTTP_INVALID_HEADER
+};
+
+struct evbuffer;
+struct addrinfo;
+struct evhttp_request;
+
+/* A stupid connection object - maybe make this a bufferevent later */
+
+enum evhttp_connection_state {
+ EVCON_DISCONNECTED, /**< not currently connected not trying either*/
+ EVCON_CONNECTING, /**< tries to currently connect */
+ EVCON_IDLE, /**< connection is established */
+ EVCON_READING_FIRSTLINE,/**< reading Request-Line (incoming conn) or
+ **< Status-Line (outgoing conn) */
+ EVCON_READING_HEADERS, /**< reading request/response headers */
+ EVCON_READING_BODY, /**< reading request/response body */
+ EVCON_READING_TRAILER, /**< reading request/response chunked trailer */
+ EVCON_WRITING /**< writing request/response headers/body */
+};
+
+struct event_base;
+
+struct evhttp_connection {
+ /* we use tailq only if they were created for an http server */
+ TAILQ_ENTRY(evhttp_connection) (next);
+
+ int fd;
+ struct event ev;
+ struct event close_ev;
+ struct evbuffer *input_buffer;
+ struct evbuffer *output_buffer;
+
+ char *bind_address; /* address to use for binding the src */
+ u_short bind_port; /* local port for binding the src */
+
+ char *address; /* address to connect to */
+ u_short port;
+
+ int flags;
+#define EVHTTP_CON_INCOMING 0x0001 /* only one request on it ever */
+#define EVHTTP_CON_OUTGOING 0x0002 /* multiple requests possible */
+#define EVHTTP_CON_CLOSEDETECT 0x0004 /* detecting if persistent close */
+
+ int timeout; /* timeout in seconds for events */
+ int retry_cnt; /* retry count */
+ int retry_max; /* maximum number of retries */
+
+ enum evhttp_connection_state state;
+
+ /* for server connections, the http server they are connected with */
+ struct evhttp *http_server;
+
+ TAILQ_HEAD(evcon_requestq, evhttp_request) requests;
+
+ void (*cb)(struct evhttp_connection *, void *);
+ void *cb_arg;
+
+ void (*closecb)(struct evhttp_connection *, void *);
+ void *closecb_arg;
+
+ struct event_base *base;
+};
+
+struct evhttp_cb {
+ TAILQ_ENTRY(evhttp_cb) next;
+
+ char *what;
+
+ void (*cb)(struct evhttp_request *req, void *);
+ void *cbarg;
+};
+
+/* both the http server as well as the rpc system need to queue connections */
+TAILQ_HEAD(evconq, evhttp_connection);
+
+/* each bound socket is stored in one of these */
+struct evhttp_bound_socket {
+ TAILQ_ENTRY(evhttp_bound_socket) (next);
+
+ struct event bind_ev;
+};
+
+struct evhttp {
+ TAILQ_HEAD(boundq, evhttp_bound_socket) sockets;
+
+ TAILQ_HEAD(httpcbq, evhttp_cb) callbacks;
+ struct evconq connections;
+
+ int timeout;
+
+ void (*gencb)(struct evhttp_request *req, void *);
+ void *gencbarg;
+
+ struct event_base *base;
+};
+
+/* resets the connection; can be reused for more requests */
+void evhttp_connection_reset(struct evhttp_connection *);
+
+/* connects if necessary */
+int evhttp_connection_connect(struct evhttp_connection *);
+
+/* notifies the current request that it failed; resets connection */
+void evhttp_connection_fail(struct evhttp_connection *,
+ enum evhttp_connection_error error);
+
+void evhttp_get_request(struct evhttp *, int, struct sockaddr *, socklen_t);
+
+int evhttp_hostportfile(char *, char **, u_short *, char **);
+
+int evhttp_parse_firstline(struct evhttp_request *, struct evbuffer*);
+int evhttp_parse_headers(struct evhttp_request *, struct evbuffer*);
+
+void evhttp_start_read(struct evhttp_connection *);
+void evhttp_make_header(struct evhttp_connection *, struct evhttp_request *);
+
+void evhttp_write_buffer(struct evhttp_connection *,
+ void (*)(struct evhttp_connection *, void *), void *);
+
+/* send an HTML response containing the data in the buffer */
+void evhttp_response_code(struct evhttp_request *, int, const char *);
+void evhttp_send_page(struct evhttp_request *, struct evbuffer *);
+
+#endif /* _HTTP_H_ */
diff --git a/libevent/http.c b/libevent/http.c
new file mode 100644
index 00000000000..871bc2e4d0c
--- /dev/null
+++ b/libevent/http.c
@@ -0,0 +1,2830 @@
+/*
+ * Copyright (c) 2002-2006 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef HAVE_SYS_PARAM_H
+#include <sys/param.h>
+#endif
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#ifdef HAVE_SYS_IOCCOM_H
+#include <sys/ioccom.h>
+#endif
+
+#ifndef WIN32
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#endif
+
+#include <sys/queue.h>
+
+#ifndef HAVE_TAILQFOREACH
+#include <event-internal.h>
+#endif
+
+#ifndef WIN32
+#include <netinet/in.h>
+#include <netdb.h>
+#endif
+
+#ifdef WIN32
+#include <winsock2.h>
+#endif
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#ifndef WIN32
+#include <syslog.h>
+#endif
+#include <signal.h>
+#include <time.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+
+#undef timeout_pending
+#undef timeout_initialized
+
+#include "strlcpy-internal.h"
+#include "event.h"
+#include "evhttp.h"
+#include "evutil.h"
+#include "log.h"
+#include "http-internal.h"
+
+#ifdef WIN32
+#define strcasecmp _stricmp
+#define strncasecmp _strnicmp
+#define strdup _strdup
+#endif
+
+#ifndef HAVE_GETNAMEINFO
+#define NI_MAXSERV 32
+#define NI_MAXHOST 1025
+
+#define NI_NUMERICHOST 1
+#define NI_NUMERICSERV 2
+
+static int
+fake_getnameinfo(const struct sockaddr *sa, size_t salen, char *host,
+	size_t hostlen, char *serv, size_t servlen, int flags)
+{
+	/*
+	 * Minimal getnameinfo() replacement for platforms that lack it.
+	 * Supports AF_INET only: sa is cast unconditionally to sockaddr_in.
+	 * Returns 0 on success, -1 on truncation, -2 on failed lookup.
+	 */
+	struct sockaddr_in *sin = (struct sockaddr_in *)sa;
+
+	if (serv != NULL) {
+		/* Service is always rendered numerically (the port). */
+		char tmpserv[16];
+		evutil_snprintf(tmpserv, sizeof(tmpserv),
+		    "%d", ntohs(sin->sin_port));
+		if (strlcpy(serv, tmpserv, servlen) >= servlen)
+			return (-1);
+	}
+
+	if (host != NULL) {
+		if (flags & NI_NUMERICHOST) {
+			/* Dotted-quad text form; no reverse lookup. */
+			if (strlcpy(host, inet_ntoa(sin->sin_addr),
+			    hostlen) >= hostlen)
+				return (-1);
+			else
+				return (0);
+		} else {
+			/* Blocking reverse DNS lookup. */
+			struct hostent *hp;
+			hp = gethostbyaddr((char *)&sin->sin_addr,
+			    sizeof(struct in_addr), AF_INET);
+			if (hp == NULL)
+				return (-2);
+
+			if (strlcpy(host, hp->h_name, hostlen) >= hostlen)
+				return (-1);
+			else
+				return (0);
+		}
+	}
+	return (0);
+}
+
+#endif
+
+#ifndef HAVE_GETADDRINFO
+struct addrinfo {
+ int ai_family;
+ int ai_socktype;
+ int ai_protocol;
+ size_t ai_addrlen;
+ struct sockaddr *ai_addr;
+ struct addrinfo *ai_next;
+};
+static int
+fake_getaddrinfo(const char *hostname, struct addrinfo *ai)
+{
+	/*
+	 * Minimal getaddrinfo() replacement: resolves hostname (or binds
+	 * to INADDR_ANY when hostname is NULL) into a single IPv4
+	 * sockaddr_in stored in the caller-provided ai.  Returns 0 on
+	 * success, -1 on lookup or allocation failure.  The caller must
+	 * release ai->ai_addr via fake_freeaddrinfo().
+	 */
+	struct hostent *he = NULL;
+	struct sockaddr_in *sa;
+	if (hostname) {
+		he = gethostbyname(hostname);
+		if (!he)
+			return (-1);
+	}
+	ai->ai_family = he ? he->h_addrtype : AF_INET;
+	ai->ai_socktype = SOCK_STREAM;
+	ai->ai_protocol = 0;
+	ai->ai_addrlen = sizeof(struct sockaddr_in);
+	if (NULL == (ai->ai_addr = malloc(ai->ai_addrlen)))
+		return (-1);
+	sa = (struct sockaddr_in*)ai->ai_addr;
+	memset(sa, 0, ai->ai_addrlen);
+	if (he) {
+		sa->sin_family = he->h_addrtype;
+		/* NOTE(review): assumes h_length <= sizeof(sa->sin_addr),
+		 * i.e. an IPv4 result — verify for AF_INET6 resolvers. */
+		memcpy(&sa->sin_addr, he->h_addr_list[0], he->h_length);
+	} else {
+		sa->sin_family = AF_INET;
+		sa->sin_addr.s_addr = INADDR_ANY;
+	}
+	ai->ai_next = NULL;
+	return (0);
+}
+static void
+fake_freeaddrinfo(struct addrinfo *ai)
+{
+ free(ai->ai_addr);
+}
+#endif
+
+#ifndef MIN
+#define MIN(a,b) (((a)<(b))?(a):(b))
+#endif
+
+/* wrapper for setting the base from the http server */
+#define EVHTTP_BASE_SET(x, y) do { \
+ if ((x)->base != NULL) event_base_set((x)->base, y); \
+} while (0)
+
+extern int debug;
+
+static int socket_connect(int fd, const char *address, unsigned short port);
+static int bind_socket_ai(struct addrinfo *, int reuse);
+static int bind_socket(const char *, u_short, int reuse);
+static void name_from_addr(struct sockaddr *, socklen_t, char **, char **);
+static int evhttp_associate_new_request_with_connection(
+ struct evhttp_connection *evcon);
+static void evhttp_connection_start_detectclose(
+ struct evhttp_connection *evcon);
+static void evhttp_connection_stop_detectclose(
+ struct evhttp_connection *evcon);
+static void evhttp_request_dispatch(struct evhttp_connection* evcon);
+static void evhttp_read_firstline(struct evhttp_connection *evcon,
+ struct evhttp_request *req);
+static void evhttp_read_header(struct evhttp_connection *evcon,
+ struct evhttp_request *req);
+static int evhttp_add_header_internal(struct evkeyvalq *headers,
+ const char *key, const char *value);
+static int evhttp_decode_uri_internal(const char *uri, size_t length,
+ char *ret, int always_decode_plus);
+
+void evhttp_read(int, short, void *);
+void evhttp_write(int, short, void *);
+
+#ifndef HAVE_STRSEP
+/* strsep replacement for platforms that lack it. Only works if
+ * del is one character long. */
+static char *
+strsep(char **s, const char *del)
+{
+	/*
+	 * Returns the token at *s terminated at the first occurrence of
+	 * del, and advances *s past it; sets *s to NULL when del is not
+	 * found.  Asserts that del is exactly one character long.
+	 */
+	char *d, *tok;
+	assert(strlen(del) == 1);
+	if (!s || !*s)
+		return NULL;
+	tok = *s;
+	d = strstr(tok, del);
+	if (d) {
+		*d = '\0';
+		*s = d + 1;
+	} else
+		*s = NULL;
+	return tok;
+}
+#endif
+
<file_sep>+static const char *
+html_replace(char ch, char *buf)
+{
+	/*
+	 * Maps an HTML-special character to its entity string.  For any
+	 * other character, echoes it back via buf, which must hold at
+	 * least 2 bytes (char + NUL) supplied by the caller.
+	 */
+	switch (ch) {
+	case '<':
+		return "&lt;";
+	case '>':
+		return "&gt;";
+	case '"':
+		return "&quot;";
+	case '\'':
+		return "&#039;";
+	case '&':
+		return "&amp;";
+	default:
+		break;
+	}
+
+	/* Echo the character back */
+	buf[0] = ch;
+	buf[1] = '\0';
+
+	return buf;
+}
+
+/*
+ * Replaces <, >, ", ' and & with &lt;, &gt;, &quot;,
+ * &#039; and &amp; correspondingly.
+ *
+ * The returned string needs to be freed by the caller.
+ */
+
+char *
+evhttp_htmlescape(const char *html)
+{
+	/*
+	 * Two passes: first compute the escaped length, then fill the
+	 * freshly allocated buffer.  On malloc failure event_err() is
+	 * fatal, so the return value is always valid.
+	 */
+	int i, new_size = 0, old_size = strlen(html);
+	char *escaped_html, *p;
+	char scratch_space[2];
+
+	for (i = 0; i < old_size; ++i)
+		new_size += strlen(html_replace(html[i], scratch_space));
+
+	p = escaped_html = malloc(new_size + 1);
+	if (escaped_html == NULL)
+		event_err(1, "%s: malloc(%d)", __func__, new_size + 1);
+	for (i = 0; i < old_size; ++i) {
+		const char *replaced = html_replace(html[i], scratch_space);
+		/* this is length checked */
+		strcpy(p, replaced);
+		p += strlen(replaced);
+	}
+
+	*p = '\0';
+
+	return (escaped_html);
+}
+
+static const char *
+evhttp_method(enum evhttp_cmd_type type)
+{
+	/* Maps a request type to its HTTP method string; NULL if unknown. */
+	const char *method;
+
+	switch (type) {
+	case EVHTTP_REQ_GET:
+		method = "GET";
+		break;
+	case EVHTTP_REQ_POST:
+		method = "POST";
+		break;
+	case EVHTTP_REQ_HEAD:
+		method = "HEAD";
+		break;
+	default:
+		method = NULL;
+		break;
+	}
+
+	return (method);
+}
+
+static void
+evhttp_add_event(struct event *ev, int timeout, int default_timeout)
+{
+	/*
+	 * Adds ev with a timeout: 0 means no timeout, -1 means use
+	 * default_timeout, any other value is taken in seconds.
+	 */
+	if (timeout != 0) {
+		struct timeval tv;
+
+		evutil_timerclear(&tv);
+		tv.tv_sec = timeout != -1 ? timeout : default_timeout;
+		event_add(ev, &tv);
+	} else {
+		event_add(ev, NULL);
+	}
+}
+
+void
+evhttp_write_buffer(struct evhttp_connection *evcon,
+ void (*cb)(struct evhttp_connection *, void *), void *arg)
+{
+ event_debug(("%s: preparing to write buffer\n", __func__));
+
+ /* Set call back */
+ evcon->cb = cb;
+ evcon->cb_arg = arg;
+
+ /* check if the event is already pending */
+ if (event_pending(&evcon->ev, EV_WRITE|EV_TIMEOUT, NULL))
+ event_del(&evcon->ev);
+
+ event_set(&evcon->ev, evcon->fd, EV_WRITE, evhttp_write, evcon);
+ EVHTTP_BASE_SET(evcon, &evcon->ev);
+ evhttp_add_event(&evcon->ev, evcon->timeout, HTTP_WRITE_TIMEOUT);
+}
+
+static int
+evhttp_connected(struct evhttp_connection *evcon)
+{
+	/* Returns 1 once the TCP connection is established, else 0. */
+	switch (evcon->state) {
+	case EVCON_DISCONNECTED:
+	case EVCON_CONNECTING:
+		return (0);
+	case EVCON_IDLE:
+	case EVCON_READING_FIRSTLINE:
+	case EVCON_READING_HEADERS:
+	case EVCON_READING_BODY:
+	case EVCON_READING_TRAILER:
+	case EVCON_WRITING:
+	default:
+		return (1);
+	}
+}
+
+/*
+ * Create the headers needed for an HTTP request
+ */
+static void
+evhttp_make_header_request(struct evhttp_connection *evcon,
+ struct evhttp_request *req)
+{
+ const char *method;
+
+ evhttp_remove_header(req->output_headers, "Proxy-Connection");
+
+ /* Generate request line */
+ method = evhttp_method(req->type);
+ evbuffer_add_printf(evcon->output_buffer, "%s %s HTTP/%d.%d\r\n",
+ method, req->uri, req->major, req->minor);
+
+ /* Add the content length on a post request if missing */
+ if (req->type == EVHTTP_REQ_POST &&
+ evhttp_find_header(req->output_headers, "Content-Length") == NULL){
+ char size[12];
+ evutil_snprintf(size, sizeof(size), "%ld",
+ (long)EVBUFFER_LENGTH(req->output_buffer));
+ evhttp_add_header(req->output_headers, "Content-Length", size);
+ }
+}
+
+static int
+evhttp_is_connection_close(int flags, struct evkeyvalq* headers)
+{
+ if (flags & EVHTTP_PROXY_REQUEST) {
+ /* proxy connection */
+ const char *connection = evhttp_find_header(headers, "Proxy-Connection");
+ return (connection == NULL || strcasecmp(connection, "keep-alive") != 0);
+ } else {
+ const char *connection = evhttp_find_header(headers, "Connection");
+ return (connection != NULL && strcasecmp(connection, "close") == 0);
+ }
+}
+
+static int
+evhttp_is_connection_keepalive(struct evkeyvalq* headers)
+{
+ const char *connection = evhttp_find_header(headers, "Connection");
+ return (connection != NULL
+ && strncasecmp(connection, "keep-alive", 10) == 0);
+}
+
+static void
+evhttp_maybe_add_date_header(struct evkeyvalq *headers)
+{
+ if (evhttp_find_header(headers, "Date") == NULL) {
+ char date[50];
+#ifndef WIN32
+ struct tm cur;
+#endif
+ struct tm *cur_p;
+ time_t t = time(NULL);
+#ifdef WIN32
+ cur_p = gmtime(&t);
+#else
+ gmtime_r(&t, &cur);
+ cur_p = &cur;
+#endif
+ if (strftime(date, sizeof(date),
+ "%a, %d %b %Y %H:%M:%S GMT", cur_p) != 0) {
+ evhttp_add_header(headers, "Date", date);
+ }
+ }
+}
+
+static void
+evhttp_maybe_add_content_length_header(struct evkeyvalq *headers,
+ long content_length)
+{
+ if (evhttp_find_header(headers, "Transfer-Encoding") == NULL &&
+ evhttp_find_header(headers, "Content-Length") == NULL) {
+ char len[12];
+ evutil_snprintf(len, sizeof(len), "%ld", content_length);
+ evhttp_add_header(headers, "Content-Length", len);
+ }
+}
+
+/*
+ * Create the headers needed for an HTTP reply
+ */
+
+static void
+evhttp_make_header_response(struct evhttp_connection *evcon,
+ struct evhttp_request *req)
+{
+ int is_keepalive = evhttp_is_connection_keepalive(req->input_headers);
+ evbuffer_add_printf(evcon->output_buffer, "HTTP/%d.%d %d %s\r\n",
+ req->major, req->minor, req->response_code,
+ req->response_code_line);
+
+ if (req->major == 1) {
+ if (req->minor == 1)
+ evhttp_maybe_add_date_header(req->output_headers);
+
+ /*
+ * if the protocol is 1.0; and the connection was keep-alive
+ * we need to add a keep-alive header, too.
+ */
+ if (req->minor == 0 && is_keepalive)
+ evhttp_add_header(req->output_headers,
+ "Connection", "keep-alive");
+
+ if (req->minor == 1 || is_keepalive) {
+ /*
+ * we need to add the content length if the
+ * user did not give it, this is required for
+ * persistent connections to work.
+ */
+ evhttp_maybe_add_content_length_header(
+ req->output_headers,
+ (long)EVBUFFER_LENGTH(req->output_buffer));
+ }
+ }
+
+ /* Potentially add headers for unidentified content. */
+ if (EVBUFFER_LENGTH(req->output_buffer)) {
+ if (evhttp_find_header(req->output_headers,
+ "Content-Type") == NULL) {
+ evhttp_add_header(req->output_headers,
+ "Content-Type", "text/html; charset=ISO-8859-1");
+ }
+ }
+
+ /* if the request asked for a close, we send a close, too */
+ if (evhttp_is_connection_close(req->flags, req->input_headers)) {
+ evhttp_remove_header(req->output_headers, "Connection");
+ if (!(req->flags & EVHTTP_PROXY_REQUEST))
+ evhttp_add_header(req->output_headers, "Connection", "close");
+ evhttp_remove_header(req->output_headers, "Proxy-Connection");
+ }
+}
+
+void
+evhttp_make_header(struct evhttp_connection *evcon, struct evhttp_request *req)
+{
+ struct evkeyval *header;
+
+ /*
+ * Depending if this is a HTTP request or response, we might need to
+ * add some new headers or remove existing headers.
+ */
+ if (req->kind == EVHTTP_REQUEST) {
+ evhttp_make_header_request(evcon, req);
+ } else {
+ evhttp_make_header_response(evcon, req);
+ }
+
+ TAILQ_FOREACH(header, req->output_headers, next) {
+ evbuffer_add_printf(evcon->output_buffer, "%s: %s\r\n",
+ header->key, header->value);
+ }
+ evbuffer_add(evcon->output_buffer, "\r\n", 2);
+
+ if (EVBUFFER_LENGTH(req->output_buffer) > 0) {
+ /*
+ * For a request, we add the POST data, for a reply, this
+ * is the regular data.
+ */
+ evbuffer_add_buffer(evcon->output_buffer, req->output_buffer);
+ }
+}
+
+/* Separated host, port and file from URI */
+
+int
+evhttp_hostportfile(char *url, char **phost, u_short *pport, char **pfile)
+{
+	/*
+	 * Splits an "http://host[:port][/file]" URL into host, port and
+	 * file components.  Returns 0 on success, -1 on a malformed URL
+	 * or truncation.  Output pointers refer to static storage that
+	 * is overwritten by the next call.
+	 */
+	/* XXX not threadsafe. */
+	static char host[1024];
+	static char file[1024];
+	char *p;
+	const char *p2;
+	int len;
+	u_short port;
+
+	len = strlen(HTTP_PREFIX);
+	if (strncasecmp(url, HTTP_PREFIX, len))
+		return (-1);
+
+	url += len;
+
+	/* We might overrun */
+	if (strlcpy(host, url, sizeof (host)) >= sizeof(host))
+		return (-1);
+
+	/* Split host[:port] from the path, if any. */
+	p = strchr(host, '/');
+	if (p != NULL) {
+		*p = '\0';
+		p2 = p + 1;
+	} else
+		p2 = NULL;
+
+	if (pfile != NULL) {
+		/* Generate request file */
+		if (p2 == NULL)
+			p2 = "";
+		evutil_snprintf(file, sizeof(file), "/%s", p2);
+	}
+
+	p = strchr(host, ':');
+	if (p != NULL) {
+		*p = '\0';
+		port = atoi(p + 1);
+
+		if (port == 0)
+			return (-1);
+	} else
+		port = HTTP_DEFAULTPORT;
+
+	if (phost != NULL)
+		*phost = host;
+	if (pport != NULL)
+		*pport = port;
+	if (pfile != NULL)
+		*pfile = file;
+
+	return (0);
+}
+
+static int
+evhttp_connection_incoming_fail(struct evhttp_request *req,
+    enum evhttp_connection_error error)
+{
+	/*
+	 * Handles a failure on an incoming (server-side) request.
+	 * Returns -1 when the caller should just free the connection,
+	 * 0 when the request callback has been invoked to send a reply.
+	 */
+	switch (error) {
+	case EVCON_HTTP_TIMEOUT:
+	case EVCON_HTTP_EOF:
+		/*
+		 * these are cases in which we probably should just
+		 * close the connection and not send a reply.  this
+		 * case may happen when a browser keeps a persistent
+		 * connection open and we timeout on the read.
+		 */
+		return (-1);
+	case EVCON_HTTP_INVALID_HEADER:
+	default:	/* xxx: probably should just error on default */
+		/* the callback looks at the uri to determine errors */
+		if (req->uri) {
+			free(req->uri);
+			req->uri = NULL;
+		}
+
+		/*
+		 * the callback needs to send a reply; once the reply has
+		 * been sent, the connection should get freed.
+		 */
+		(*req->cb)(req, req->cb_arg);
+	}
+
+	return (0);
+}
+
+void
+evhttp_connection_fail(struct evhttp_connection *evcon,
+ enum evhttp_connection_error error)
+{
+ struct evhttp_request* req = TAILQ_FIRST(&evcon->requests);
+ void (*cb)(struct evhttp_request *, void *);
+ void *cb_arg;
+ assert(req != NULL);
+
+ if (evcon->flags & EVHTTP_CON_INCOMING) {
+ /*
+ * for incoming requests, there are two different
+ * failure cases. it's either a network level error
+ * or an http layer error. for problems on the network
+ * layer like timeouts we just drop the connections.
+ * For HTTP problems, we might have to send back a
+ * reply before the connection can be freed.
+ */
+ if (evhttp_connection_incoming_fail(req, error) == -1)
+ evhttp_connection_free(evcon);
+ return;
+ }
+
+ /* save the callback for later; the cb might free our object */
+ cb = req->cb;
+ cb_arg = req->cb_arg;
+
+ TAILQ_REMOVE(&evcon->requests, req, next);
+ evhttp_request_free(req);
+
+ /* xxx: maybe we should fail all requests??? */
+
+ /* reset the connection */
+ evhttp_connection_reset(evcon);
+
+ /* We are trying the next request that was queued on us */
+ if (TAILQ_FIRST(&evcon->requests) != NULL)
+ evhttp_connection_connect(evcon);
+
+ /* inform the user */
+ if (cb != NULL)
+ (*cb)(NULL, cb_arg);
+}
+
+void
+evhttp_write(int fd, short what, void *arg)
+{
+	/*
+	 * Write-event callback: flushes evcon->output_buffer to fd and
+	 * re-arms itself until the buffer is empty, then invokes the
+	 * stored completion callback.  Timeouts and write errors fail
+	 * the connection.
+	 */
+	struct evhttp_connection *evcon = arg;
+	int n;
+
+	if (what == EV_TIMEOUT) {
+		evhttp_connection_fail(evcon, EVCON_HTTP_TIMEOUT);
+		return;
+	}
+
+	n = evbuffer_write(evcon->output_buffer, fd);
+	if (n == -1) {
+		event_debug(("%s: evbuffer_write", __func__));
+		evhttp_connection_fail(evcon, EVCON_HTTP_EOF);
+		return;
+	}
+
+	if (n == 0) {
+		event_debug(("%s: write nothing", __func__));
+		evhttp_connection_fail(evcon, EVCON_HTTP_EOF);
+		return;
+	}
+
+	/* More data pending: keep the write event armed. */
+	if (EVBUFFER_LENGTH(evcon->output_buffer) != 0) {
+		evhttp_add_event(&evcon->ev,
+		    evcon->timeout, HTTP_WRITE_TIMEOUT);
+		return;
+	}
+
+	/* Activate our call back */
+	if (evcon->cb != NULL)
+		(*evcon->cb)(evcon, evcon->cb_arg);
+}
+
+/**
+ * Advance the connection state.
+ * - If this is an outgoing connection, we've just processed the response;
+ * idle or close the connection.
+ * - If this is an incoming connection, we've just processed the request;
+ * respond.
+ */
+static void
+evhttp_connection_done(struct evhttp_connection *evcon)
+{
+ struct evhttp_request *req = TAILQ_FIRST(&evcon->requests);
+ int con_outgoing = evcon->flags & EVHTTP_CON_OUTGOING;
+
+ if (con_outgoing) {
+ /* idle or close the connection */
+ int need_close;
+ TAILQ_REMOVE(&evcon->requests, req, next);
+ req->evcon = NULL;
+
+ evcon->state = EVCON_IDLE;
+
+ need_close =
+ evhttp_is_connection_close(req->flags, req->input_headers)||
+ evhttp_is_connection_close(req->flags, req->output_headers);
+
+ /* check if we got asked to close the connection */
+ if (need_close)
+ evhttp_connection_reset(evcon);
+
+ if (TAILQ_FIRST(&evcon->requests) != NULL) {
+ /*
+ * We have more requests; reset the connection
+ * and deal with the next request.
+ */
+ if (!evhttp_connected(evcon))
+ evhttp_connection_connect(evcon);
+ else
+ evhttp_request_dispatch(evcon);
+ } else if (!need_close) {
+ /*
+ * The connection is going to be persistent, but we
+ * need to detect if the other side closes it.
+ */
+ evhttp_connection_start_detectclose(evcon);
+ }
+ } else {
+ /*
+ * incoming connection - we need to leave the request on the
+ * connection so that we can reply to it.
+ */
+ evcon->state = EVCON_WRITING;
+ }
+
+ /* notify the user of the request */
+ (*req->cb)(req, req->cb_arg);
+
+ /* if this was an outgoing request, we own and it's done. so free it */
+ if (con_outgoing) {
+ evhttp_request_free(req);
+ }
+}
+
+/*
+ * Handles reading from a chunked request.
+ * return ALL_DATA_READ:
+ * all data has been read
+ * return MORE_DATA_EXPECTED:
+ * more data is expected
+ * return DATA_CORRUPTED:
+ * data is corrupted
+ * return REQUEST_CANCELED:
+ * request was canceled by the user calling evhttp_cancel_request
+ */
+
+static enum message_read_status
+evhttp_handle_chunked_read(struct evhttp_request *req, struct evbuffer *buf)
+{
+ int len;
+
+ while ((len = EVBUFFER_LENGTH(buf)) > 0) {
+ if (req->ntoread < 0) {
+ /* Read chunk size */
+ ev_int64_t ntoread;
+ char *p = evbuffer_readline(buf);
+ char *endp;
+ int error;
+ if (p == NULL)
+ break;
+ /* the last chunk is on a new line? */
+ if (strlen(p) == 0) {
+ free(p);
+ continue;
+ }
+ ntoread = evutil_strtoll(p, &endp, 16);
+ error = (*p == '\0' ||
+ (*endp != '\0' && *endp != ' ') ||
+ ntoread < 0);
+ free(p);
+ if (error) {
+ /* could not get chunk size */
+ return (DATA_CORRUPTED);
+ }
+ req->ntoread = ntoread;
+ if (req->ntoread == 0) {
+ /* Last chunk */
+ return (ALL_DATA_READ);
+ }
+ continue;
+ }
+
+ /* don't have enough to complete a chunk; wait for more */
+ if (len < req->ntoread)
+ return (MORE_DATA_EXPECTED);
+
+ /* Completed chunk */
+ evbuffer_add(req->input_buffer,
+ EVBUFFER_DATA(buf), (size_t)req->ntoread);
+ evbuffer_drain(buf, (size_t)req->ntoread);
+ req->ntoread = -1;
+ if (req->chunk_cb != NULL) {
+ (*req->chunk_cb)(req, req->cb_arg);
+ evbuffer_drain(req->input_buffer,
+ EVBUFFER_LENGTH(req->input_buffer));
+ }
+ }
+
+ return (MORE_DATA_EXPECTED);
+}
+
+static void
+evhttp_read_trailer(struct evhttp_connection *evcon, struct evhttp_request *req)
+{
+ struct evbuffer *buf = evcon->input_buffer;
+
+ switch (evhttp_parse_headers(req, buf)) {
+ case DATA_CORRUPTED:
+ evhttp_connection_fail(evcon, EVCON_HTTP_INVALID_HEADER);
+ break;
+ case ALL_DATA_READ:
+ event_del(&evcon->ev);
+ evhttp_connection_done(evcon);
+ break;
+ case MORE_DATA_EXPECTED:
+ default:
+ evhttp_add_event(&evcon->ev, evcon->timeout,
+ HTTP_READ_TIMEOUT);
+ break;
+ }
+}
+
+static void
+evhttp_read_body(struct evhttp_connection *evcon, struct evhttp_request *req)
+{
+ struct evbuffer *buf = evcon->input_buffer;
+
+ if (req->chunked) {
+ switch (evhttp_handle_chunked_read(req, buf)) {
+ case ALL_DATA_READ:
+ /* finished last chunk */
+ evcon->state = EVCON_READING_TRAILER;
+ evhttp_read_trailer(evcon, req);
+ return;
+ case DATA_CORRUPTED:
+ /* corrupted data */
+ evhttp_connection_fail(evcon,
+ EVCON_HTTP_INVALID_HEADER);
+ return;
+ case REQUEST_CANCELED:
+ /* request canceled */
+ evhttp_request_free(req);
+ return;
+ case MORE_DATA_EXPECTED:
+ default:
+ break;
+ }
+ } else if (req->ntoread < 0) {
+ /* Read until connection close. */
+ evbuffer_add_buffer(req->input_buffer, buf);
+ } else if (EVBUFFER_LENGTH(buf) >= req->ntoread) {
+ /* Completed content length */
+ evbuffer_add(req->input_buffer, EVBUFFER_DATA(buf),
+ (size_t)req->ntoread);
+ evbuffer_drain(buf, (size_t)req->ntoread);
+ req->ntoread = 0;
+ evhttp_connection_done(evcon);
+ return;
+ }
+ /* Read more! */
+ event_set(&evcon->ev, evcon->fd, EV_READ, evhttp_read, evcon);
+ EVHTTP_BASE_SET(evcon, &evcon->ev);
+ evhttp_add_event(&evcon->ev, evcon->timeout, HTTP_READ_TIMEOUT);
+}
+
+/*
+ * Reads data into a buffer structure until no more data
+ * can be read on the file descriptor or we have read all
+ * the data that we wanted to read.
+ * Execute callback when done.
+ */
+
+void
+evhttp_read(int fd, short what, void *arg)
+{
+ struct evhttp_connection *evcon = arg;
+ struct evhttp_request *req = TAILQ_FIRST(&evcon->requests);
+ struct evbuffer *buf = evcon->input_buffer;
+ int n, len;
+
+ if (what == EV_TIMEOUT) {
+ evhttp_connection_fail(evcon, EVCON_HTTP_TIMEOUT);
+ return;
+ }
+ n = evbuffer_read(buf, fd, -1);
+ len = EVBUFFER_LENGTH(buf);
+ event_debug(("%s: got %d on %d\n", __func__, n, fd));
+
+ if (n == -1) {
+ if (errno != EINTR && errno != EAGAIN) {
+ event_debug(("%s: evbuffer_read", __func__));
+ evhttp_connection_fail(evcon, EVCON_HTTP_EOF);
+ } else {
+ evhttp_add_event(&evcon->ev, evcon->timeout,
+ HTTP_READ_TIMEOUT);
+ }
+ return;
+ } else if (n == 0) {
+ /* Connection closed */
+ evhttp_connection_done(evcon);
+ return;
+ }
+
+ switch (evcon->state) {
+ case EVCON_READING_FIRSTLINE:
+ evhttp_read_firstline(evcon, req);
+ break;
+ case EVCON_READING_HEADERS:
+ evhttp_read_header(evcon, req);
+ break;
+ case EVCON_READING_BODY:
+ evhttp_read_body(evcon, req);
+ break;
+ case EVCON_READING_TRAILER:
+ evhttp_read_trailer(evcon, req);
+ break;
+ case EVCON_DISCONNECTED:
+ case EVCON_CONNECTING:
+ case EVCON_IDLE:
+ case EVCON_WRITING:
+ default:
+ event_errx(1, "%s: illegal connection state %d",
+ __func__, evcon->state);
+ }
+}
+
+static void
+evhttp_write_connectioncb(struct evhttp_connection *evcon, void *arg)
+{
+ /* This is after writing the request to the server */
+ struct evhttp_request *req = TAILQ_FIRST(&evcon->requests);
+ assert(req != NULL);
+
+ assert(evcon->state == EVCON_WRITING);
+
+ /* We are done writing our header and are now expecting the response */
+ req->kind = EVHTTP_RESPONSE;
+
+ evhttp_start_read(evcon);
+}
+
+/*
+ * Clean up a connection object
+ */
+
+void
+evhttp_connection_free(struct evhttp_connection *evcon)
+{
+ struct evhttp_request *req;
+
+ /* notify interested parties that this connection is going down */
+ if (evcon->fd != -1) {
+ if (evhttp_connected(evcon) && evcon->closecb != NULL)
+ (*evcon->closecb)(evcon, evcon->closecb_arg);
+ }
+
+ /* remove all requests that might be queued on this connection */
+ while ((req = TAILQ_FIRST(&evcon->requests)) != NULL) {
+ TAILQ_REMOVE(&evcon->requests, req, next);
+ evhttp_request_free(req);
+ }
+
+ if (evcon->http_server != NULL) {
+ struct evhttp *http = evcon->http_server;
+ TAILQ_REMOVE(&http->connections, evcon, next);
+ }
+
+ if (event_initialized(&evcon->close_ev))
+ event_del(&evcon->close_ev);
+
+ if (event_initialized(&evcon->ev))
+ event_del(&evcon->ev);
+
+ if (evcon->fd != -1)
+ EVUTIL_CLOSESOCKET(evcon->fd);
+
+ if (evcon->bind_address != NULL)
+ free(evcon->bind_address);
+
+ if (evcon->address != NULL)
+ free(evcon->address);
+
+ if (evcon->input_buffer != NULL)
+ evbuffer_free(evcon->input_buffer);
+
+ if (evcon->output_buffer != NULL)
+ evbuffer_free(evcon->output_buffer);
+
+ free(evcon);
+}
+
+void
+evhttp_connection_set_local_address(struct evhttp_connection *evcon,
+ const char *address)
+{
+ assert(evcon->state == EVCON_DISCONNECTED);
+ if (evcon->bind_address)
+ free(evcon->bind_address);
+ if ((evcon->bind_address = strdup(address)) == NULL)
+ event_err(1, "%s: strdup", __func__);
+}
+
+void
+evhttp_connection_set_local_port(struct evhttp_connection *evcon,
+ unsigned short port)
+{
+ assert(evcon->state == EVCON_DISCONNECTED);
+ evcon->bind_port = port;
+}
+
+static void
+evhttp_request_dispatch(struct evhttp_connection* evcon)
+{
+	/*
+	 * Starts writing the first queued request on an idle, already
+	 * connected outgoing connection.
+	 */
+	struct evhttp_request *req = TAILQ_FIRST(&evcon->requests);
+
+	/* this should not usually happen but it's possible */
+	if (req == NULL)
+		return;
+
+	/* delete possible close detection events */
+	evhttp_connection_stop_detectclose(evcon);
+
+	/* we assume that the connection is connected already */
+	assert(evcon->state == EVCON_IDLE);
+
+	evcon->state = EVCON_WRITING;
+
+	/* Create the header from the store arguments */
+	evhttp_make_header(evcon, req);
+
+	evhttp_write_buffer(evcon, evhttp_write_connectioncb, NULL);
+}
+
+/* Reset our connection state */
+void
+evhttp_connection_reset(struct evhttp_connection *evcon)
+{
+ if (event_initialized(&evcon->ev))
+ event_del(&evcon->ev);
+
+ if (evcon->fd != -1) {
+ /* inform interested parties about connection close */
+ if (evhttp_connected(evcon) && evcon->closecb != NULL)
+ (*evcon->closecb)(evcon, evcon->closecb_arg);
+
+ EVUTIL_CLOSESOCKET(evcon->fd);
+ evcon->fd = -1;
+ }
+ evcon->state = EVCON_DISCONNECTED;
+
+ evbuffer_drain(evcon->input_buffer,
+ EVBUFFER_LENGTH(evcon->input_buffer));
+ evbuffer_drain(evcon->output_buffer,
+ EVBUFFER_LENGTH(evcon->output_buffer));
+}
+
+/* Read-readiness on an idle persistent connection means the peer closed;
+ * tear the connection down.  fd/what are unused. */
+static void
+evhttp_detect_close_cb(int fd, short what, void *arg)
+{
+ struct evhttp_connection *evcon = arg;
+ evhttp_connection_reset(evcon);
+}
+
+/* Arm a one-shot EV_READ event so we notice the peer closing an idle
+ * persistent connection between requests. */
+static void
+evhttp_connection_start_detectclose(struct evhttp_connection *evcon)
+{
+ evcon->flags |= EVHTTP_CON_CLOSEDETECT;
+
+ if (event_initialized(&evcon->close_ev))
+ event_del(&evcon->close_ev);
+ event_set(&evcon->close_ev, evcon->fd, EV_READ,
+ evhttp_detect_close_cb, evcon);
+ EVHTTP_BASE_SET(evcon, &evcon->close_ev);
+ event_add(&evcon->close_ev, NULL);
+}
+
+/* Disarm close detection (counterpart of _start_detectclose). */
+static void
+evhttp_connection_stop_detectclose(struct evhttp_connection *evcon)
+{
+ evcon->flags &= ~EVHTTP_CON_CLOSEDETECT;
+ event_del(&evcon->close_ev);
+}
+
+/* Timer callback: retry a failed connection attempt.  fd/what unused. */
+static void
+evhttp_connection_retry(int fd, short what, void *arg)
+{
+ struct evhttp_connection *evcon = arg;
+
+ evcon->state = EVCON_DISCONNECTED;
+ evhttp_connection_connect(evcon);
+}
+
+/*
+ * Call back for asynchronous connection attempt.
+ *
+ * On success the connection goes to EVCON_IDLE and queued requests are
+ * dispatched.  On timeout or connect error we either schedule a retry
+ * with exponential backoff or, once retries are exhausted, fail every
+ * queued request.
+ */
+
+static void
+evhttp_connectioncb(int fd, short what, void *arg)
+{
+ struct evhttp_connection *evcon = arg;
+ int error;
+ socklen_t errsz = sizeof(error);
+
+ if (what == EV_TIMEOUT) {
+ event_debug(("%s: connection timeout for \"%s:%d\" on %d",
+ __func__, evcon->address, evcon->port, evcon->fd));
+ goto cleanup;
+ }
+
+ /* Check if the connection completed; SO_ERROR holds the deferred
+ * result of the non-blocking connect(). */
+ if (getsockopt(evcon->fd, SOL_SOCKET, SO_ERROR, (void*)&error,
+ &errsz) == -1) {
+ event_debug(("%s: getsockopt for \"%s:%d\" on %d",
+ __func__, evcon->address, evcon->port, evcon->fd));
+ goto cleanup;
+ }
+
+ if (error) {
+ event_debug(("%s: connect failed for \"%s:%d\" on %d: %s",
+ __func__, evcon->address, evcon->port, evcon->fd,
+ strerror(error)));
+ goto cleanup;
+ }
+
+ /* We are connected to the server now */
+ event_debug(("%s: connected to \"%s:%d\" on %d\n",
+ __func__, evcon->address, evcon->port, evcon->fd));
+
+ /* Reset the retry count as we were successful in connecting */
+ evcon->retry_cnt = 0;
+ evcon->state = EVCON_IDLE;
+
+ /* try to start requests that have queued up on this connection */
+ evhttp_request_dispatch(evcon);
+ return;
+
+ cleanup:
+ /* retry_max < 0 means retry forever; delay doubles per attempt,
+ * capped at 3600 seconds */
+ if (evcon->retry_max < 0 || evcon->retry_cnt < evcon->retry_max) {
+ evtimer_set(&evcon->ev, evhttp_connection_retry, evcon);
+ EVHTTP_BASE_SET(evcon, &evcon->ev);
+ evhttp_add_event(&evcon->ev, MIN(3600, 2 << evcon->retry_cnt),
+ HTTP_CONNECT_TIMEOUT);
+ evcon->retry_cnt++;
+ return;
+ }
+ evhttp_connection_reset(evcon);
+
+ /* for now, we just signal all requests by executing their callbacks */
+ while (TAILQ_FIRST(&evcon->requests) != NULL) {
+ struct evhttp_request *request = TAILQ_FIRST(&evcon->requests);
+ TAILQ_REMOVE(&evcon->requests, request, next);
+ request->evcon = NULL;
+
+ /* we might want to set an error here */
+ request->cb(request, request->cb_arg);
+ evhttp_request_free(request);
+ }
+}
+
+/*
+ * Check if we got a valid response code.
+ *
+ * NOTE(review): only rejects 0 (i.e. atoi failure); any other integer,
+ * including out-of-range values, is accepted as "valid".
+ */
+
+static int
+evhttp_valid_response_code(int code)
+{
+ if (code == 0)
+ return (0);
+
+ return (1);
+}
+
+/* Parses the status line of a web server, e.g. "HTTP/1.1 200 OK".
+ * Mutates `line` in place via strsep.  Returns 0 on success, -1 on a
+ * malformed line.  NOTE(review): the code is parsed with atoi(), so
+ * non-numeric garbage yields 0 and is then rejected only by the
+ * evhttp_valid_response_code() != 0 test. */
+
+static int
+evhttp_parse_response_line(struct evhttp_request *req, char *line)
+{
+ char *protocol;
+ char *number;
+ char *readable;
+
+ protocol = strsep(&line, " ");
+ if (line == NULL)
+ return (-1);
+ number = strsep(&line, " ");
+ if (line == NULL)
+ return (-1);
+ readable = line;
+
+ if (strcmp(protocol, "HTTP/1.0") == 0) {
+ req->major = 1;
+ req->minor = 0;
+ } else if (strcmp(protocol, "HTTP/1.1") == 0) {
+ req->major = 1;
+ req->minor = 1;
+ } else {
+ event_debug(("%s: bad protocol \"%s\"",
+ __func__, protocol));
+ return (-1);
+ }
+
+ req->response_code = atoi(number);
+ if (!evhttp_valid_response_code(req->response_code)) {
+ event_debug(("%s: bad response code \"%s\"",
+ __func__, number));
+ return (-1);
+ }
+
+ /* out of memory here is fatal by design (event_err exits) */
+ if ((req->response_code_line = strdup(readable)) == NULL)
+ event_err(1, "%s: strdup", __func__);
+
+ return (0);
+}
+
+/* Parse the first line of a HTTP request, e.g. "GET /index.html HTTP/1.0".
+ * Mutates `line` via strsep; only GET, POST and HEAD are recognized.
+ * Returns 0 on success, -1 on any malformed or unsupported line. */
+
+static int
+evhttp_parse_request_line(struct evhttp_request *req, char *line)
+{
+ char *method;
+ char *uri;
+ char *version;
+
+ /* Parse the request line */
+ method = strsep(&line, " ");
+ if (line == NULL)
+ return (-1);
+ uri = strsep(&line, " ");
+ if (line == NULL)
+ return (-1);
+ version = strsep(&line, " ");
+ /* trailing tokens after the version are a parse error */
+ if (line != NULL)
+ return (-1);
+
+ /* First line */
+ if (strcmp(method, "GET") == 0) {
+ req->type = EVHTTP_REQ_GET;
+ } else if (strcmp(method, "POST") == 0) {
+ req->type = EVHTTP_REQ_POST;
+ } else if (strcmp(method, "HEAD") == 0) {
+ req->type = EVHTTP_REQ_HEAD;
+ } else {
+ event_debug(("%s: bad method %s on request %p from %s",
+ __func__, method, req, req->remote_host));
+ return (-1);
+ }
+
+ if (strcmp(version, "HTTP/1.0") == 0) {
+ req->major = 1;
+ req->minor = 0;
+ } else if (strcmp(version, "HTTP/1.1") == 0) {
+ req->major = 1;
+ req->minor = 1;
+ } else {
+ event_debug(("%s: bad version %s on request %p from %s",
+ __func__, version, req, req->remote_host));
+ return (-1);
+ }
+
+ if ((req->uri = strdup(uri)) == NULL) {
+ event_debug(("%s: evhttp_decode_uri", __func__));
+ return (-1);
+ }
+
+ /* determine if it's a proxy request; absolute URIs do not start
+ * with '/' */
+ if (strlen(req->uri) > 0 && req->uri[0] != '/')
+ req->flags |= EVHTTP_PROXY_REQUEST;
+
+ return (0);
+}
+
+/* Return the value of the first header matching `key` (case-insensitive),
+ * or NULL if absent.  The returned pointer is owned by the header list. */
+const char *
+evhttp_find_header(const struct evkeyvalq *headers, const char *key)
+{
+ struct evkeyval *header;
+
+ TAILQ_FOREACH(header, headers, next) {
+ if (strcasecmp(header->key, key) == 0)
+ return (header->value);
+ }
+
+ return (NULL);
+}
+
+/* Remove and free every header in the list; the list head itself is not
+ * freed and remains a valid empty queue. */
+void
+evhttp_clear_headers(struct evkeyvalq *headers)
+{
+ struct evkeyval *header;
+
+ for (header = TAILQ_FIRST(headers);
+ header != NULL;
+ header = TAILQ_FIRST(headers)) {
+ TAILQ_REMOVE(headers, header, next);
+ free(header->key);
+ free(header->value);
+ free(header);
+ }
+}
+
+/*
+ * Returns 0, if the header was successfully removed.
+ * Returns -1, if the header could not be found.
+ * Only the first case-insensitive match is removed.
+ */
+
+int
+evhttp_remove_header(struct evkeyvalq *headers, const char *key)
+{
+ struct evkeyval *header;
+
+ TAILQ_FOREACH(header, headers, next) {
+ if (strcasecmp(header->key, key) == 0)
+ break;
+ }
+
+ if (header == NULL)
+ return (-1);
+
+ /* Free and remove the header that we found */
+ TAILQ_REMOVE(headers, header, next);
+ free(header->key);
+ free(header->value);
+ free(header);
+
+ return (0);
+}
+
+/* Reject header values containing CR/LF unless each newline run is
+ * followed by a space or tab (legal header folding) — guards against
+ * response-splitting via injected newlines.  Returns 1 if valid. */
+static int
+evhttp_header_is_valid_value(const char *value)
+{
+ const char *p = value;
+
+ while ((p = strpbrk(p, "\r\n")) != NULL) {
+ /* we really expect only one new line */
+ p += strspn(p, "\r\n");
+ /* we expect a space or tab for continuation */
+ if (*p != ' ' && *p != '\t')
+ return (0);
+ }
+ return (1);
+}
+
+/* Validated public entry for adding a header: refuses CR/LF in the key
+ * and non-folding CR/LF in the value, then delegates to the internal
+ * adder.  Returns 0 on success, -1 if dropped or on allocation failure. */
+int
+evhttp_add_header(struct evkeyvalq *headers,
+ const char *key, const char *value)
+{
+ event_debug(("%s: key: %s val: %s\n", __func__, key, value));
+
+ if (strchr(key, '\r') != NULL || strchr(key, '\n') != NULL) {
+ /* drop illegal headers */
+ event_debug(("%s: dropping illegal header key\n", __func__));
+ return (-1);
+ }
+
+ if (!evhttp_header_is_valid_value(value)) {
+ event_debug(("%s: dropping illegal header value\n", __func__));
+ return (-1);
+ }
+
+ return (evhttp_add_header_internal(headers, key, value));
+}
+
+/* Append a copied key/value pair to the header list without validation.
+ * Returns 0 on success, -1 on allocation failure (nothing is added). */
+static int
+evhttp_add_header_internal(struct evkeyvalq *headers,
+ const char *key, const char *value)
+{
+ struct evkeyval *header = calloc(1, sizeof(struct evkeyval));
+ if (header == NULL) {
+ event_warn("%s: calloc", __func__);
+ return (-1);
+ }
+ if ((header->key = strdup(key)) == NULL) {
+ free(header);
+ event_warn("%s: strdup", __func__);
+ return (-1);
+ }
+ if ((header->value = strdup(value)) == NULL) {
+ free(header->key);
+ free(header);
+ event_warn("%s: strdup", __func__);
+ return (-1);
+ }
+
+ TAILQ_INSERT_TAIL(headers, header, next);
+
+ return (0);
+}
+
+/*
+ * Parses the first line of a request or a response out of an event
+ * buffer, dispatching on req->kind.
+ *
+ * Returns
+ * DATA_CORRUPTED on error
+ * MORE_DATA_EXPECTED when we need to read more headers
+ * ALL_DATA_READ when all headers have been read.
+ */
+
+enum message_read_status
+evhttp_parse_firstline(struct evhttp_request *req, struct evbuffer *buffer)
+{
+ char *line;
+ enum message_read_status status = ALL_DATA_READ;
+
+ /* evbuffer_readline returns a malloc'd copy we must free */
+ line = evbuffer_readline(buffer);
+ if (line == NULL)
+ return (MORE_DATA_EXPECTED);
+
+ switch (req->kind) {
+ case EVHTTP_REQUEST:
+ if (evhttp_parse_request_line(req, line) == -1)
+ status = DATA_CORRUPTED;
+ break;
+ case EVHTTP_RESPONSE:
+ if (evhttp_parse_response_line(req, line) == -1)
+ status = DATA_CORRUPTED;
+ break;
+ default:
+ status = DATA_CORRUPTED;
+ }
+
+ free(line);
+ return (status);
+}
+
+/* Handle a folded (continuation) header line by appending it to the
+ * value of the most recently added header.  Returns -1 if there is no
+ * previous header or realloc fails; 0 on success.  NOTE(review): the
+ * continuation is concatenated verbatim, leading whitespace included. */
+static int
+evhttp_append_to_last_header(struct evkeyvalq *headers, const char *line)
+{
+ struct evkeyval *header = TAILQ_LAST(headers, evkeyvalq);
+ char *newval;
+ size_t old_len, line_len;
+
+ if (header == NULL)
+ return (-1);
+
+ old_len = strlen(header->value);
+ line_len = strlen(line);
+
+ newval = realloc(header->value, old_len + line_len + 1);
+ if (newval == NULL)
+ return (-1);
+
+ /* line_len + 1 copies the terminating NUL as well */
+ memcpy(newval + old_len, line, line_len + 1);
+ header->value = newval;
+
+ return (0);
+}
+
+/* Consume header lines from the buffer into req->input_headers until the
+ * blank line terminator.  Returns ALL_DATA_READ on the terminator,
+ * MORE_DATA_EXPECTED when the buffer runs dry mid-headers, and
+ * DATA_CORRUPTED on a malformed line (headers added so far are kept). */
+enum message_read_status
+evhttp_parse_headers(struct evhttp_request *req, struct evbuffer* buffer)
+{
+ char *line;
+ enum message_read_status status = MORE_DATA_EXPECTED;
+
+ struct evkeyvalq* headers = req->input_headers;
+ while ((line = evbuffer_readline(buffer))
+ != NULL) {
+ char *skey, *svalue;
+
+ if (*line == '\0') { /* Last header - Done */
+ status = ALL_DATA_READ;
+ free(line);
+ break;
+ }
+
+ /* Check if this is a continuation line */
+ if (*line == ' ' || *line == '\t') {
+ if (evhttp_append_to_last_header(headers, line) == -1)
+ goto error;
+ free(line);
+ continue;
+ }
+
+ /* Processing of header lines: split at the first ':' */
+ svalue = line;
+ skey = strsep(&svalue, ":");
+ if (svalue == NULL)
+ goto error;
+
+ /* skip optional whitespace after the colon */
+ svalue += strspn(svalue, " ");
+
+ if (evhttp_add_header(headers, skey, svalue) == -1)
+ goto error;
+
+ free(line);
+ }
+
+ return (status);
+
+ error:
+ free(line);
+ return (DATA_CORRUPTED);
+}
+
+/* Work out how many body bytes to expect from Content-Length and
+ * Connection headers.  Sets req->ntoread to -1 for read-until-close,
+ * otherwise to the parsed length.  Returns -1 only for the unreadable
+ * combination "no Content-Length but keep-alive", or a malformed length. */
+static int
+evhttp_get_body_length(struct evhttp_request *req)
+{
+ struct evkeyvalq *headers = req->input_headers;
+ const char *content_length;
+ const char *connection;
+
+ content_length = evhttp_find_header(headers, "Content-Length");
+ connection = evhttp_find_header(headers, "Connection");
+
+ if (content_length == NULL && connection == NULL)
+ req->ntoread = -1;
+ else if (content_length == NULL &&
+ strcasecmp(connection, "Close") != 0) {
+ /* Bad combination, we don't know when it will end */
+ event_warnx("%s: we got no content length, but the "
+ "server wants to keep the connection open: %s.",
+ __func__, connection);
+ return (-1);
+ } else if (content_length == NULL) {
+ req->ntoread = -1;
+ } else {
+ char *endp;
+ /* strtoll-style parse; reject empty, trailing junk, negative */
+ ev_int64_t ntoread = evutil_strtoll(content_length, &endp, 10);
+ if (*content_length == '\0' || *endp != '\0' || ntoread < 0) {
+ event_debug(("%s: illegal content length: %s",
+ __func__, content_length));
+ return (-1);
+ }
+ req->ntoread = ntoread;
+ }
+
+ event_debug(("%s: bytes to read: %lld (in buffer %ld)\n",
+ __func__, req->ntoread,
+ EVBUFFER_LENGTH(req->evcon->input_buffer)));
+
+ return (0);
+}
+
+/* Transition into body reading: decide between chunked and
+ * content-length framing, or finish immediately for bodyless requests
+ * (anything that is not a POST). */
+static void
+evhttp_get_body(struct evhttp_connection *evcon, struct evhttp_request *req)
+{
+ const char *xfer_enc;
+
+ /* If this is a request without a body, then we are done */
+ if (req->kind == EVHTTP_REQUEST && req->type != EVHTTP_REQ_POST) {
+ evhttp_connection_done(evcon);
+ return;
+ }
+ evcon->state = EVCON_READING_BODY;
+ xfer_enc = evhttp_find_header(req->input_headers, "Transfer-Encoding");
+ if (xfer_enc != NULL && strcasecmp(xfer_enc, "chunked") == 0) {
+ req->chunked = 1;
+ req->ntoread = -1;
+ } else {
+ if (evhttp_get_body_length(req) == -1) {
+ evhttp_connection_fail(evcon,
+ EVCON_HTTP_INVALID_HEADER);
+ return;
+ }
+ }
+ evhttp_read_body(evcon, req);
+}
+
+/* State-machine step: parse the request/status line from the input
+ * buffer; re-arm the read event if incomplete, fail the connection if
+ * corrupt, otherwise move on to header parsing. */
+static void
+evhttp_read_firstline(struct evhttp_connection *evcon,
+ struct evhttp_request *req)
+{
+ enum message_read_status res;
+
+ res = evhttp_parse_firstline(req, evcon->input_buffer);
+ if (res == DATA_CORRUPTED) {
+ /* Error while reading, terminate */
+ event_debug(("%s: bad header lines on %d\n",
+ __func__, evcon->fd));
+ evhttp_connection_fail(evcon, EVCON_HTTP_INVALID_HEADER);
+ return;
+ } else if (res == MORE_DATA_EXPECTED) {
+ /* Need more header lines */
+ evhttp_add_event(&evcon->ev,
+ evcon->timeout, HTTP_READ_TIMEOUT);
+ return;
+ }
+
+ evcon->state = EVCON_READING_HEADERS;
+ evhttp_read_header(evcon, req);
+}
+
+/* State-machine step: parse headers; once complete, start reading the
+ * body for requests, and for responses skip the body entirely for
+ * 204/304/1xx status codes as HTTP requires. */
+static void
+evhttp_read_header(struct evhttp_connection *evcon, struct evhttp_request *req)
+{
+ enum message_read_status res;
+ int fd = evcon->fd;
+
+ res = evhttp_parse_headers(req, evcon->input_buffer);
+ if (res == DATA_CORRUPTED) {
+ /* Error while reading, terminate */
+ event_debug(("%s: bad header lines on %d\n", __func__, fd));
+ evhttp_connection_fail(evcon, EVCON_HTTP_INVALID_HEADER);
+ return;
+ } else if (res == MORE_DATA_EXPECTED) {
+ /* Need more header lines */
+ evhttp_add_event(&evcon->ev,
+ evcon->timeout, HTTP_READ_TIMEOUT);
+ return;
+ }
+
+ /* Done reading headers, do the real work */
+ switch (req->kind) {
+ case EVHTTP_REQUEST:
+ event_debug(("%s: checking for post data on %d\n",
+ __func__, fd));
+ evhttp_get_body(evcon, req);
+ break;
+
+ case EVHTTP_RESPONSE:
+ if (req->response_code == HTTP_NOCONTENT ||
+ req->response_code == HTTP_NOTMODIFIED ||
+ (req->response_code >= 100 && req->response_code < 200)) {
+ event_debug(("%s: skipping body for code %d\n",
+ __func__, req->response_code));
+ evhttp_connection_done(evcon);
+ } else {
+ event_debug(("%s: start of read body for %s on %d\n",
+ __func__, req->remote_host, fd));
+ evhttp_get_body(evcon, req);
+ }
+ break;
+
+ default:
+ event_warnx("%s: bad header on %d", __func__, fd);
+ evhttp_connection_fail(evcon, EVCON_HTTP_INVALID_HEADER);
+ break;
+ }
+}
+
+/*
+ * Creates a TCP connection to the specified port and executes a callback
+ * when finished. Failure or success is indicated by the passed connection
+ * object.
+ *
+ * Although this interface accepts a hostname, it is intended to take
+ * only numeric hostnames so that non-blocking DNS resolution can
+ * happen elsewhere.
+ *
+ * Returns NULL on allocation failure; the caller owns the returned
+ * object and frees it with evhttp_connection_free().
+ */
+
+struct evhttp_connection *
+evhttp_connection_new(const char *address, unsigned short port)
+{
+ struct evhttp_connection *evcon = NULL;
+
+ event_debug(("Attempting connection to %s:%d\n", address, port));
+
+ if ((evcon = calloc(1, sizeof(struct evhttp_connection))) == NULL) {
+ event_warn("%s: calloc failed", __func__);
+ goto error;
+ }
+
+ evcon->fd = -1;
+ evcon->port = port;
+
+ /* -1 timeout means "use the library default" */
+ evcon->timeout = -1;
+ evcon->retry_cnt = evcon->retry_max = 0;
+
+ if ((evcon->address = strdup(address)) == NULL) {
+ event_warn("%s: strdup failed", __func__);
+ goto error;
+ }
+
+ if ((evcon->input_buffer = evbuffer_new()) == NULL) {
+ event_warn("%s: evbuffer_new failed", __func__);
+ goto error;
+ }
+
+ if ((evcon->output_buffer = evbuffer_new()) == NULL) {
+ event_warn("%s: evbuffer_new failed", __func__);
+ goto error;
+ }
+
+ evcon->state = EVCON_DISCONNECTED;
+ TAILQ_INIT(&evcon->requests);
+
+ return (evcon);
+
+ error:
+ if (evcon != NULL)
+ evhttp_connection_free(evcon);
+ return (NULL);
+}
+
+/* Attach the connection to an event base; may be done only once and only
+ * while disconnected. */
+void evhttp_connection_set_base(struct evhttp_connection *evcon,
+ struct event_base *base)
+{
+ assert(evcon->base == NULL);
+ assert(evcon->state == EVCON_DISCONNECTED);
+ evcon->base = base;
+}
+
+/* Set the per-operation timeout in seconds (-1 restores the default). */
+void
+evhttp_connection_set_timeout(struct evhttp_connection *evcon,
+ int timeout_in_secs)
+{
+ evcon->timeout = timeout_in_secs;
+}
+
+/* Set the maximum connect retries; negative means retry forever. */
+void
+evhttp_connection_set_retries(struct evhttp_connection *evcon,
+ int retry_max)
+{
+ evcon->retry_max = retry_max;
+}
+
+/* Register a callback invoked when the connection is closed. */
+void
+evhttp_connection_set_closecb(struct evhttp_connection *evcon,
+ void (*cb)(struct evhttp_connection *, void *), void *cbarg)
+{
+ evcon->closecb = cb;
+ evcon->closecb_arg = cbarg;
+}
+
+/* Report the peer address and port; *address aliases internal storage
+ * and must not be freed by the caller. */
+void
+evhttp_connection_get_peer(struct evhttp_connection *evcon,
+ char **address, u_short *port)
+{
+ *address = evcon->address;
+ *port = evcon->port;
+}
+
+/* Begin a non-blocking connect to evcon->address:port, optionally binding
+ * to a configured local address/port first.  Completion (or timeout) is
+ * reported asynchronously via evhttp_connectioncb.  Returns 0 if the
+ * attempt was started (or already in progress), -1 on immediate failure. */
+int
+evhttp_connection_connect(struct evhttp_connection *evcon)
+{
+ if (evcon->state == EVCON_CONNECTING)
+ return (0);
+
+ /* drop any previous socket/state before reconnecting */
+ evhttp_connection_reset(evcon);
+
+ assert(!(evcon->flags & EVHTTP_CON_INCOMING));
+ evcon->flags |= EVHTTP_CON_OUTGOING;
+
+ evcon->fd = bind_socket(
+ evcon->bind_address, evcon->bind_port, 0 /*reuse*/);
+ if (evcon->fd == -1) {
+ event_debug(("%s: failed to bind to \"%s\"",
+ __func__, evcon->bind_address));
+ return (-1);
+ }
+
+ if (socket_connect(evcon->fd, evcon->address, evcon->port) == -1) {
+ EVUTIL_CLOSESOCKET(evcon->fd); evcon->fd = -1;
+ return (-1);
+ }
+
+ /* Set up a callback for successful connection setup */
+ event_set(&evcon->ev, evcon->fd, EV_WRITE, evhttp_connectioncb, evcon);
+ EVHTTP_BASE_SET(evcon, &evcon->ev);
+ evhttp_add_event(&evcon->ev, evcon->timeout, HTTP_CONNECT_TIMEOUT);
+
+ evcon->state = EVCON_CONNECTING;
+
+ return (0);
+}
+
+/*
+ * Starts an HTTP request on the provided evhttp_connection object.
+ * If the connection object is not connected to the web server already,
+ * this will start the connection.
+ *
+ * The connection takes ownership of `req` (it is queued and dispatched
+ * in FIFO order).  Returns 0 on success, -1 if connecting failed.
+ */
+
+int
+evhttp_make_request(struct evhttp_connection *evcon,
+ struct evhttp_request *req,
+ enum evhttp_cmd_type type, const char *uri)
+{
+ /* We are making a request */
+ req->kind = EVHTTP_REQUEST;
+ req->type = type;
+ if (req->uri != NULL)
+ free(req->uri);
+ /* OOM is fatal here by design (event_err exits) */
+ if ((req->uri = strdup(uri)) == NULL)
+ event_err(1, "%s: strdup", __func__);
+
+ /* Set the protocol version if it is not supplied */
+ if (!req->major && !req->minor) {
+ req->major = 1;
+ req->minor = 1;
+ }
+
+ assert(req->evcon == NULL);
+ req->evcon = evcon;
+ assert(!(req->flags & EVHTTP_REQ_OWN_CONNECTION));
+
+ TAILQ_INSERT_TAIL(&evcon->requests, req, next);
+
+ /* If the connection object is not connected; make it so */
+ if (!evhttp_connected(evcon))
+ return (evhttp_connection_connect(evcon));
+
+ /*
+ * If it's connected already and we are the first in the queue,
+ * then we can dispatch this request immediately. Otherwise, it
+ * will be dispatched once the pending requests are completed.
+ */
+ if (TAILQ_FIRST(&evcon->requests) == req)
+ evhttp_request_dispatch(evcon);
+
+ return (0);
+}
+
+/*
+ * Reads data from file descriptor into request structure
+ * Request structure needs to be set up correctly.
+ * Arms a one-shot EV_READ event and enters EVCON_READING_FIRSTLINE.
+ */
+
+void
+evhttp_start_read(struct evhttp_connection *evcon)
+{
+ /* Set up an event to read the headers */
+ if (event_initialized(&evcon->ev))
+ event_del(&evcon->ev);
+ event_set(&evcon->ev, evcon->fd, EV_READ, evhttp_read, evcon);
+ EVHTTP_BASE_SET(evcon, &evcon->ev);
+
+ evhttp_add_event(&evcon->ev, evcon->timeout, HTTP_READ_TIMEOUT);
+ evcon->state = EVCON_READING_FIRSTLINE;
+}
+
+/* Server side: the response for the head-of-queue request has been fully
+ * written.  Free the request, then either close the connection (HTTP/1.0
+ * without keep-alive, or explicit Connection: close) or keep it alive
+ * and wait for the next request.  arg is unused. */
+static void
+evhttp_send_done(struct evhttp_connection *evcon, void *arg)
+{
+ int need_close;
+ struct evhttp_request *req = TAILQ_FIRST(&evcon->requests);
+ TAILQ_REMOVE(&evcon->requests, req, next);
+
+ /* delete possible close detection events */
+ evhttp_connection_stop_detectclose(evcon);
+
+ need_close =
+ (req->minor == 0 &&
+ !evhttp_is_connection_keepalive(req->input_headers))||
+ evhttp_is_connection_close(req->flags, req->input_headers) ||
+ evhttp_is_connection_close(req->flags, req->output_headers);
+
+ assert(req->flags & EVHTTP_REQ_OWN_CONNECTION);
+ evhttp_request_free(req);
+
+ if (need_close) {
+ evhttp_connection_free(evcon);
+ return;
+ }
+
+ /* we have a persistent connection; try to accept another request. */
+ if (evhttp_associate_new_request_with_connection(evcon) == -1)
+ evhttp_connection_free(evcon);
+}
+
+/*
+ * Returns an error page and forces the connection closed.
+ *
+ * NOTE(review): evbuffer_new() is not checked for NULL, and the body
+ * text is fixed "Method Not Implemented" boilerplate regardless of the
+ * error code — `reason` only appears in the status line and <TITLE>.
+ */
+
+void
+evhttp_send_error(struct evhttp_request *req, int error, const char *reason)
+{
+#define ERR_FORMAT "<HTML><HEAD>\n" \
+ "<TITLE>%d %s</TITLE>\n" \
+ "</HEAD><BODY>\n" \
+ "<H1>Method Not Implemented</H1>\n" \
+ "Invalid method in request<P>\n" \
+ "</BODY></HTML>\n"
+
+ struct evbuffer *buf = evbuffer_new();
+
+ /* close the connection on error */
+ evhttp_add_header(req->output_headers, "Connection", "close");
+
+ evhttp_response_code(req, error, reason);
+
+ evbuffer_add_printf(buf, ERR_FORMAT, error, reason);
+
+ evhttp_send_page(req, buf);
+
+ evbuffer_free(buf);
+#undef ERR_FORMAT
+}
+
+/* Requires that headers and response code are already set up.
+ * Appends the (optional) body, serializes the headers, and starts the
+ * asynchronous write; evhttp_send_done fires on completion. */
+
+static inline void
+evhttp_send(struct evhttp_request *req, struct evbuffer *databuf)
+{
+ struct evhttp_connection *evcon = req->evcon;
+
+ assert(TAILQ_FIRST(&evcon->requests) == req);
+
+ /* xxx: not sure if we really should expose the data buffer this way */
+ if (databuf != NULL)
+ evbuffer_add_buffer(req->output_buffer, databuf);
+
+ /* Adds headers to the response */
+ evhttp_make_header(evcon, req);
+
+ evhttp_write_buffer(evcon, evhttp_send_done, NULL);
+}
+
+/* Send a complete, non-chunked reply with the given status and body. */
+void
+evhttp_send_reply(struct evhttp_request *req, int code, const char *reason,
+ struct evbuffer *databuf)
+{
+ evhttp_response_code(req, code, reason);
+
+ evhttp_send(req, databuf);
+}
+
+/* Begin a streamed reply: write status and headers now, body later via
+ * evhttp_send_reply_chunk.  HTTP/1.1 peers get chunked transfer coding;
+ * HTTP/1.0 peers get a raw stream terminated by connection close. */
+void
+evhttp_send_reply_start(struct evhttp_request *req, int code,
+ const char *reason)
+{
+ evhttp_response_code(req, code, reason);
+ if (req->major == 1 && req->minor == 1) {
+ /* use chunked encoding for HTTP/1.1 */
+ evhttp_add_header(req->output_headers, "Transfer-Encoding",
+ "chunked");
+ req->chunked = 1;
+ }
+ evhttp_make_header(req->evcon, req);
+ evhttp_write_buffer(req->evcon, NULL, NULL);
+}
+
+/* Stream one body chunk; wraps it in hex-length/CRLF framing when the
+ * reply uses chunked encoding, raw otherwise. */
+void
+evhttp_send_reply_chunk(struct evhttp_request *req, struct evbuffer *databuf)
+{
+ if (req->chunked) {
+ evbuffer_add_printf(req->evcon->output_buffer, "%x\r\n",
+ (unsigned)EVBUFFER_LENGTH(databuf));
+ }
+ evbuffer_add_buffer(req->evcon->output_buffer, databuf);
+ if (req->chunked) {
+ evbuffer_add(req->evcon->output_buffer, "\r\n", 2);
+ }
+ evhttp_write_buffer(req->evcon, NULL, NULL);
+}
+
+/* Finish a streamed reply: emit the zero-length terminal chunk for
+ * chunked replies, or arrange for evhttp_send_done to run once any
+ * pending write completes. */
+void
+evhttp_send_reply_end(struct evhttp_request *req)
+{
+ struct evhttp_connection *evcon = req->evcon;
+
+ if (req->chunked) {
+ evbuffer_add(req->evcon->output_buffer, "0\r\n\r\n", 5);
+ evhttp_write_buffer(req->evcon, evhttp_send_done, NULL);
+ req->chunked = 0;
+ } else if (!event_pending(&evcon->ev, EV_WRITE|EV_TIMEOUT, NULL)) {
+ /* let the connection know that we are done with the request */
+ evhttp_send_done(evcon, NULL);
+ } else {
+ /* make the callback execute after all data has been written */
+ evcon->cb = evhttp_send_done;
+ evcon->cb_arg = NULL;
+ }
+}
+
+/* Mark the request as a response and record code/reason.  NOTE(review):
+ * the strdup result is not checked here, unlike elsewhere in this file. */
+void
+evhttp_response_code(struct evhttp_request *req, int code, const char *reason)
+{
+ req->kind = EVHTTP_RESPONSE;
+ req->response_code = code;
+ if (req->response_code_line != NULL)
+ free(req->response_code_line);
+ req->response_code_line = strdup(reason);
+}
+
+/* Send a single HTML page and close the connection afterwards; existing
+ * output headers are discarded and replaced.  Defaults to HTTP/1.1 and
+ * a 200 status if none were set. */
+void
+evhttp_send_page(struct evhttp_request *req, struct evbuffer *databuf)
+{
+ if (!req->major || !req->minor) {
+ req->major = 1;
+ req->minor = 1;
+ }
+
+ if (req->kind != EVHTTP_RESPONSE)
+ evhttp_response_code(req, 200, "OK");
+
+ evhttp_clear_headers(req->output_headers);
+ evhttp_add_header(req->output_headers, "Content-Type", "text/html");
+ evhttp_add_header(req->output_headers, "Connection", "close");
+
+ evhttp_send(req, databuf);
+}
+
+/* Lookup table for evhttp_encode_uri: a 1 marks a byte emitted verbatim
+ * (alphanumerics plus a small set of punctuation); every other byte,
+ * including all of 128-255, is percent-encoded. */
+static const char uri_chars[256] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0,
+ /* 64 */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
+ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0,
+ /* 128 */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ /* 192 */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+/*
+ * Helper functions to encode/decode a URI.
+ * The returned string must be freed by the caller.
+ *
+ * NOTE(review): neither evbuffer_new() nor strdup() is checked for
+ * NULL here; an allocation failure would crash.
+ */
+char *
+evhttp_encode_uri(const char *uri)
+{
+ struct evbuffer *buf = evbuffer_new();
+ char *p;
+
+ for (p = (char *)uri; *p != '\0'; p++) {
+ if (uri_chars[(u_char)(*p)]) {
+ evbuffer_add(buf, p, 1);
+ } else {
+ evbuffer_add_printf(buf, "%%%02X", (u_char)(*p));
+ }
+ }
+ /* append the terminating NUL so the buffer holds a C string */
+ evbuffer_add(buf, "", 1);
+ p = strdup((char *)EVBUFFER_DATA(buf));
+ evbuffer_free(buf);
+
+ return (p);
+}
+
+/*
+ * @param always_decode_plus: when true we transform plus to space even
+ * if we have not seen a ?.
+ *
+ * Decodes %XX escapes (and '+' in query position) from `uri` into the
+ * caller-provided buffer `ret`, which must hold strlen(uri)+1 bytes
+ * (decoding never expands).  Returns the decoded length.
+ * NOTE(review): the `length` parameter is accepted but never used; the
+ * loop stops at the NUL terminator instead.
+ */
+static int
+evhttp_decode_uri_internal(
+ const char *uri, size_t length, char *ret, int always_decode_plus)
+{
+ char c;
+ int i, j, in_query = always_decode_plus;
+
+ for (i = j = 0; uri[i] != '\0'; i++) {
+ c = uri[i];
+ if (c == '?') {
+ in_query = 1;
+ } else if (c == '+' && in_query) {
+ c = ' ';
+ } else if (c == '%' && isxdigit((unsigned char)uri[i+1]) &&
+ isxdigit((unsigned char)uri[i+2])) {
+ char tmp[] = { uri[i+1], uri[i+2], '\0' };
+ c = (char)strtol(tmp, NULL, 16);
+ i += 2;
+ }
+ ret[j++] = c;
+ }
+ ret[j] = '\0';
+
+ return (j);
+}
+
+/* Allocate and return a percent-decoded copy of `uri` ('+' is decoded
+ * only after a '?').  Caller frees; OOM is fatal (event_err exits). */
+char *
+evhttp_decode_uri(const char *uri)
+{
+ char *ret;
+
+ if ((ret = malloc(strlen(uri) + 1)) == NULL)
+ event_err(1, "%s: malloc(%lu)", __func__,
+ (unsigned long)(strlen(uri) + 1));
+
+ evhttp_decode_uri_internal(uri, strlen(uri),
+ ret, 0 /*always_decode_plus*/);
+
+ return (ret);
+}
+
+/*
+ * Helper function to parse out arguments in a query.
+ * The arguments are separated by key and value.
+ *
+ * Splits everything after '?' on '&' into key=value pairs; values are
+ * percent-decoded ('+' becomes space), keys are NOT decoded.
+ * NOTE(review): on a malformed pair parsing stops silently, leaving any
+ * pairs already added in `headers`.
+ */
+
+void
+evhttp_parse_query(const char *uri, struct evkeyvalq *headers)
+{
+ char *line;
+ char *argument;
+ char *p;
+
+ TAILQ_INIT(headers);
+
+ /* No arguments - we are done */
+ if (strchr(uri, '?') == NULL)
+ return;
+
+ if ((line = strdup(uri)) == NULL)
+ event_err(1, "%s: strdup", __func__);
+
+
+ argument = line;
+
+ /* We already know that there has to be a ? */
+ strsep(&argument, "?");
+
+ p = argument;
+ while (p != NULL && *p != '\0') {
+ char *key, *value, *decoded_value;
+ argument = strsep(&p, "&");
+
+ value = argument;
+ key = strsep(&value, "=");
+ if (value == NULL)
+ goto error;
+
+ if ((decoded_value = malloc(strlen(value) + 1)) == NULL)
+ event_err(1, "%s: malloc", __func__);
+
+ evhttp_decode_uri_internal(value, strlen(value),
+ decoded_value, 1 /*always_decode_plus*/);
+ event_debug(("Query Param: %s -> %s\n", key, decoded_value));
+ evhttp_add_header_internal(headers, key, decoded_value);
+ free(decoded_value);
+ }
+
+ error:
+ free(line);
+}
+
+/* Find the registered callback whose path exactly matches the request
+ * URI (ignoring any "?query" suffix).  Returns NULL if none matches. */
+static struct evhttp_cb *
+evhttp_dispatch_callback(struct httpcbq *callbacks, struct evhttp_request *req)
+{
+ struct evhttp_cb *cb;
+ size_t offset = 0;
+
+ /* Test for different URLs */
+ char *p = strchr(req->uri, '?');
+ if (p != NULL)
+ offset = (size_t)(p - req->uri);
+
+ TAILQ_FOREACH(cb, callbacks, next) {
+ int res = 0;
+ if (p == NULL) {
+ res = strcmp(cb->what, req->uri) == 0;
+ } else {
+ /* compare only up to the '?', and require cb->what
+ * to end there too */
+ res = ((strncmp(cb->what, req->uri, offset) == 0) &&
+ (cb->what[offset] == '\0'));
+ }
+
+ if (res)
+ return (cb);
+ }
+
+ return (NULL);
+}
+
+/* Top-level request router: try a URI-specific callback, then the
+ * generic callback, and finally fall back to an HTML-escaped 404 page.
+ * NOTE(review): evbuffer_new() in the 404 path is unchecked. */
+static void
+evhttp_handle_request(struct evhttp_request *req, void *arg)
+{
+ struct evhttp *http = arg;
+ struct evhttp_cb *cb = NULL;
+
+ if (req->uri == NULL) {
+ evhttp_send_error(req, HTTP_BADREQUEST, "Bad Request");
+ return;
+ }
+
+ if ((cb = evhttp_dispatch_callback(&http->callbacks, req)) != NULL) {
+ (*cb->cb)(req, cb->cbarg);
+ return;
+ }
+
+ /* Generic call back */
+ if (http->gencb) {
+ (*http->gencb)(req, http->gencbarg);
+ return;
+ } else {
+ /* We need to send a 404 here */
+#define ERR_FORMAT "<html><head>" \
+ "<title>404 Not Found</title>" \
+ "</head><body>" \
+ "<h1>Not Found</h1>" \
+ "<p>The requested URL %s was not found on this server.</p>"\
+ "</body></html>\n"
+
+ /* escape the URI before echoing it back to avoid HTML injection */
+ char *escaped_html = evhttp_htmlescape(req->uri);
+ struct evbuffer *buf = evbuffer_new();
+
+ evhttp_response_code(req, HTTP_NOTFOUND, "Not Found");
+
+ evbuffer_add_printf(buf, ERR_FORMAT, escaped_html);
+
+ free(escaped_html);
+
+ evhttp_send_page(req, buf);
+
+ evbuffer_free(buf);
+#undef ERR_FORMAT
+ }
+}
+
+/* Listener callback: accept one client and hand it to the HTTP server.
+ * NOTE(review): if evutil_make_socket_nonblocking() fails, `nfd` is
+ * leaked — it is never closed before the early return. */
+static void
+accept_socket(int fd, short what, void *arg)
+{
+ struct evhttp *http = arg;
+ struct sockaddr_storage ss;
+ socklen_t addrlen = sizeof(ss);
+ int nfd;
+
+ if ((nfd = accept(fd, (struct sockaddr *)&ss, &addrlen)) == -1) {
+ if (errno != EAGAIN && errno != EINTR)
+ event_warn("%s: bad accept", __func__);
+ return;
+ }
+ if (evutil_make_socket_nonblocking(nfd) < 0)
+ return;
+
+ evhttp_get_request(http, nfd, (struct sockaddr *)&ss, addrlen);
+}
+
+/* Bind a listening socket on address:port (with SO_REUSEADDR), listen
+ * with a backlog of 128, and register it with the server.  Returns 0 on
+ * success, -1 on failure (the socket is closed on the listen path). */
+int
+evhttp_bind_socket(struct evhttp *http, const char *address, u_short port)
+{
+ int fd;
+ int res;
+
+ if ((fd = bind_socket(address, port, 1 /*reuse*/)) == -1)
+ return (-1);
+
+ if (listen(fd, 128) == -1) {
+ event_warn("%s: listen", __func__);
+ EVUTIL_CLOSESOCKET(fd);
+ return (-1);
+ }
+
+ res = evhttp_accept_socket(http, fd);
+
+ if (res != -1)
+ event_debug(("Bound to port %d - Awaiting connections ... ",
+ port));
+
+ return (res);
+}
+
+/* Register an already-listening fd with the server: wrap it in a bound-
+ * socket record and schedule a persistent EV_READ accept event.
+ * Returns 0 on success, -1 on allocation or event_add failure (the fd
+ * itself is left untouched for the caller). */
+int
+evhttp_accept_socket(struct evhttp *http, int fd)
+{
+ struct evhttp_bound_socket *bound;
+ struct event *ev;
+ int res;
+
+ bound = malloc(sizeof(struct evhttp_bound_socket));
+ if (bound == NULL)
+ return (-1);
+
+ ev = &bound->bind_ev;
+
+ /* Schedule the socket for accepting */
+ event_set(ev, fd, EV_READ | EV_PERSIST, accept_socket, http);
+ EVHTTP_BASE_SET(http, ev);
+
+ res = event_add(ev, NULL);
+
+ if (res == -1) {
+ free(bound);
+ return (-1);
+ }
+
+ TAILQ_INSERT_TAIL(&http->sockets, bound, next);
+
+ return (0);
+}
+
+/* Allocate and zero-initialize a bare evhttp server object.
+ * Returns NULL on allocation failure. */
+static struct evhttp*
+evhttp_new_object(void)
+{
+ struct evhttp *http = NULL;
+
+ if ((http = calloc(1, sizeof(struct evhttp))) == NULL) {
+ event_warn("%s: calloc", __func__);
+ return (NULL);
+ }
+
+ /* -1 means "library default timeout" */
+ http->timeout = -1;
+
+ TAILQ_INIT(&http->sockets);
+ TAILQ_INIT(&http->callbacks);
+ TAILQ_INIT(&http->connections);
+
+ return (http);
+}
+
+/* Create a server bound to the given event base.
+ * NOTE(review): evhttp_new_object() can return NULL, and the result is
+ * dereferenced without a check — crash on out-of-memory. */
+struct evhttp *
+evhttp_new(struct event_base *base)
+{
+ struct evhttp *http = evhttp_new_object();
+
+ http->base = base;
+
+ return (http);
+}
+
+/*
+ * Start a web server on the specified address and port.
+ *
+ * NOTE(review): evhttp_new_object() can return NULL, which would be
+ * passed unchecked into evhttp_bind_socket().
+ */
+
+struct evhttp *
+evhttp_start(const char *address, u_short port)
+{
+ struct evhttp *http = evhttp_new_object();
+
+ if (evhttp_bind_socket(http, address, port) == -1) {
+ /* nothing else was allocated yet, so plain free suffices */
+ free(http);
+ return (NULL);
+ }
+
+ return (http);
+}
+
+/* Tear down a server: close and free all listening sockets, free every
+ * live connection, then every registered callback, then the object. */
+void
+evhttp_free(struct evhttp* http)
+{
+ struct evhttp_cb *http_cb;
+ struct evhttp_connection *evcon;
+ struct evhttp_bound_socket *bound;
+ int fd;
+
+ /* Remove the accepting part */
+ while ((bound = TAILQ_FIRST(&http->sockets)) != NULL) {
+ TAILQ_REMOVE(&http->sockets, bound, next);
+
+ fd = bound->bind_ev.ev_fd;
+ event_del(&bound->bind_ev);
+ EVUTIL_CLOSESOCKET(fd);
+
+ free(bound);
+ }
+
+ while ((evcon = TAILQ_FIRST(&http->connections)) != NULL) {
+ /* evhttp_connection_free removes the connection */
+ evhttp_connection_free(evcon);
+ }
+
+ while ((http_cb = TAILQ_FIRST(&http->callbacks)) != NULL) {
+ TAILQ_REMOVE(&http->callbacks, http_cb, next);
+ free(http_cb->what);
+ free(http_cb);
+ }
+
+ free(http);
+}
+
+/* Set the server-wide I/O timeout in seconds (-1 restores the default). */
+void
+evhttp_set_timeout(struct evhttp* http, int timeout_in_secs)
+{
+ http->timeout = timeout_in_secs;
+}
+
+/* Register a handler for an exact URI path.  OOM on the record is fatal;
+ * NOTE(review): the strdup of `uri` itself is not checked. */
+void
+evhttp_set_cb(struct evhttp *http, const char *uri,
+ void (*cb)(struct evhttp_request *, void *), void *cbarg)
+{
+ struct evhttp_cb *http_cb;
+
+ if ((http_cb = calloc(1, sizeof(struct evhttp_cb))) == NULL)
+ event_err(1, "%s: calloc", __func__);
+
+ http_cb->what = strdup(uri);
+ http_cb->cb = cb;
+ http_cb->cbarg = cbarg;
+
+ TAILQ_INSERT_TAIL(&http->callbacks, http_cb, next);
+}
+
+/* Unregister the handler for `uri`.  Returns 0 if found and removed,
+ * -1 if no handler matched. */
+int
+evhttp_del_cb(struct evhttp *http, const char *uri)
+{
+ struct evhttp_cb *http_cb;
+
+ TAILQ_FOREACH(http_cb, &http->callbacks, next) {
+ if (strcmp(http_cb->what, uri) == 0)
+ break;
+ }
+ if (http_cb == NULL)
+ return (-1);
+
+ TAILQ_REMOVE(&http->callbacks, http_cb, next);
+ free(http_cb->what);
+ free(http_cb);
+
+ return (0);
+}
+
+/* Register the catch-all handler used when no URI-specific one matches. */
+void
+evhttp_set_gencb(struct evhttp *http,
+ void (*cb)(struct evhttp_request *, void *), void *cbarg)
+{
+ http->gencb = cb;
+ http->gencbarg = cbarg;
+}
+
+/*
+ * Request related functions
+ */
+
+/* Allocate a request object with empty header lists and buffers; `cb`
+ * fires when the request completes.  Returns NULL on allocation failure
+ * (partial state is released via evhttp_request_free). */
+struct evhttp_request *
+evhttp_request_new(void (*cb)(struct evhttp_request *, void *), void *arg)
+{
+ struct evhttp_request *req = NULL;
+
+ /* Allocate request structure */
+ if ((req = calloc(1, sizeof(struct evhttp_request))) == NULL) {
+ event_warn("%s: calloc", __func__);
+ goto error;
+ }
+
+ req->kind = EVHTTP_RESPONSE;
+ req->input_headers = calloc(1, sizeof(struct evkeyvalq));
+ if (req->input_headers == NULL) {
+ event_warn("%s: calloc", __func__);
+ goto error;
+ }
+ TAILQ_INIT(req->input_headers);
+
+ req->output_headers = calloc(1, sizeof(struct evkeyvalq));
+ if (req->output_headers == NULL) {
+ event_warn("%s: calloc", __func__);
+ goto error;
+ }
+ TAILQ_INIT(req->output_headers);
+
+ if ((req->input_buffer = evbuffer_new()) == NULL) {
+ event_warn("%s: evbuffer_new", __func__);
+ goto error;
+ }
+
+ if ((req->output_buffer = evbuffer_new()) == NULL) {
+ event_warn("%s: evbuffer_new", __func__);
+ goto error;
+ }
+
+ req->cb = cb;
+ req->cb_arg = arg;
+
+ return (req);
+
+ error:
+ if (req != NULL)
+ evhttp_request_free(req);
+ return (NULL);
+}
+
+/*
+ * Release a request object and everything it owns. Must tolerate a
+ * partially constructed request: evhttp_request_new's error path calls
+ * this while input_headers/output_headers/buffers may still be NULL,
+ * so the header queues are guarded before being walked (the original
+ * passed a NULL queue to evhttp_clear_headers, which dereferences it).
+ * free(NULL) is a no-op, so plain string members need no guard.
+ */
+void
+evhttp_request_free(struct evhttp_request *req)
+{
+ free(req->remote_host);
+ free(req->uri);
+ free(req->response_code_line);
+
+ if (req->input_headers != NULL) {
+ evhttp_clear_headers(req->input_headers);
+ free(req->input_headers);
+ }
+
+ if (req->output_headers != NULL) {
+ evhttp_clear_headers(req->output_headers);
+ free(req->output_headers);
+ }
+
+ if (req->input_buffer != NULL)
+ evbuffer_free(req->input_buffer);
+
+ if (req->output_buffer != NULL)
+ evbuffer_free(req->output_buffer);
+
+ free(req);
+}
+
+/* Register a callback fired as each chunk of a chunked response arrives. */
+void
+evhttp_request_set_chunked_cb(struct evhttp_request *req,
+ void (*cb)(struct evhttp_request *, void *))
+{
+ req->chunk_cb = cb;
+}
+
+/*
+ * Allows for inspection of the request URI
+ */
+
+/* Return the request's URI string, or NULL (with a debug log) if the
+ * request line has not been parsed yet. The string remains owned by
+ * the request. */
+const char *
+evhttp_request_uri(struct evhttp_request *req) {
+ if (req->uri == NULL)
+ event_debug(("%s: request %p has no uri\n", __func__, req));
+ return (req->uri);
+}
+
+/*
+ * Takes a file descriptor to read a request from.
+ * The callback is executed once the whole request has been read.
+ */
+
+/*
+ * Wrap an accepted socket in a connection object for the server side.
+ * Returns NULL if the peer address cannot be converted to text or the
+ * connection object cannot be created; the caller keeps ownership of
+ * 'fd' in that case.
+ */
+static struct evhttp_connection*
+evhttp_get_request_connection(
+ struct evhttp* http,
+ int fd, struct sockaddr *sa, socklen_t salen)
+{
+ struct evhttp_connection *evcon;
+ char *hostname = NULL, *portname = NULL;
+
+ /* name_from_addr leaves the out-pointers untouched on failure,
+ * hence the NULL pre-initialization above. */
+ name_from_addr(sa, salen, &hostname, &portname);
+ if (hostname == NULL || portname == NULL) {
+ if (hostname) free(hostname);
+ if (portname) free(portname);
+ return (NULL);
+ }
+
+ event_debug(("%s: new request from %s:%s on %d\n",
+ __func__, hostname, portname, fd));
+
+ /* we need a connection object to put the http request on */
+ evcon = evhttp_connection_new(hostname, atoi(portname));
+ free(hostname);
+ free(portname);
+ if (evcon == NULL)
+ return (NULL);
+
+ /* associate the base if we have one*/
+ evhttp_connection_set_base(evcon, http->base);
+
+ /* Mark as server-side and skip the connect phase: the socket is
+ * already established, so start at the request-line state. */
+ evcon->flags |= EVHTTP_CON_INCOMING;
+ evcon->state = EVCON_READING_FIRSTLINE;
+
+ evcon->fd = fd;
+
+ return (evcon);
+}
+
+/*
+ * Create a fresh server-side request on 'evcon' and start reading it.
+ * Returns 0 on success, -1 if the request object cannot be allocated.
+ * Note: strdup failure is fatal here (event_err exits the process).
+ */
+static int
+evhttp_associate_new_request_with_connection(struct evhttp_connection *evcon)
+{
+ struct evhttp *http = evcon->http_server;
+ struct evhttp_request *req;
+ if ((req = evhttp_request_new(evhttp_handle_request, http)) == NULL)
+ return (-1);
+
+ req->evcon = evcon; /* the request ends up owning the connection */
+ req->flags |= EVHTTP_REQ_OWN_CONNECTION;
+
+ TAILQ_INSERT_TAIL(&evcon->requests, req, next);
+
+ /* Incoming, so flip the default EVHTTP_RESPONSE kind. */
+ req->kind = EVHTTP_REQUEST;
+
+ if ((req->remote_host = strdup(evcon->address)) == NULL)
+ event_err(1, "%s: strdup", __func__);
+ req->remote_port = evcon->port;
+
+ evhttp_start_read(evcon);
+
+ return (0);
+}
+
+/*
+ * Entry point for a newly accepted socket: build a connection, apply
+ * the server's idle timeout, link it into the server's connection list
+ * and kick off reading the first request. On failure the connection
+ * (and with it the fd) is freed.
+ */
+void
+evhttp_get_request(struct evhttp *http, int fd,
+ struct sockaddr *sa, socklen_t salen)
+{
+ struct evhttp_connection *evcon;
+
+ evcon = evhttp_get_request_connection(http, fd, sa, salen);
+ if (evcon == NULL)
+ return;
+
+ /* the timeout can be used by the server to close idle connections */
+ if (http->timeout != -1)
+ evhttp_connection_set_timeout(evcon, http->timeout);
+
+ /*
+ * if we want to accept more than one request on a connection,
+ * we need to know which http server it belongs to.
+ */
+ evcon->http_server = http;
+ TAILQ_INSERT_TAIL(&http->connections, evcon, next);
+
+ if (evhttp_associate_new_request_with_connection(evcon) == -1)
+ evhttp_connection_free(evcon);
+}
+
+
+/*
+ * Network helper functions that we do not want to export to the rest of
+ * the world.
+ */
+#if 0 /* Unused */
+/* Resolve a host name to an addrinfo list. Compiled out: nothing in
+ * this file calls it; kept for reference only. Caller would own the
+ * returned list (freeaddrinfo). */
+static struct addrinfo *
+addr_from_name(char *address)
+{
+#ifdef HAVE_GETADDRINFO
+ struct addrinfo ai, *aitop;
+ int ai_result;
+
+ memset(&ai, 0, sizeof(ai));
+ ai.ai_family = AF_INET;
+ ai.ai_socktype = SOCK_RAW;
+ ai.ai_flags = 0;
+ if ((ai_result = getaddrinfo(address, NULL, &ai, &aitop)) != 0) {
+ /* EAI_SYSTEM means the real error is in errno. */
+ if ( ai_result == EAI_SYSTEM )
+ event_warn("getaddrinfo");
+ else
+ event_warnx("getaddrinfo: %s", gai_strerror(ai_result));
+ }
+
+ return (aitop);
+#else
+ assert(0);
+ return NULL; /* XXXXX Use gethostbyname, if this function is ever used. */
+#endif
+}
+#endif
+
+/*
+ * Convert a socket address to numeric host and port strings. On
+ * success *phost and *pport receive strdup'ed strings owned by the
+ * caller; on lookup failure the out-pointers are left untouched, so
+ * callers must pre-initialize them to NULL to detect it.
+ * NOTE(review): the strdup results are not checked, so on OOM an
+ * out-pointer may be set to NULL as well — callers appear to treat
+ * that the same as failure.
+ */
+static void
+name_from_addr(struct sockaddr *sa, socklen_t salen,
+ char **phost, char **pport)
+{
+ char ntop[NI_MAXHOST];
+ char strport[NI_MAXSERV];
+ int ni_result;
+
+#ifdef HAVE_GETNAMEINFO
+ ni_result = getnameinfo(sa, salen,
+ ntop, sizeof(ntop), strport, sizeof(strport),
+ NI_NUMERICHOST|NI_NUMERICSERV);
+
+ if (ni_result != 0) {
+ /* EAI_SYSTEM: real cause is in errno; event_err exits. */
+ if (ni_result == EAI_SYSTEM)
+ event_err(1, "getnameinfo failed");
+ else
+ event_errx(1, "getnameinfo failed: %s", gai_strerror(ni_result));
+ return;
+ }
+#else
+ ni_result = fake_getnameinfo(sa, salen,
+ ntop, sizeof(ntop), strport, sizeof(strport),
+ NI_NUMERICHOST|NI_NUMERICSERV);
+ if (ni_result != 0)
+ return;
+#endif
+ *phost = strdup(ntop);
+ *pport = strdup(strport);
+}
+
+/* Create a non-blocking socket and bind it */
+/* todo: rename this function */
+static int
+bind_socket_ai(struct addrinfo *ai, int reuse)
+{
+ int fd, on = 1, r;
+ int serrno;
+
+ /* Create listen socket */
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (fd == -1) {
+ event_warn("socket");
+ return (-1);
+ }
+
+ if (evutil_make_socket_nonblocking(fd) < 0)
+ goto out;
+
+#ifndef WIN32
+ if (fcntl(fd, F_SETFD, 1) == -1) {
+ event_warn("fcntl(F_SETFD)");
+ goto out;
+ }
+#endif
+
+ setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, (void *)&on, sizeof(on));
+ if (reuse) {
+ setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
+ (void *)&on, sizeof(on));
+ }
+
+ if (ai != NULL) {
+ r = bind(fd, ai->ai_addr, ai->ai_addrlen);
+ if (r == -1)
+ goto out;
+ }
+
+ return (fd);
+
+ out:
+ serrno = EVUTIL_SOCKET_ERROR();
+ EVUTIL_CLOSESOCKET(fd);
+ EVUTIL_SET_SOCKET_ERROR(serrno);
+ return (-1);
+}
+
+/*
+ * Build an addrinfo for address:port suitable for bind/connect.
+ * With getaddrinfo the caller must free the result with freeaddrinfo;
+ * without it, the result points into static storage (see below) and is
+ * released via fake_freeaddrinfo. Returns NULL on failure.
+ */
+static struct addrinfo *
+make_addrinfo(const char *address, u_short port)
+{
+ struct addrinfo *aitop = NULL;
+
+#ifdef HAVE_GETADDRINFO
+ struct addrinfo ai;
+ char strport[NI_MAXSERV];
+ int ai_result;
+
+ memset(&ai, 0, sizeof(ai));
+ ai.ai_family = AF_INET;
+ ai.ai_socktype = SOCK_STREAM;
+ ai.ai_flags = AI_PASSIVE; /* turn NULL host name into INADDR_ANY */
+ evutil_snprintf(strport, sizeof(strport), "%d", port);
+ if ((ai_result = getaddrinfo(address, strport, &ai, &aitop)) != 0) {
+ if ( ai_result == EAI_SYSTEM )
+ event_warn("getaddrinfo");
+ else
+ event_warnx("getaddrinfo: %s", gai_strerror(ai_result));
+ return (NULL);
+ }
+#else
+ /* Not thread-safe: rotates between two static slots so that two
+ * outstanding results can coexist (e.g. bind + connect). */
+ static int cur;
+ static struct addrinfo ai[2]; /* We will be returning the address of some of this memory so it has to last even after this call. */
+ if (++cur == 2) cur = 0; /* allow calling this function twice */
+
+ if (fake_getaddrinfo(address, &ai[cur]) < 0) {
+ event_warn("fake_getaddrinfo");
+ return (NULL);
+ }
+ aitop = &ai[cur];
+ ((struct sockaddr_in *) aitop->ai_addr)->sin_port = htons(port);
+#endif
+
+ return (aitop);
+}
+
+/*
+ * Create a non-blocking socket bound to address:port, or an unbound
+ * socket when both are unset. Returns the fd or -1 on failure.
+ */
+static int
+bind_socket(const char *address, u_short port, int reuse)
+{
+ int fd;
+ struct addrinfo *aitop = NULL;
+
+ /* just create an unbound socket */
+ if (address == NULL && port == 0)
+ return bind_socket_ai(NULL, 0);
+
+ aitop = make_addrinfo(address, port);
+
+ if (aitop == NULL)
+ return (-1);
+
+ fd = bind_socket_ai(aitop, reuse);
+
+ /* Release with the matching allocator for this build. */
+#ifdef HAVE_GETADDRINFO
+ freeaddrinfo(aitop);
+#else
+ fake_freeaddrinfo(aitop);
+#endif
+
+ return (fd);
+}
+
+/*
+ * Start a (typically non-blocking) connect of 'fd' to address:port.
+ * Returns 0 when the connect succeeded or is in progress, -1 on error.
+ */
+static int
+socket_connect(int fd, const char *address, unsigned short port)
+{
+ struct addrinfo *ai = make_addrinfo(address, port);
+ int res = -1;
+
+ if (ai == NULL) {
+ event_debug(("%s: make_addrinfo: \"%s:%d\"",
+ __func__, address, port));
+ return (-1);
+ }
+
+ if (connect(fd, ai->ai_addr, ai->ai_addrlen) == -1) {
+ /* A non-blocking connect reports "in progress"; only other
+ * errors are real failures. */
+#ifdef WIN32
+ int tmp_error = WSAGetLastError();
+ if (tmp_error != WSAEWOULDBLOCK && tmp_error != WSAEINVAL &&
+ tmp_error != WSAEINPROGRESS) {
+ goto out;
+ }
+#else
+ if (errno != EINPROGRESS) {
+ goto out;
+ }
+#endif
+ }
+
+ /* everything is fine */
+ res = 0;
+
+out:
+#ifdef HAVE_GETADDRINFO
+ freeaddrinfo(ai);
+#else
+ fake_freeaddrinfo(ai);
+#endif
+
+ return (res);
+}
diff --git a/libevent/kqueue.c b/libevent/kqueue.c
new file mode 100644
index 00000000000..36eebe5fc6e
--- /dev/null
+++ b/libevent/kqueue.c
@@ -0,0 +1,449 @@
+/* $OpenBSD: kqueue.c,v 1.5 2002/07/10 14:41:31 art Exp $ */
+
+/*
+ * Copyright 2000-2002 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#else
+#include <sys/_time.h>
+#endif
+#include <sys/queue.h>
+#include <sys/event.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <assert.h>
+#ifdef HAVE_INTTYPES_H
+#include <inttypes.h>
+#endif
+
+/* Some platforms apparently define the udata field of struct kevent as
+ * intptr_t, whereas others define it as void*. There doesn't seem to be an
+ * easy way to tell them apart via autoconf, so we need to use OS macros. */
+#if defined(HAVE_INTTYPES_H) && !defined(__OpenBSD__) && !defined(__FreeBSD__) && !defined(__darwin__) && !defined(__APPLE__)
+#define PTR_TO_UDATA(x) ((intptr_t)(x))
+#else
+#define PTR_TO_UDATA(x) (x)
+#endif
+
+#include "event.h"
+#include "event-internal.h"
+#include "log.h"
+#include "event-internal.h"
+
+#define EVLIST_X_KQINKERNEL 0x1000
+
+#define NEVENT 64
+
+struct kqop {
+ struct kevent *changes;
+ int nchanges;
+ struct kevent *events;
+ struct event_list evsigevents[NSIG];
+ int nevents;
+ int kq;
+ pid_t pid;
+};
+
+static void *kq_init (struct event_base *);
+static int kq_add (void *, struct event *);
+static int kq_del (void *, struct event *);
+static int kq_dispatch (struct event_base *, void *, struct timeval *);
+static int kq_insert (struct kqop *, struct kevent *);
+static void kq_dealloc (struct event_base *, void *);
+
+const struct eventop kqops = {
+ "kqueue",
+ kq_init,
+ kq_add,
+ kq_del,
+ kq_dispatch,
+ kq_dealloc,
+ 1 /* need reinit */
+};
+
+/*
+ * Allocate and initialize the kqueue backend state. Returns NULL (so
+ * event_base falls back to another backend) if kqueue is disabled via
+ * the environment, allocation fails, or the kernel's kqueue is broken.
+ */
+static void *
+kq_init(struct event_base *base)
+{
+ int i, kq;
+ struct kqop *kqueueop;
+
+ /* Disable kqueue when this environment variable is set */
+ if (getenv("EVENT_NOKQUEUE"))
+ return (NULL);
+
+ if (!(kqueueop = calloc(1, sizeof(struct kqop))))
+ return (NULL);
+
+ /* Initalize the kernel queue */
+
+ if ((kq = kqueue()) == -1) {
+ event_warn("kqueue");
+ free (kqueueop);
+ return (NULL);
+ }
+
+ kqueueop->kq = kq;
+
+ /* Remember the owning process: kqueue fds don't survive fork(),
+ * so kq_dealloc only closes it in the creating process. */
+ kqueueop->pid = getpid();
+
+ /* Initalize fields */
+ kqueueop->changes = malloc(NEVENT * sizeof(struct kevent));
+ if (kqueueop->changes == NULL) {
+ free (kqueueop);
+ return (NULL);
+ }
+ kqueueop->events = malloc(NEVENT * sizeof(struct kevent));
+ if (kqueueop->events == NULL) {
+ free (kqueueop->changes);
+ free (kqueueop);
+ return (NULL);
+ }
+ kqueueop->nevents = NEVENT;
+
+ /* we need to keep track of multiple events per signal */
+ for (i = 0; i < NSIG; ++i) {
+ TAILQ_INIT(&kqueueop->evsigevents[i]);
+ }
+
+ /* Check for Mac OS X kqueue bug. */
+ /* Probe with a bogus ident: a working kqueue reports the error in
+ * events[0] (EV_ERROR) rather than failing the kevent() call. */
+ kqueueop->changes[0].ident = -1;
+ kqueueop->changes[0].filter = EVFILT_READ;
+ kqueueop->changes[0].flags = EV_ADD;
+ /*
+ * If kqueue works, then kevent will succeed, and it will
+ * stick an error in events[0]. If kqueue is broken, then
+ * kevent will fail.
+ */
+ if (kevent(kq,
+ kqueueop->changes, 1, kqueueop->events, NEVENT, NULL) != 1 ||
+ kqueueop->events[0].ident != -1 ||
+ kqueueop->events[0].flags != EV_ERROR) {
+ event_warn("%s: detected broken kqueue; not using.", __func__);
+ free(kqueueop->changes);
+ free(kqueueop->events);
+ free(kqueueop);
+ close(kq);
+ return (NULL);
+ }
+
+ return (kqueueop);
+}
+
+/*
+ * Queue a change record for the next kevent() call, growing both the
+ * change and result arrays (doubling) when full. Returns 0 on success,
+ * -1 on allocation failure (the existing arrays stay valid).
+ */
+static int
+kq_insert(struct kqop *kqop, struct kevent *kev)
+{
+ int nevents = kqop->nevents;
+
+ if (kqop->nchanges == nevents) {
+ struct kevent *newchange;
+ struct kevent *newresult;
+
+ nevents *= 2;
+
+ /* realloc into a temporary so failure can't clobber the
+ * still-valid original pointer. */
+ newchange = realloc(kqop->changes,
+ nevents * sizeof(struct kevent));
+ if (newchange == NULL) {
+ event_warn("%s: malloc", __func__);
+ return (-1);
+ }
+ kqop->changes = newchange;
+
+ newresult = realloc(kqop->events,
+ nevents * sizeof(struct kevent));
+
+ /*
+ * If we fail, we don't have to worry about freeing,
+ * the next realloc will pick it up.
+ */
+ if (newresult == NULL) {
+ event_warn("%s: malloc", __func__);
+ return (-1);
+ }
+ kqop->events = newresult;
+
+ kqop->nevents = nevents;
+ }
+
+ memcpy(&kqop->changes[kqop->nchanges++], kev, sizeof(struct kevent));
+
+ event_debug(("%s: fd %d %s%s",
+ __func__, (int)kev->ident,
+ kev->filter == EVFILT_READ ? "EVFILT_READ" : "EVFILT_WRITE",
+ kev->flags == EV_DELETE ? " (del)" : ""));
+
+ return (0);
+}
+
+/* No-op handler: its only job is to interrupt kevent() so the signal
+ * is delivered through the EVFILT_SIGNAL path instead. */
+static void
+kq_sighandler(int sig)
+{
+ /* Do nothing here */
+}
+
+/*
+ * Submit all queued changes and wait (up to 'tv', or forever if NULL)
+ * for events, then activate the corresponding struct events.
+ * Returns 0 on success or benign interruption (EINTR), -1 on error.
+ */
+static int
+kq_dispatch(struct event_base *base, void *arg, struct timeval *tv)
+{
+ struct kqop *kqop = arg;
+ struct kevent *changes = kqop->changes;
+ struct kevent *events = kqop->events;
+ struct event *ev;
+ struct timespec ts, *ts_p = NULL;
+ int i, res;
+
+ if (tv != NULL) {
+ TIMEVAL_TO_TIMESPEC(tv, &ts);
+ ts_p = &ts;
+ }
+
+ /* Flush pending changes and collect ready events in one call. */
+ res = kevent(kqop->kq, changes, kqop->nchanges,
+ events, kqop->nevents, ts_p);
+ kqop->nchanges = 0;
+ if (res == -1) {
+ if (errno != EINTR) {
+ event_warn("kevent");
+ return (-1);
+ }
+
+ return (0);
+ }
+
+ event_debug(("%s: kevent reports %d", __func__, res));
+
+ for (i = 0; i < res; i++) {
+ int which = 0;
+
+ if (events[i].flags & EV_ERROR) {
+ /*
+ * Error messages that can happen, when a delete fails.
+ * EBADF happens when the file discriptor has been
+ * closed,
+ * ENOENT when the file discriptor was closed and
+ * then reopened.
+ * EINVAL for some reasons not understood; EINVAL
+ * should not be returned ever; but FreeBSD does :-\
+ * An error is also indicated when a callback deletes
+ * an event we are still processing. In that case
+ * the data field is set to ENOENT.
+ */
+ if (events[i].data == EBADF ||
+ events[i].data == EINVAL ||
+ events[i].data == ENOENT)
+ continue;
+ errno = events[i].data;
+ return (-1);
+ }
+
+ /* Translate the kqueue filter into libevent's event mask. */
+ if (events[i].filter == EVFILT_READ) {
+ which |= EV_READ;
+ } else if (events[i].filter == EVFILT_WRITE) {
+ which |= EV_WRITE;
+ } else if (events[i].filter == EVFILT_SIGNAL) {
+ which |= EV_SIGNAL;
+ }
+
+ if (!which)
+ continue;
+
+ if (events[i].filter == EVFILT_SIGNAL) {
+ /* udata holds the per-signal event list; data is the
+ * number of times the signal fired. */
+ struct event_list *head =
+ (struct event_list *)events[i].udata;
+ TAILQ_FOREACH(ev, head, ev_signal_next) {
+ event_active(ev, which, events[i].data);
+ }
+ } else {
+ ev = (struct event *)events[i].udata;
+
+ /* Non-persistent events were added EV_ONESHOT, so the
+ * kernel has already forgotten them. */
+ if (!(ev->ev_events & EV_PERSIST))
+ ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
+
+ event_active(ev, which, 1);
+ }
+ }
+
+ return (0);
+}
+
+
+/*
+ * Register an event with the kernel queue. Signal events are installed
+ * immediately (and share one kernel registration per signal number);
+ * read/write events are batched via kq_insert until the next dispatch.
+ * Returns 0 on success, -1 on failure.
+ */
+static int
+kq_add(void *arg, struct event *ev)
+{
+ struct kqop *kqop = arg;
+ struct kevent kev;
+
+ if (ev->ev_events & EV_SIGNAL) {
+ int nsignal = EVENT_SIGNAL(ev);
+
+ assert(nsignal >= 0 && nsignal < NSIG);
+ /* Only the first event for a signal registers with the
+ * kernel; later ones just join the per-signal list. */
+ if (TAILQ_EMPTY(&kqop->evsigevents[nsignal])) {
+ struct timespec timeout = { 0, 0 };
+
+ memset(&kev, 0, sizeof(kev));
+ kev.ident = nsignal;
+ kev.filter = EVFILT_SIGNAL;
+ kev.flags = EV_ADD;
+ kev.udata = PTR_TO_UDATA(&kqop->evsigevents[nsignal]);
+
+ /* Be ready for the signal if it is sent any
+ * time between now and the next call to
+ * kq_dispatch. */
+ if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1)
+ return (-1);
+
+ if (_evsignal_set_handler(ev->ev_base, nsignal,
+ kq_sighandler) == -1)
+ return (-1);
+ }
+
+ TAILQ_INSERT_TAIL(&kqop->evsigevents[nsignal], ev,
+ ev_signal_next);
+ ev->ev_flags |= EVLIST_X_KQINKERNEL;
+ return (0);
+ }
+
+ if (ev->ev_events & EV_READ) {
+ memset(&kev, 0, sizeof(kev));
+ kev.ident = ev->ev_fd;
+ kev.filter = EVFILT_READ;
+#ifdef NOTE_EOF
+ /* Make it behave like select() and poll() */
+ kev.fflags = NOTE_EOF;
+#endif
+ kev.flags = EV_ADD;
+ /* One-shot unless the event is persistent. */
+ if (!(ev->ev_events & EV_PERSIST))
+ kev.flags |= EV_ONESHOT;
+ kev.udata = PTR_TO_UDATA(ev);
+
+ if (kq_insert(kqop, &kev) == -1)
+ return (-1);
+
+ ev->ev_flags |= EVLIST_X_KQINKERNEL;
+ }
+
+ if (ev->ev_events & EV_WRITE) {
+ memset(&kev, 0, sizeof(kev));
+ kev.ident = ev->ev_fd;
+ kev.filter = EVFILT_WRITE;
+ kev.flags = EV_ADD;
+ if (!(ev->ev_events & EV_PERSIST))
+ kev.flags |= EV_ONESHOT;
+ kev.udata = PTR_TO_UDATA(ev);
+
+ if (kq_insert(kqop, &kev) == -1)
+ return (-1);
+
+ ev->ev_flags |= EVLIST_X_KQINKERNEL;
+ }
+
+ return (0);
+}
+
+/*
+ * Unregister an event. Signal deletions go to the kernel immediately
+ * (mirroring kq_add); read/write deletions are batched via kq_insert.
+ * Events never handed to the kernel are skipped. Returns 0 / -1.
+ */
+static int
+kq_del(void *arg, struct event *ev)
+{
+ struct kqop *kqop = arg;
+ struct kevent kev;
+
+ if (!(ev->ev_flags & EVLIST_X_KQINKERNEL))
+ return (0);
+
+ if (ev->ev_events & EV_SIGNAL) {
+ int nsignal = EVENT_SIGNAL(ev);
+ struct timespec timeout = { 0, 0 };
+
+ assert(nsignal >= 0 && nsignal < NSIG);
+ TAILQ_REMOVE(&kqop->evsigevents[nsignal], ev, ev_signal_next);
+ /* Only drop the kernel registration once the last event for
+ * this signal is gone. */
+ if (TAILQ_EMPTY(&kqop->evsigevents[nsignal])) {
+ memset(&kev, 0, sizeof(kev));
+ kev.ident = nsignal;
+ kev.filter = EVFILT_SIGNAL;
+ kev.flags = EV_DELETE;
+
+ /* Because we insert signal events
+ * immediately, we need to delete them
+ * immediately, too */
+ if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1)
+ return (-1);
+
+ if (_evsignal_restore_handler(ev->ev_base,
+ nsignal) == -1)
+ return (-1);
+ }
+
+ ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
+ return (0);
+ }
+
+ if (ev->ev_events & EV_READ) {
+ memset(&kev, 0, sizeof(kev));
+ kev.ident = ev->ev_fd;
+ kev.filter = EVFILT_READ;
+ kev.flags = EV_DELETE;
+
+ if (kq_insert(kqop, &kev) == -1)
+ return (-1);
+
+ ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
+ }
+
+ if (ev->ev_events & EV_WRITE) {
+ memset(&kev, 0, sizeof(kev));
+ kev.ident = ev->ev_fd;
+ kev.filter = EVFILT_WRITE;
+ kev.flags = EV_DELETE;
+
+ if (kq_insert(kqop, &kev) == -1)
+ return (-1);
+
+ ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
+ }
+
+ return (0);
+}
+
+/*
+ * Release the kqueue backend state. free(NULL) is a no-op, so the
+ * original's 'if (ptr) free(ptr)' guards are dropped. The kqueue fd is
+ * only closed by the process that created it (kqueue fds are not
+ * inherited across fork, and a child must not close the parent's).
+ * The struct is scrubbed before free to make use-after-free obvious.
+ */
+static void
+kq_dealloc(struct event_base *base, void *arg)
+{
+ struct kqop *kqop = arg;
+
+ free(kqop->changes);
+ free(kqop->events);
+ if (kqop->kq >= 0 && kqop->pid == getpid())
+ close(kqop->kq);
+ memset(kqop, 0, sizeof(struct kqop));
+ free(kqop);
+}
diff --git a/libevent/log.c b/libevent/log.c
new file mode 100644
index 00000000000..b62a61915d1
--- /dev/null
+++ b/libevent/log.c
@@ -0,0 +1,187 @@
+/* $OpenBSD: err.c,v 1.2 2002/06/25 15:50:15 mickey Exp $ */
+
+/*
+ * log.c
+ *
+ * Based on err.c, which was adapted from OpenBSD libc *err* *warn* code.
+ *
+ * Copyright (c) 2005 Nick Mathewson <nickm@freehaven.net>
+ *
+ * Copyright (c) 2000 Dug Song <dugsong@monkey.org>
+ *
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#undef WIN32_LEAN_AND_MEAN
+#endif
+#include <sys/types.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#else
+#include <sys/_time.h>
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <errno.h>
+#include "event.h"
+
+#include "log.h"
+#include "evutil.h"
+
+static void _warn_helper(int severity, int log_errno, const char *fmt,
+ va_list ap);
+static void event_log(int severity, const char *msg);
+
+/* Log at ERR severity with the current errno appended, then exit(eval). */
+void
+event_err(int eval, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ _warn_helper(_EVENT_LOG_ERR, errno, fmt, ap);
+ va_end(ap);
+ exit(eval);
+}
+
+/* Log at WARN severity with the current errno appended. */
+void
+event_warn(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ _warn_helper(_EVENT_LOG_WARN, errno, fmt, ap);
+ va_end(ap);
+}
+
+/* Log at ERR severity without errno (log_errno = -1), then exit(eval). */
+void
+event_errx(int eval, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ _warn_helper(_EVENT_LOG_ERR, -1, fmt, ap);
+ va_end(ap);
+ exit(eval);
+}
+
+/* Log at WARN severity without errno. */
+void
+event_warnx(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ _warn_helper(_EVENT_LOG_WARN, -1, fmt, ap);
+ va_end(ap);
+}
+
+/* Log at MSG (informational) severity without errno. */
+void
+event_msgx(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ _warn_helper(_EVENT_LOG_MSG, -1, fmt, ap);
+ va_end(ap);
+}
+
+/* Log at DEBUG severity; normally reached via the event_debug() macro. */
+void
+_event_debugx(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ _warn_helper(_EVENT_LOG_DEBUG, -1, fmt, ap);
+ va_end(ap);
+}
+
+/*
+ * Format the message into a fixed buffer, append ": strerror(errno)"
+ * when log_errno >= 0 and space permits, and hand the result to
+ * event_log(). Messages longer than the buffer are truncated.
+ */
+static void
+_warn_helper(int severity, int log_errno, const char *fmt, va_list ap)
+{
+ char buf[1024];
+ size_t len;
+
+ if (fmt != NULL)
+ evutil_vsnprintf(buf, sizeof(buf), fmt, ap);
+ else
+ buf[0] = '\0';
+
+ if (log_errno >= 0) {
+ len = strlen(buf);
+ /* Need room for at least ": " plus one character. */
+ if (len < sizeof(buf) - 3) {
+ evutil_snprintf(buf + len, sizeof(buf) - len, ": %s",
+ strerror(log_errno));
+ }
+ }
+
+ event_log(severity, buf);
+}
+
+/* User-installed log sink; NULL means "write to stderr" (see event_log). */
+static event_log_cb log_fn = NULL;
+
+/* Redirect all library logging to 'cb'; pass NULL to restore stderr. */
+void
+event_set_log_callback(event_log_cb cb)
+{
+ log_fn = cb;
+}
+
+/*
+ * Deliver one log line: hand it to the user-installed callback when
+ * one is set, otherwise print "[severity] msg" to stderr.
+ */
+static void
+event_log(int severity, const char *msg)
+{
+ const char *severity_str;
+
+ if (log_fn != NULL) {
+ log_fn(severity, msg);
+ return;
+ }
+
+ if (severity == _EVENT_LOG_DEBUG)
+ severity_str = "debug";
+ else if (severity == _EVENT_LOG_MSG)
+ severity_str = "msg";
+ else if (severity == _EVENT_LOG_WARN)
+ severity_str = "warn";
+ else if (severity == _EVENT_LOG_ERR)
+ severity_str = "err";
+ else
+ severity_str = "???";
+
+ (void)fprintf(stderr, "[%s] %s\n", severity_str, msg);
+}
diff --git a/libevent/log.h b/libevent/log.h
new file mode 100644
index 00000000000..7bc6632b8dd
--- /dev/null
+++ b/libevent/log.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2000-2004 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _LOG_H_
+#define _LOG_H_
+
+#ifdef __GNUC__
+#define EV_CHECK_FMT(a,b) __attribute__((format(printf, a, b)))
+#else
+#define EV_CHECK_FMT(a,b)
+#endif
+
+void event_err(int eval, const char *fmt, ...) EV_CHECK_FMT(2,3);
+void event_warn(const char *fmt, ...) EV_CHECK_FMT(1,2);
+void event_errx(int eval, const char *fmt, ...) EV_CHECK_FMT(2,3);
+void event_warnx(const char *fmt, ...) EV_CHECK_FMT(1,2);
+void event_msgx(const char *fmt, ...) EV_CHECK_FMT(1,2);
+void _event_debugx(const char *fmt, ...) EV_CHECK_FMT(1,2);
+
+#ifdef USE_DEBUG
+#define event_debug(x) _event_debugx x
+#else
+#define event_debug(x) do {;} while (0)
+#endif
+
+#undef EV_CHECK_FMT
+
+#endif
diff --git a/libevent/min_heap.h b/libevent/min_heap.h
new file mode 100644
index 00000000000..edaa5ae1270
--- /dev/null
+++ b/libevent/min_heap.h
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2006 Maxim Yegorushkin <maxim.yegorushkin@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _MIN_HEAP_H_
+#define _MIN_HEAP_H_
+
+#include "event.h"
+#include "evutil.h"
+#include "stdlib.h"
+
+typedef struct min_heap
+{
+ struct event** p;
+ unsigned n, a;
+} min_heap_t;
+
+static inline void min_heap_ctor(min_heap_t* s);
+static inline void min_heap_dtor(min_heap_t* s);
+static inline void min_heap_elem_init(struct event* e);
+static inline int min_heap_elem_greater(struct event *a, struct event *b);
+static inline int min_heap_empty(min_heap_t* s);
+static inline unsigned min_heap_size(min_heap_t* s);
+static inline struct event* min_heap_top(min_heap_t* s);
+static inline int min_heap_reserve(min_heap_t* s, unsigned n);
+static inline int min_heap_push(min_heap_t* s, struct event* e);
+static inline struct event* min_heap_pop(min_heap_t* s);
+static inline int min_heap_erase(min_heap_t* s, struct event* e);
+static inline void min_heap_shift_up_(min_heap_t* s, unsigned hole_index, struct event* e);
+static inline void min_heap_shift_down_(min_heap_t* s, unsigned hole_index, struct event* e);
+
+/* Heap ordering predicate: true when a's timeout fires after b's,
+ * making this a min-heap on expiry time. */
+int min_heap_elem_greater(struct event *a, struct event *b)
+{
+ return evutil_timercmp(&a->ev_timeout, &b->ev_timeout, >);
+}
+
+/* Trivial accessors; min_heap_idx == -1 marks "not in any heap". */
+void min_heap_ctor(min_heap_t* s) { s->p = 0; s->n = 0; s->a = 0; }
+void min_heap_dtor(min_heap_t* s) { free(s->p); }
+void min_heap_elem_init(struct event* e) { e->min_heap_idx = -1; }
+int min_heap_empty(min_heap_t* s) { return 0u == s->n; }
+unsigned min_heap_size(min_heap_t* s) { return s->n; }
+struct event* min_heap_top(min_heap_t* s) { return s->n ? *s->p : 0; }
+
+/* Insert 'e'; returns 0 on success, -1 if growing the array fails. */
+int min_heap_push(min_heap_t* s, struct event* e)
+{
+ if(min_heap_reserve(s, s->n + 1))
+ return -1;
+ min_heap_shift_up_(s, s->n++, e);
+ return 0;
+}
+
+/* Remove and return the event with the earliest timeout, or 0 if empty. */
+struct event* min_heap_pop(min_heap_t* s)
+{
+ if(s->n)
+ {
+ struct event* e = *s->p;
+ /* Re-seat the last element from the vacated root downward. */
+ min_heap_shift_down_(s, 0u, s->p[--s->n]);
+ e->min_heap_idx = -1;
+ return e;
+ }
+ return 0;
+}
+
+/* Remove an arbitrary element; returns 0, or -1 if 'e' is not in the
+ * heap (min_heap_idx == -1). */
+int min_heap_erase(min_heap_t* s, struct event* e)
+{
+ if(((unsigned int)-1) != e->min_heap_idx)
+ {
+ struct event *last = s->p[--s->n];
+ unsigned parent = (e->min_heap_idx - 1) / 2;
+ /* we replace e with the last element in the heap. We might need to
+ shift it upward if it is less than its parent, or downward if it is
+ greater than one or both its children. Since the children are known
+ to be less than the parent, it can't need to shift both up and
+ down. */
+ if (e->min_heap_idx > 0 && min_heap_elem_greater(s->p[parent], last))
+ min_heap_shift_up_(s, e->min_heap_idx, last);
+ else
+ min_heap_shift_down_(s, e->min_heap_idx, last);
+ e->min_heap_idx = -1;
+ return 0;
+ }
+ return -1;
+}
+
+/* Ensure capacity for at least n elements (doubling growth, min 8);
+ * returns 0 on success, -1 on allocation failure (heap unchanged). */
+int min_heap_reserve(min_heap_t* s, unsigned n)
+{
+ if(s->a < n)
+ {
+ struct event** p;
+ unsigned a = s->a ? s->a * 2 : 8;
+ if(a < n)
+ a = n;
+ if(!(p = (struct event**)realloc(s->p, a * sizeof *p)))
+ return -1;
+ s->p = p;
+ s->a = a;
+ }
+ return 0;
+}
+
+/* Sift 'e' toward the root starting from the hole at hole_index,
+ * updating each displaced element's cached index. */
+void min_heap_shift_up_(min_heap_t* s, unsigned hole_index, struct event* e)
+{
+ unsigned parent = (hole_index - 1) / 2;
+ while(hole_index && min_heap_elem_greater(s->p[parent], e))
+ {
+ (s->p[hole_index] = s->p[parent])->min_heap_idx = hole_index;
+ hole_index = parent;
+ parent = (hole_index - 1) / 2;
+ }
+ (s->p[hole_index] = e)->min_heap_idx = hole_index;
+}
+
+/* Sift 'e' toward the leaves from hole_index, always descending into
+ * the smaller child, then finish with a shift-up to place 'e'. */
+void min_heap_shift_down_(min_heap_t* s, unsigned hole_index, struct event* e)
+{
+ unsigned min_child = 2 * (hole_index + 1);
+ while(min_child <= s->n)
+ {
+ /* Step back to the left child when it is the smaller (or only) one. */
+ min_child -= min_child == s->n || min_heap_elem_greater(s->p[min_child], s->p[min_child - 1]);
+ if(!(min_heap_elem_greater(e, s->p[min_child])))
+ break;
+ (s->p[hole_index] = s->p[min_child])->min_heap_idx = hole_index;
+ hole_index = min_child;
+ min_child = 2 * (hole_index + 1);
+ }
+ min_heap_shift_up_(s, hole_index, e);
+}
+
+#endif /* _MIN_HEAP_H_ */
diff --git a/libevent/poll.c b/libevent/poll.c
new file mode 100644
index 00000000000..5d496618d29
--- /dev/null
+++ b/libevent/poll.c
@@ -0,0 +1,379 @@
+/* $OpenBSD: poll.c,v 1.2 2002/06/25 15:50:15 mickey Exp $ */
+
+/*
+ * Copyright 2000-2003 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#else
+#include <sys/_time.h>
+#endif
+#include <sys/queue.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#ifdef CHECK_INVARIANTS
+#include <assert.h>
+#endif
+
+#include "event.h"
+#include "event-internal.h"
+#include "evsignal.h"
+#include "log.h"
+
/* Per-base private state for the poll(2) backend. */
struct pollop {
	int event_count;		/* Highest number alloc */
	int nfds;                       /* Size of event_* */
	int fd_count;                   /* Size of idxplus1_by_fd */
	struct pollfd *event_set;       /* array handed to poll(2) */
	struct event **event_r_back;    /* event watching fd for read, by slot */
	struct event **event_w_back;    /* event watching fd for write, by slot */
	int *idxplus1_by_fd; /* Index into event_set by fd; we add 1 so
			      * that 0 (which is easy to memset) can mean
			      * "no entry." */
};

static void *poll_init	(struct event_base *);
static int poll_add		(void *, struct event *);
static int poll_del		(void *, struct event *);
static int poll_dispatch	(struct event_base *, void *, struct timeval *);
static void poll_dealloc	(struct event_base *, void *);

/* Backend vtable registered with the event core (see event-internal.h). */
const struct eventop pollops = {
	"poll",
	poll_init,
	poll_add,
	poll_del,
	poll_dispatch,
	poll_dealloc,
	0   /* does not require reinitialization after fork */
};
+
+static void *
+poll_init(struct event_base *base)
+{
+ struct pollop *pollop;
+
+ /* Disable poll when this environment variable is set */
+ if (getenv("EVENT_NOPOLL"))
+ return (NULL);
+
+ if (!(pollop = calloc(1, sizeof(struct pollop))))
+ return (NULL);
+
+ evsignal_init(base);
+
+ return (pollop);
+}
+
#ifdef CHECK_INVARIANTS
/* Debug-only consistency check: every fd->slot mapping must point at a
 * pollfd for that fd, the back-pointer arrays must agree with the
 * POLLIN/POLLOUT bits, and every slot must map back to its fd. */
static void
poll_check_ok(struct pollop *pop)
{
	int i, idx;
	struct event *ev;

	for (i = 0; i < pop->fd_count; ++i) {
		idx = pop->idxplus1_by_fd[i]-1;
		if (idx < 0)
			continue;   /* fd 'i' has no entry */
		assert(pop->event_set[idx].fd == i);
		if (pop->event_set[idx].events & POLLIN) {
			ev = pop->event_r_back[idx];
			assert(ev);
			assert(ev->ev_events & EV_READ);
			assert(ev->ev_fd == i);
		}
		if (pop->event_set[idx].events & POLLOUT) {
			ev = pop->event_w_back[idx];
			assert(ev);
			assert(ev->ev_events & EV_WRITE);
			assert(ev->ev_fd == i);
		}
	}
	/* Reverse direction: each occupied slot maps back through the index. */
	for (i = 0; i < pop->nfds; ++i) {
		struct pollfd *pfd = &pop->event_set[i];
		assert(pop->idxplus1_by_fd[pfd->fd] == i+1);
	}
}
#else
#define poll_check_ok(pop)
#endif
+
/* Run one poll(2) round: wait up to 'tv' (NULL = forever) for activity,
 * dispatch caught signals, then activate the events whose fds fired.
 * Returns 0 on success, -1 on a non-EINTR poll failure. */
static int
poll_dispatch(struct event_base *base, void *arg, struct timeval *tv)
{
	int res, i, j, msec = -1, nfds;
	struct pollop *pop = arg;

	poll_check_ok(pop);

	/* Convert the timeval to whole milliseconds, rounding up so we
	 * never wake before the deadline. */
	if (tv != NULL)
		msec = tv->tv_sec * 1000 + (tv->tv_usec + 999) / 1000;

	nfds = pop->nfds;
	res = poll(pop->event_set, nfds, msec);

	if (res == -1) {
		if (errno != EINTR) {
			event_warn("poll");
			return (-1);
		}

		/* EINTR: a signal handler probably ran; process signals
		 * and let the caller loop again. */
		evsignal_process(base);
		return (0);
	} else if (base->sig.evsignal_caught) {
		evsignal_process(base);
	}

	event_debug(("%s: poll reports %d", __func__, res));

	if (res == 0 || nfds == 0)
		return (0);

	/* Start the scan at a random slot so that, under constant load,
	 * low-numbered fds can't starve high-numbered ones. */
	i = random() % nfds;
	for (j = 0; j < nfds; j++) {
		struct event *r_ev = NULL, *w_ev = NULL;
		int what;
		if (++i == nfds)
			i = 0;
		what = pop->event_set[i].revents;

		if (!what)
			continue;

		res = 0;

		/* If the file gets closed notify */
		if (what & (POLLHUP|POLLERR))
			what |= POLLIN|POLLOUT;
		if (what & POLLIN) {
			res |= EV_READ;
			r_ev = pop->event_r_back[i];
		}
		if (what & POLLOUT) {
			res |= EV_WRITE;
			w_ev = pop->event_w_back[i];
		}
		if (res == 0)
			continue;

		/* Only activate an event for conditions it asked for; the
		 * same event may be both reader and writer, so guard
		 * against activating it twice. */
		if (r_ev && (res & r_ev->ev_events)) {
			event_active(r_ev, res & r_ev->ev_events, 1);
		}
		if (w_ev && w_ev != r_ev && (res & w_ev->ev_events)) {
			event_active(w_ev, res & w_ev->ev_events, 1);
		}
	}

	return (0);
}
+
/* Register 'ev' with the poll backend.  Signal events are delegated to
 * evsignal_add(); read/write events get (or reuse) a pollfd slot for
 * their fd.  Returns 0 on success, -1 on allocation failure. */
static int
poll_add(void *arg, struct event *ev)
{
	struct pollop *pop = arg;
	struct pollfd *pfd = NULL;
	int i;

	if (ev->ev_events & EV_SIGNAL)
		return (evsignal_add(ev));
	if (!(ev->ev_events & (EV_READ|EV_WRITE)))
		return (0);

	poll_check_ok(pop);
	/* Grow the pollfd array and both back-pointer arrays in lockstep
	 * (doubling, starting at 32).  Each realloc commits its pointer
	 * immediately so a later failure never leaves a dangling pointer;
	 * a partially-grown trio merely over-allocates, which is harmless. */
	if (pop->nfds + 1 >= pop->event_count) {
		struct pollfd *tmp_event_set;
		struct event **tmp_event_r_back;
		struct event **tmp_event_w_back;
		int tmp_event_count;

		if (pop->event_count < 32)
			tmp_event_count = 32;
		else
			tmp_event_count = pop->event_count * 2;

		/* We need more file descriptors */
		tmp_event_set = realloc(pop->event_set,
				 tmp_event_count * sizeof(struct pollfd));
		if (tmp_event_set == NULL) {
			event_warn("realloc");
			return (-1);
		}
		pop->event_set = tmp_event_set;

		tmp_event_r_back = realloc(pop->event_r_back,
			    tmp_event_count * sizeof(struct event *));
		if (tmp_event_r_back == NULL) {
			/* event_set overallocated; that's okay. */
			event_warn("realloc");
			return (-1);
		}
		pop->event_r_back = tmp_event_r_back;

		tmp_event_w_back = realloc(pop->event_w_back,
			    tmp_event_count * sizeof(struct event *));
		if (tmp_event_w_back == NULL) {
			/* event_set and event_r_back overallocated; that's
			 * okay. */
			event_warn("realloc");
			return (-1);
		}
		pop->event_w_back = tmp_event_w_back;

		pop->event_count = tmp_event_count;
	}
	/* Grow the fd -> slot index so it can be indexed by ev_fd; the new
	 * tail is zeroed, which encodes "no entry" (index-plus-one == 0). */
	if (ev->ev_fd >= pop->fd_count) {
		int *tmp_idxplus1_by_fd;
		int new_count;
		if (pop->fd_count < 32)
			new_count = 32;
		else
			new_count = pop->fd_count * 2;
		while (new_count <= ev->ev_fd)
			new_count *= 2;
		tmp_idxplus1_by_fd =
			realloc(pop->idxplus1_by_fd, new_count * sizeof(int));
		if (tmp_idxplus1_by_fd == NULL) {
			event_warn("realloc");
			return (-1);
		}
		pop->idxplus1_by_fd = tmp_idxplus1_by_fd;
		memset(pop->idxplus1_by_fd + pop->fd_count,
		    0, sizeof(int)*(new_count - pop->fd_count));
		pop->fd_count = new_count;
	}

	/* Reuse the fd's existing slot (so one fd watched for both read
	 * and write occupies a single pollfd), or claim a fresh one. */
	i = pop->idxplus1_by_fd[ev->ev_fd] - 1;
	if (i >= 0) {
		pfd = &pop->event_set[i];
	} else {
		i = pop->nfds++;
		pfd = &pop->event_set[i];
		pfd->events = 0;
		pfd->fd = ev->ev_fd;
		pop->event_w_back[i] = pop->event_r_back[i] = NULL;
		pop->idxplus1_by_fd[ev->ev_fd] = i + 1;
	}

	pfd->revents = 0;
	if (ev->ev_events & EV_WRITE) {
		pfd->events |= POLLOUT;
		pop->event_w_back[i] = ev;
	}
	if (ev->ev_events & EV_READ) {
		pfd->events |= POLLIN;
		pop->event_r_back[i] = ev;
	}
	poll_check_ok(pop);

	return (0);
}
+
+/*
+ * Nothing to be done here.
+ */
+
/* Unregister 'ev' from the poll backend.  Clears the corresponding
 * POLLIN/POLLOUT bits; when no interest remains on the fd, the slot is
 * recycled by moving the last pollfd into it (swap-with-last delete).
 * Returns 0 on success, -1 if the fd has no slot. */
static int
poll_del(void *arg, struct event *ev)
{
	struct pollop *pop = arg;
	struct pollfd *pfd = NULL;
	int i;

	if (ev->ev_events & EV_SIGNAL)
		return (evsignal_del(ev));

	if (!(ev->ev_events & (EV_READ|EV_WRITE)))
		return (0);

	poll_check_ok(pop);
	i = pop->idxplus1_by_fd[ev->ev_fd] - 1;
	if (i < 0)
		return (-1);

	/* Do we still want to read or write? */
	pfd = &pop->event_set[i];
	if (ev->ev_events & EV_READ) {
		pfd->events &= ~POLLIN;
		pop->event_r_back[i] = NULL;
	}
	if (ev->ev_events & EV_WRITE) {
		pfd->events &= ~POLLOUT;
		pop->event_w_back[i] = NULL;
	}
	poll_check_ok(pop);
	if (pfd->events)
		/* Another event cares about that fd. */
		return (0);

	/* Okay, so we aren't interested in that fd anymore. */
	pop->idxplus1_by_fd[ev->ev_fd] = 0;

	--pop->nfds;
	if (i != pop->nfds) {
		/*
		 * Shift the last pollfd down into the now-unoccupied
		 * position.
		 */
		memcpy(&pop->event_set[i], &pop->event_set[pop->nfds],
		       sizeof(struct pollfd));
		pop->event_r_back[i] = pop->event_r_back[pop->nfds];
		pop->event_w_back[i] = pop->event_w_back[pop->nfds];
		/* Re-point the moved fd's index entry at its new slot. */
		pop->idxplus1_by_fd[pop->event_set[i].fd] = i + 1;
	}

	poll_check_ok(pop);
	return (0);
}
+
+static void
+poll_dealloc(struct event_base *base, void *arg)
+{
+ struct pollop *pop = arg;
+
+ evsignal_dealloc(base);
+ if (pop->event_set)
+ free(pop->event_set);
+ if (pop->event_r_back)
+ free(pop->event_r_back);
+ if (pop->event_w_back)
+ free(pop->event_w_back);
+ if (pop->idxplus1_by_fd)
+ free(pop->idxplus1_by_fd);
+
+ memset(pop, 0, sizeof(struct pollop));
+ free(pop);
+}
diff --git a/libevent/sample/Makefile.am b/libevent/sample/Makefile.am
new file mode 100644
index 00000000000..2f4e26e2f3f
--- /dev/null
+++ b/libevent/sample/Makefile.am
@@ -0,0 +1,14 @@
+AUTOMAKE_OPTIONS = foreign no-dependencies
+
+LDADD = ../libevent.la
+AM_CFLAGS = -I$(top_srcdir) -I$(top_srcdir)/compat
+
+noinst_PROGRAMS = event-test time-test signal-test
+
event_test_SOURCES = event-test.c
time_test_SOURCES = time-test.c
signal_test_SOURCES = signal-test.c
+
+verify:
+
+DISTCLEANFILES = *~
diff --git a/libevent/sample/event-test.c b/libevent/sample/event-test.c
new file mode 100644
index 00000000000..2c6cb93864c
--- /dev/null
+++ b/libevent/sample/event-test.c
@@ -0,0 +1,139 @@
+/*
+ * Compile with:
+ * cc -I/usr/local/include -o event-test event-test.c -L/usr/local/lib -levent
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#ifndef WIN32
+#include <sys/queue.h>
+#include <unistd.h>
+#include <sys/time.h>
+#else
+#include <windows.h>
+#endif
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+
+#include <event.h>
+
/* Read callback for the FIFO: re-arm the (non-persistent) event, read a
 * chunk from 'fd', and echo it to stdout.  'arg' is the struct event
 * itself so the callback can re-add/delete it. */
static void
fifo_read(int fd, short event, void *arg)
{
	char buf[255];
	int len;
	struct event *ev = arg;
#ifdef WIN32
	DWORD dwBytesRead;
#endif

	/* Reschedule this event */
	event_add(ev, NULL);

	fprintf(stderr, "fifo_read called with fd: %d, event: %d, arg: %p\n",
		fd, event, arg);
#ifdef WIN32
	/* On win32 'fd' is really a HANDLE smuggled through an int. */
	len = ReadFile((HANDLE)fd, buf, sizeof(buf) - 1, &dwBytesRead, NULL);

	// Check for end of file.
	if(len && dwBytesRead == 0) {
		fprintf(stderr, "End Of File");
		event_del(ev);
		return;
	}

	buf[dwBytesRead] = '\0';
#else
	/* sizeof(buf) - 1 leaves room for the terminating NUL below. */
	len = read(fd, buf, sizeof(buf) - 1);

	if (len == -1) {
		perror("read");
		return;
	} else if (len == 0) {
		fprintf(stderr, "Connection closed\n");
		return;
	}

	buf[len] = '\0';
#endif
	fprintf(stdout, "Read: %s\n", buf);
}
+
/* Sample program: create a FIFO (or open test.txt on win32), watch it
 * for readability with libevent, and print whatever arrives. */
int
main (int argc, char **argv)
{
	struct event evfifo;
#ifdef WIN32
	HANDLE socket;
	// Open a file.
	socket = CreateFile("test.txt",     // open File
			GENERIC_READ,                 // open for reading
			0,                            // do not share
			NULL,                         // no security
			OPEN_EXISTING,                // existing file only
			FILE_ATTRIBUTE_NORMAL,        // normal file
			NULL);                        // no attr. template

	if(socket == INVALID_HANDLE_VALUE)
		return 1;

#else
	struct stat st;
	const char *fifo = "event.fifo";
	int socket;

	/* Refuse to clobber a regular file that happens to have our name. */
	if (lstat (fifo, &st) == 0) {
		if ((st.st_mode & S_IFMT) == S_IFREG) {
			errno = EEXIST;
			perror("lstat");
			exit (1);
		}
	}

	unlink (fifo);
	if (mkfifo (fifo, 0600) == -1) {
		perror("mkfifo");
		exit (1);
	}

	/* Linux pipes are broken, we need O_RDWR instead of O_RDONLY */
#ifdef __linux
	socket = open (fifo, O_RDWR | O_NONBLOCK, 0);
#else
	socket = open (fifo, O_RDONLY | O_NONBLOCK, 0);
#endif

	if (socket == -1) {
		perror("open");
		exit (1);
	}

	fprintf(stderr, "Write data to %s\n", fifo);
#endif
	/* Initalize the event library */
	event_init();

	/* Initalize one event */
#ifdef WIN32
	/* NOTE(review): casting a HANDLE to int truncates on 64-bit
	 * Windows — sample code only. */
	event_set(&evfifo, (int)socket, EV_READ, fifo_read, &evfifo);
#else
	event_set(&evfifo, socket, EV_READ, fifo_read, &evfifo);
#endif

	/* Add it to the active events, without a timeout */
	event_add(&evfifo, NULL);

	/* Loop forever dispatching events. */
	event_dispatch();
#ifdef WIN32
	CloseHandle(socket);
#endif
	return (0);
}
+
diff --git a/libevent/sample/signal-test.c b/libevent/sample/signal-test.c
new file mode 100644
index 00000000000..9a131cb50c2
--- /dev/null
+++ b/libevent/sample/signal-test.c
@@ -0,0 +1,63 @@
+/*
+ * Compile with:
+ * cc -I/usr/local/include -o signal-test \
+ * signal-test.c -L/usr/local/lib -levent
+ */
+
+#include <sys/types.h>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/stat.h>
+#ifndef WIN32
+#include <sys/queue.h>
+#include <unistd.h>
+#include <sys/time.h>
+#else
+#include <windows.h>
+#endif
+#include <signal.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+
+#include <event.h>
+
/* Number of times the SIGINT handler callback has fired. */
int called = 0;

/* Signal callback: print which signal arrived; after it has fired a few
 * times, remove the persistent event so event_dispatch() can return. */
static void
signal_cb(int fd, short event, void *arg)
{
	struct event *signal = arg;

	printf("%s: got signal %d\n", __func__, EVENT_SIGNAL(signal));

	/* Delete on the third delivery (called is 0, 1, 2 before this). */
	if (called >= 2)
		event_del(signal);

	called++;
}
+
/* Sample program: catch SIGINT with a persistent libevent signal event;
 * exits after the callback deletes the event. */
int
main (int argc, char **argv)
{
	struct event signal_int;

	/* Initalize the event library */
	event_init();

	/* Initalize one event */
	event_set(&signal_int, SIGINT, EV_SIGNAL|EV_PERSIST, signal_cb,
	    &signal_int);

	event_add(&signal_int, NULL);

	/* Runs until signal_cb calls event_del(). */
	event_dispatch();

	return (0);
}
+
diff --git a/libevent/sample/time-test.c b/libevent/sample/time-test.c
new file mode 100644
index 00000000000..069d4f8f783
--- /dev/null
+++ b/libevent/sample/time-test.c
@@ -0,0 +1,70 @@
+/*
+ * Compile with:
+ * cc -I/usr/local/include -o time-test time-test.c -L/usr/local/lib -levent
+ */
+
+#include <sys/types.h>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/stat.h>
+#ifndef WIN32
+#include <sys/queue.h>
+#include <unistd.h>
+#endif
+#include <time.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+
+#include <event.h>
+#include <evutil.h>
+
/* Wall-clock time of the previous callback invocation.
 * NOTE(review): declared int, so it truncates time_t on LP64 — harmless
 * for a sample since only the difference is printed. */
int lasttime;

/* Timer callback: report seconds elapsed since the last firing, then
 * re-arm the (non-persistent) timeout for another 2 seconds. */
static void
timeout_cb(int fd, short event, void *arg)
{
	struct timeval tv;
	struct event *timeout = arg;
	int newtime = time(NULL);

	printf("%s: called at %d: %d\n", __func__, newtime,
	    newtime - lasttime);
	lasttime = newtime;

	evutil_timerclear(&tv);
	tv.tv_sec = 2;
	event_add(timeout, &tv);
}
+
/* Sample program: fire a libevent timer every 2 seconds, forever. */
int
main (int argc, char **argv)
{
	struct event timeout;
	struct timeval tv;

	/* Initalize the event library */
	event_init();

	/* Initalize one event */
	evtimer_set(&timeout, timeout_cb, &timeout);

	evutil_timerclear(&tv);
	tv.tv_sec = 2;
	event_add(&timeout, &tv);

	lasttime = time(NULL);

	/* Never returns: timeout_cb keeps re-adding the timer. */
	event_dispatch();

	return (0);
}
+
diff --git a/libevent/select.c b/libevent/select.c
new file mode 100644
index 00000000000..ca6639fd829
--- /dev/null
+++ b/libevent/select.c
@@ -0,0 +1,356 @@
+/* $OpenBSD: select.c,v 1.2 2002/06/25 15:50:15 mickey Exp $ */
+
+/*
+ * Copyright 2000-2002 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#else
+#include <sys/_time.h>
+#endif
+#ifdef HAVE_SYS_SELECT_H
+#include <sys/select.h>
+#endif
+#include <sys/queue.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#ifdef CHECK_INVARIANTS
+#include <assert.h>
+#endif
+
+#include "event.h"
+#include "event-internal.h"
+#include "evsignal.h"
+#include "log.h"
+
#ifndef howmany
/* Ceiling division: how many 'y'-sized units cover 'x'. */
#define howmany(x, y) (((x)+((y)-1))/(y))
#endif

/* Per-base private state for the select(2) backend.  The *_in sets hold
 * registered interest; they are copied to the *_out sets before each
 * select() call, because select() mutates its arguments. */
struct selectop {
	int event_fds;		/* Highest fd in fd set */
	int event_fdsz;         /* byte size of each fd_set allocation */
	fd_set *event_readset_in;
	fd_set *event_writeset_in;
	fd_set *event_readset_out;
	fd_set *event_writeset_out;
	struct event **event_r_by_fd;   /* event watching fd for read */
	struct event **event_w_by_fd;   /* event watching fd for write */
};

static void *select_init	(struct event_base *);
static int select_add		(void *, struct event *);
static int select_del		(void *, struct event *);
static int select_dispatch	(struct event_base *, void *, struct timeval *);
static void select_dealloc	(struct event_base *, void *);

/* Backend vtable registered with the event core (see event-internal.h). */
const struct eventop selectops = {
	"select",
	select_init,
	select_add,
	select_del,
	select_dispatch,
	select_dealloc,
	0   /* does not require reinitialization after fork */
};
+
+static void *
+select_init(struct event_base *base)
+{
+ struct selectop *sop;
+
+ /* Disable select when this environment variable is set */
+ if (getenv("EVENT_NOSELECT"))
+ return (NULL);
+
+ if (!(sop = calloc(1, sizeof(struct selectop))))
+ return (NULL);
+
+ select_resize(sop, howmany(32 + 1, NFDBITS)*sizeof(fd_mask));
+
+ evsignal_init(base);
+
+ return (sop);
+}
+
#ifdef CHECK_INVARIANTS
/* Debug-only consistency check: for every fd up to the highest in use,
 * membership in the *_in sets must agree exactly with the back-pointer
 * arrays, and each back-pointed event must reference this fd. */
static void
check_selectop(struct selectop *sop)
{
	int i;
	for (i = 0; i <= sop->event_fds; ++i) {
		if (FD_ISSET(i, sop->event_readset_in)) {
			assert(sop->event_r_by_fd[i]);
			assert(sop->event_r_by_fd[i]->ev_events & EV_READ);
			assert(sop->event_r_by_fd[i]->ev_fd == i);
		} else {
			assert(! sop->event_r_by_fd[i]);
		}
		if (FD_ISSET(i, sop->event_writeset_in)) {
			assert(sop->event_w_by_fd[i]);
			assert(sop->event_w_by_fd[i]->ev_events & EV_WRITE);
			assert(sop->event_w_by_fd[i]->ev_fd == i);
		} else {
			assert(! sop->event_w_by_fd[i]);
		}
	}

}
#else
#define check_selectop(sop) do { (void) sop; } while (0)
#endif
+
/* Run one select(2) round: copy the interest sets (select mutates its
 * arguments), wait up to 'tv', dispatch caught signals, then activate
 * events for ready fds.  Returns 0 on success, -1 on a non-EINTR
 * select failure. */
static int
select_dispatch(struct event_base *base, void *arg, struct timeval *tv)
{
	int res, i, j;
	struct selectop *sop = arg;

	check_selectop(sop);

	memcpy(sop->event_readset_out, sop->event_readset_in,
	       sop->event_fdsz);
	memcpy(sop->event_writeset_out, sop->event_writeset_in,
	       sop->event_fdsz);

	res = select(sop->event_fds + 1, sop->event_readset_out,
	    sop->event_writeset_out, NULL, tv);

	check_selectop(sop);

	if (res == -1) {
		if (errno != EINTR) {
			event_warn("select");
			return (-1);
		}

		/* EINTR: a signal probably arrived; process and retry. */
		evsignal_process(base);
		return (0);
	} else if (base->sig.evsignal_caught) {
		evsignal_process(base);
	}

	event_debug(("%s: select reports %d", __func__, res));

	check_selectop(sop);
	/* Start the scan at a random fd so low-numbered fds can't starve
	 * high-numbered ones under constant load. */
	i = random() % (sop->event_fds+1);
	for (j = 0; j <= sop->event_fds; ++j) {
		struct event *r_ev = NULL, *w_ev = NULL;
		if (++i >= sop->event_fds+1)
			i = 0;

		res = 0;
		if (FD_ISSET(i, sop->event_readset_out)) {
			r_ev = sop->event_r_by_fd[i];
			res |= EV_READ;
		}
		if (FD_ISSET(i, sop->event_writeset_out)) {
			w_ev = sop->event_w_by_fd[i];
			res |= EV_WRITE;
		}
		/* One event may be both reader and writer on this fd;
		 * activate it at most once. */
		if (r_ev && (res & r_ev->ev_events)) {
			event_active(r_ev, res & r_ev->ev_events, 1);
		}
		if (w_ev && w_ev != r_ev && (res & w_ev->ev_events)) {
			event_active(w_ev, res & w_ev->ev_events, 1);
		}
	}
	check_selectop(sop);

	return (0);
}
+
+
/* Grow all four fd_sets to 'fdsz' bytes and both back-pointer arrays to
 * the matching number of fds, zeroing the newly added tails.  Each
 * successful realloc commits its pointer into 'sop' immediately, so a
 * later failure leaves no dangling pointer and no leak — merely some
 * over-allocated members.  Returns 0 on success, -1 on failure. */
static int
select_resize(struct selectop *sop, int fdsz)
{
	int n_events, n_events_old;

	fd_set *readset_in = NULL;
	fd_set *writeset_in = NULL;
	fd_set *readset_out = NULL;
	fd_set *writeset_out = NULL;
	struct event **r_by_fd = NULL;
	struct event **w_by_fd = NULL;

	/* Number of fds representable in fdsz bytes (before and after). */
	n_events = (fdsz/sizeof(fd_mask)) * NFDBITS;
	n_events_old = (sop->event_fdsz/sizeof(fd_mask)) * NFDBITS;

	if (sop->event_readset_in)
		check_selectop(sop);

	if ((readset_in = realloc(sop->event_readset_in, fdsz)) == NULL)
		goto error;
	sop->event_readset_in = readset_in;
	if ((readset_out = realloc(sop->event_readset_out, fdsz)) == NULL)
		goto error;
	sop->event_readset_out = readset_out;
	if ((writeset_in = realloc(sop->event_writeset_in, fdsz)) == NULL)
		goto error;
	sop->event_writeset_in = writeset_in;
	if ((writeset_out = realloc(sop->event_writeset_out, fdsz)) == NULL)
		goto error;
	sop->event_writeset_out = writeset_out;
	if ((r_by_fd = realloc(sop->event_r_by_fd,
		n_events*sizeof(struct event*))) == NULL)
		goto error;
	sop->event_r_by_fd = r_by_fd;
	if ((w_by_fd = realloc(sop->event_w_by_fd,
		n_events * sizeof(struct event*))) == NULL)
		goto error;
	sop->event_w_by_fd = w_by_fd;

	/* Zero only the newly added tails; realloc preserves the old
	 * contents but leaves the growth region uninitialized.  The *_out
	 * sets need no clearing — dispatch overwrites them wholesale. */
	memset((char *)sop->event_readset_in + sop->event_fdsz, 0,
	    fdsz - sop->event_fdsz);
	memset((char *)sop->event_writeset_in + sop->event_fdsz, 0,
	    fdsz - sop->event_fdsz);
	memset(sop->event_r_by_fd + n_events_old, 0,
	    (n_events-n_events_old) * sizeof(struct event*));
	memset(sop->event_w_by_fd + n_events_old, 0,
	    (n_events-n_events_old) * sizeof(struct event*));

	sop->event_fdsz = fdsz;
	check_selectop(sop);

	return (0);

 error:
	event_warn("malloc");
	return (-1);
}
+
+
/* Register 'ev' with the select backend.  Signal events are delegated
 * to evsignal_add(); read/write interest sets the fd's bit in the
 * appropriate *_in set, growing the fd_sets first if the fd exceeds the
 * current capacity.  Returns 0 on success, -1 on allocation failure. */
static int
select_add(void *arg, struct event *ev)
{
	struct selectop *sop = arg;

	if (ev->ev_events & EV_SIGNAL)
		return (evsignal_add(ev));

	check_selectop(sop);
	/*
	 * Keep track of the highest fd, so that we can calculate the size
	 * of the fd_sets for select(2)
	 */
	if (sop->event_fds < ev->ev_fd) {
		int fdsz = sop->event_fdsz;

		if (fdsz < sizeof(fd_mask))
			fdsz = sizeof(fd_mask);

		/* Double the byte size until ev_fd fits. */
		while (fdsz <
		    (howmany(ev->ev_fd + 1, NFDBITS) * sizeof(fd_mask)))
			fdsz *= 2;

		if (fdsz != sop->event_fdsz) {
			if (select_resize(sop, fdsz)) {
				check_selectop(sop);
				return (-1);
			}
		}

		sop->event_fds = ev->ev_fd;
	}

	if (ev->ev_events & EV_READ) {
		FD_SET(ev->ev_fd, sop->event_readset_in);
		sop->event_r_by_fd[ev->ev_fd] = ev;
	}
	if (ev->ev_events & EV_WRITE) {
		FD_SET(ev->ev_fd, sop->event_writeset_in);
		sop->event_w_by_fd[ev->ev_fd] = ev;
	}
	check_selectop(sop);

	return (0);
}
+
+/*
+ * Nothing to be done here.
+ */
+
/* Unregister 'ev' from the select backend: clear its fd's bit in the
 * relevant *_in set(s) and the matching back pointer.  An fd beyond the
 * tracked maximum was never registered here, so that case is a no-op.
 * Returns 0, or the result of evsignal_del() for signal events. */
static int
select_del(void *arg, struct event *ev)
{
	struct selectop *sop = arg;

	check_selectop(sop);
	if (ev->ev_events & EV_SIGNAL)
		return (evsignal_del(ev));

	if (sop->event_fds < ev->ev_fd) {
		check_selectop(sop);
		return (0);
	}

	if (ev->ev_events & EV_READ) {
		FD_CLR(ev->ev_fd, sop->event_readset_in);
		sop->event_r_by_fd[ev->ev_fd] = NULL;
	}

	if (ev->ev_events & EV_WRITE) {
		FD_CLR(ev->ev_fd, sop->event_writeset_in);
		sop->event_w_by_fd[ev->ev_fd] = NULL;
	}

	check_selectop(sop);
	return (0);
}
+
+static void
+select_dealloc(struct event_base *base, void *arg)
+{
+ struct selectop *sop = arg;
+
+ evsignal_dealloc(base);
+ if (sop->event_readset_in)
+ free(sop->event_readset_in);
+ if (sop->event_writeset_in)
+ free(sop->event_writeset_in);
+ if (sop->event_readset_out)
+ free(sop->event_readset_out);
+ if (sop->event_writeset_out)
+ free(sop->event_writeset_out);
+ if (sop->event_r_by_fd)
+ free(sop->event_r_by_fd);
+ if (sop->event_w_by_fd)
+ free(sop->event_w_by_fd);
+
+ memset(sop, 0, sizeof(struct selectop));
+ free(sop);
+}
diff --git a/libevent/signal.c b/libevent/signal.c
new file mode 100644
index 00000000000..74fa23f688a
--- /dev/null
+++ b/libevent/signal.c
@@ -0,0 +1,357 @@
+/* $OpenBSD: select.c,v 1.2 2002/06/25 15:50:15 mickey Exp $ */
+
+/*
+ * Copyright 2000-2002 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <winsock2.h>
+#include <windows.h>
+#undef WIN32_LEAN_AND_MEAN
+#endif
+#include <sys/types.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#include <sys/queue.h>
+#ifdef HAVE_SYS_SOCKET_H
+#include <sys/socket.h>
+#endif
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <errno.h>
+#ifdef HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+#include <assert.h>
+
+#include "event.h"
+#include "event-internal.h"
+#include "evsignal.h"
+#include "evutil.h"
+#include "log.h"
+
/* The base whose loop should be woken by the C signal handler.  A plain
 * global because a signal handler has no other way to find its base;
 * only one base at a time can therefore receive signals. */
struct event_base *evsignal_base = NULL;

static void evsignal_handler(int sig);

/* Callback for when the signal handler write a byte to our signaling socket */
static void
evsignal_cb(int fd, short what, void *arg)
{
	static char signals[1];
#ifdef WIN32
	SSIZE_T n;
#else
	ssize_t n;
#endif

	/* Drain one wakeup byte; the payload is meaningless, only the
	 * wakeup matters.  The actual signal bookkeeping happens in
	 * evsignal_process(). */
	n = recv(fd, signals, sizeof(signals), 0);
	if (n == -1)
		event_err(1, "%s: read", __func__);
}
+
/* Set the close-on-exec flag on fd 'x' so the signal sockets do not
 * leak into child processes; compiles to nothing without fcntl F_SETFD. */
#ifdef HAVE_SETFD
#define FD_CLOSEONEXEC(x) do { \
        if (fcntl(x, F_SETFD, 1) == -1) \
                event_warn("fcntl(%d, F_SETFD)", x); \
} while (0)
#else
#define FD_CLOSEONEXEC(x)
#endif
+
/* Initialize per-base signal state: create the self-pipe socketpair the
 * signal handler writes to, zero the bookkeeping arrays, and prepare
 * (but do not yet add) the internal read event on the pair.
 * Returns 0 on success, -1 on failure (win32 only; on other platforms
 * event_err() terminates the process before the return is reached). */
int
evsignal_init(struct event_base *base)
{
	int i;

	/*
	 * Our signal handler is going to write to one end of the socket
	 * pair to wake up our event loop.  The event loop then scans for
	 * signals that got delivered.
	 */
	if (evutil_socketpair(
		    AF_UNIX, SOCK_STREAM, 0, base->sig.ev_signal_pair) == -1) {
#ifdef WIN32
		/* Make this nonfatal on win32, where sometimes people
		   have localhost firewalled. */
		event_warn("%s: socketpair", __func__);
#else
		event_err(1, "%s: socketpair", __func__);
#endif
		return -1;
	}

	FD_CLOSEONEXEC(base->sig.ev_signal_pair[0]);
	FD_CLOSEONEXEC(base->sig.ev_signal_pair[1]);
	base->sig.sh_old = NULL;
	base->sig.sh_old_max = 0;
	base->sig.evsignal_caught = 0;
	memset(&base->sig.evsigcaught, 0, sizeof(sig_atomic_t)*NSIG);
	/* initialize the queues for all events */
	for (i = 0; i < NSIG; ++i)
		TAILQ_INIT(&base->sig.evsigevents[i]);

	/* [0] is the write end used inside the handler; it must never
	 * block there, hence nonblocking. */
	evutil_make_socket_nonblocking(base->sig.ev_signal_pair[0]);

	event_set(&base->sig.ev_signal, base->sig.ev_signal_pair[1],
		EV_READ | EV_PERSIST, evsignal_cb, &base->sig.ev_signal);
	base->sig.ev_signal.ev_base = base;
	/* Internal events are not counted as user events by the loop. */
	base->sig.ev_signal.ev_flags |= EVLIST_INTERNAL;

	return 0;
}
+
/* Helper: set the signal handler for evsignal to handler in base, so that
 * we can restore the original handler when we clear the current one.
 * The previous handler (sigaction or plain handler, depending on
 * HAVE_SIGACTION) is heap-saved in sig->sh_old[evsignal].
 * Returns 0 on success, -1 on allocation or syscall failure. */
int
_evsignal_set_handler(struct event_base *base,
		      int evsignal, void (*handler)(int))
{
#ifdef HAVE_SIGACTION
	struct sigaction sa;
#else
	ev_sighandler_t sh;
#endif
	struct evsignal_info *sig = &base->sig;
	void *p;

	/*
	 * resize saved signal handler array up to the highest signal number.
	 * a dynamic array is used to keep footprint on the low side.
	 */
	if (evsignal >= sig->sh_old_max) {
		int new_max = evsignal + 1;
		event_debug(("%s: evsignal (%d) >= sh_old_max (%d), resizing",
			    __func__, evsignal, sig->sh_old_max));
		p = realloc(sig->sh_old, new_max * sizeof(*sig->sh_old));
		if (p == NULL) {
			event_warn("realloc");
			return (-1);
		}

		/* Zero the new tail so unset slots read as NULL. */
		memset((char *)p + sig->sh_old_max * sizeof(*sig->sh_old),
		    0, (new_max - sig->sh_old_max) * sizeof(*sig->sh_old));

		sig->sh_old_max = new_max;
		sig->sh_old = p;
	}

	/* allocate space for previous handler out of dynamic array */
	sig->sh_old[evsignal] = malloc(sizeof *sig->sh_old[evsignal]);
	if (sig->sh_old[evsignal] == NULL) {
		event_warn("malloc");
		return (-1);
	}

	/* save previous handler and setup new handler */
#ifdef HAVE_SIGACTION
	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = handler;
	/* SA_RESTART: don't make unrelated syscalls fail with EINTR. */
	sa.sa_flags |= SA_RESTART;
	/* Block all signals while the handler runs. */
	sigfillset(&sa.sa_mask);

	if (sigaction(evsignal, &sa, sig->sh_old[evsignal]) == -1) {
		event_warn("sigaction");
		free(sig->sh_old[evsignal]);
		return (-1);
	}
#else
	if ((sh = signal(evsignal, handler)) == SIG_ERR) {
		event_warn("signal");
		free(sig->sh_old[evsignal]);
		return (-1);
	}
	*sig->sh_old[evsignal] = sh;
#endif

	return (0);
}
+
/* Register a signal event: install our handler for its signal the first
 * time that signal is watched, add the internal self-pipe read event on
 * first use, and append 'ev' to the signal's listener queue.
 * Returns 0 on success, -1 on failure. */
int
evsignal_add(struct event *ev)
{
	int evsignal;
	struct event_base *base = ev->ev_base;
	struct evsignal_info *sig = &ev->ev_base->sig;

	/* A signal event's ev_fd field holds the signal number, so it
	 * cannot double as a read/write event. */
	if (ev->ev_events & (EV_READ|EV_WRITE))
		event_errx(1, "%s: EV_SIGNAL incompatible use", __func__);
	evsignal = EVENT_SIGNAL(ev);
	assert(evsignal >= 0 && evsignal < NSIG);
	if (TAILQ_EMPTY(&sig->evsigevents[evsignal])) {
		event_debug(("%s: %p: changing signal handler", __func__, ev));
		if (_evsignal_set_handler(
			    base, evsignal, evsignal_handler) == -1)
			return (-1);

		/* catch signals if they happen quickly */
		evsignal_base = base;

		if (!sig->ev_signal_added) {
			if (event_add(&sig->ev_signal, NULL))
				return (-1);
			sig->ev_signal_added = 1;
		}
	}

	/* multiple events may listen to the same signal */
	TAILQ_INSERT_TAIL(&sig->evsigevents[evsignal], ev, ev_signal_next);

	return (0);
}
+
/* Reinstall the handler that was saved by _evsignal_set_handler() for
 * 'evsignal', and free the saved copy.  Returns 0 on success, -1 if the
 * sigaction()/signal() call fails. */
int
_evsignal_restore_handler(struct event_base *base, int evsignal)
{
	int ret = 0;
	struct evsignal_info *sig = &base->sig;
#ifdef HAVE_SIGACTION
	struct sigaction *sh;
#else
	ev_sighandler_t *sh;
#endif

	/* restore previous handler */
	sh = sig->sh_old[evsignal];
	sig->sh_old[evsignal] = NULL;
#ifdef HAVE_SIGACTION
	if (sigaction(evsignal, sh, NULL) == -1) {
		event_warn("sigaction");
		ret = -1;
	}
#else
	if (signal(evsignal, *sh) == SIG_ERR) {
		event_warn("signal");
		ret = -1;
	}
#endif
	/* Saved handler was heap-allocated by _evsignal_set_handler(). */
	free(sh);

	return ret;
}
+
/* Unregister a signal event from its signal's listener queue.  Only
 * when the queue becomes empty is the original (pre-libevent) signal
 * handler restored.  Returns 0, or -1 if restoring fails. */
int
evsignal_del(struct event *ev)
{
	struct event_base *base = ev->ev_base;
	struct evsignal_info *sig = &base->sig;
	int evsignal = EVENT_SIGNAL(ev);

	assert(evsignal >= 0 && evsignal < NSIG);

	/* multiple events may listen to the same signal */
	TAILQ_REMOVE(&sig->evsigevents[evsignal], ev, ev_signal_next);

	if (!TAILQ_EMPTY(&sig->evsigevents[evsignal]))
		return (0);

	event_debug(("%s: %p: restoring signal handler", __func__, ev));

	return (_evsignal_restore_handler(ev->ev_base, EVENT_SIGNAL(ev)));
}
+
/* The C signal handler installed for every watched signal.  Runs in
 * async-signal context, so it only bumps counters, sets a flag, and
 * writes one byte to the self-pipe to wake the event loop; errno is
 * saved and restored so the interrupted code is unaffected. */
static void
evsignal_handler(int sig)
{
	int save_errno = errno;

	if (evsignal_base == NULL) {
		event_warn(
			"%s: received signal %d, but have no base configured",
			__func__, sig);
		return;
	}

	evsignal_base->sig.evsigcaught[sig]++;
	evsignal_base->sig.evsignal_caught = 1;

#ifndef HAVE_SIGACTION
	/* Old-style signal() may reset the handler to SIG_DFL after each
	 * delivery; re-arm it. */
	signal(sig, evsignal_handler);
#endif

	/* Wake up our notification mechanism */
	send(evsignal_base->sig.ev_signal_pair[0], "a", 1, 0);
	errno = save_errno;
}
+
/* Called from the event loop after evsignal_caught was set: for every
 * signal with pending deliveries, activate all listening events with
 * the delivery count, removing non-persistent ones first. */
void
evsignal_process(struct event_base *base)
{
	struct evsignal_info *sig = &base->sig;
	struct event *ev, *next_ev;
	sig_atomic_t ncalls;
	int i;

	base->sig.evsignal_caught = 0;
	for (i = 1; i < NSIG; ++i) {
		ncalls = sig->evsigcaught[i];
		if (ncalls == 0)
			continue;
		/* Subtract (rather than zero) so deliveries racing in
		 * from the handler during this loop are not lost. */
		sig->evsigcaught[i] -= ncalls;

		/* Save the next pointer first: event_del() unlinks 'ev'
		 * from this queue. */
		for (ev = TAILQ_FIRST(&sig->evsigevents[i]);
		    ev != NULL; ev = next_ev) {
			next_ev = TAILQ_NEXT(ev, ev_signal_next);
			if (!(ev->ev_events & EV_PERSIST))
				event_del(ev);
			event_active(ev, EV_SIGNAL, ncalls);
		}

	}
}
+
/* Tear down per-base signal state: drop the internal self-pipe event,
 * restore every original signal handler still saved, close both ends of
 * the socketpair, and free the saved-handler array. */
void
evsignal_dealloc(struct event_base *base)
{
	int i = 0;
	if (base->sig.ev_signal_added) {
		event_del(&base->sig.ev_signal);
		base->sig.ev_signal_added = 0;
	}
	for (i = 0; i < NSIG; ++i) {
		if (i < base->sig.sh_old_max && base->sig.sh_old[i] != NULL)
			_evsignal_restore_handler(base, i);
	}

	EVUTIL_CLOSESOCKET(base->sig.ev_signal_pair[0]);
	base->sig.ev_signal_pair[0] = -1;
	EVUTIL_CLOSESOCKET(base->sig.ev_signal_pair[1]);
	base->sig.ev_signal_pair[1] = -1;
	base->sig.sh_old_max = 0;

	/* per index frees are handled in evsignal_del() */
	free(base->sig.sh_old);
}
diff --git a/libevent/strlcpy-internal.h b/libevent/strlcpy-internal.h
new file mode 100644
index 00000000000..22b5f61d45e
--- /dev/null
+++ b/libevent/strlcpy-internal.h
@@ -0,0 +1,23 @@
+#ifndef _STRLCPY_INTERNAL_H_
+#define _STRLCPY_INTERNAL_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif /* HAVE_CONFIG_H */
+
+#ifndef HAVE_STRLCPY
+#include <string.h>
+size_t _event_strlcpy(char *dst, const char *src, size_t siz);
+#define strlcpy _event_strlcpy
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
diff --git a/libevent/strlcpy.c b/libevent/strlcpy.c
new file mode 100644
index 00000000000..5d194527c8c
--- /dev/null
+++ b/libevent/strlcpy.c
@@ -0,0 +1,76 @@
+/* $OpenBSD: strlcpy.c,v 1.5 2001/05/13 15:40:16 deraadt Exp $ */
+
+/*
+ * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static char *rcsid = "$OpenBSD: strlcpy.c,v 1.5 2001/05/13 15:40:16 deraadt Exp $";
+#endif /* LIBC_SCCS and not lint */
+
+#include <sys/types.h>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif /* HAVE_CONFIG_H */
+
+#ifndef HAVE_STRLCPY
+#include "strlcpy-internal.h"
+
+/*
+ * Copy src to string dst of size siz. At most siz-1 characters
+ * will be copied. Always NUL terminates (unless siz == 0).
+ * Returns strlen(src); if retval >= siz, truncation occurred.
+ */
+/*
+ * K&R-style definition preserved from the OpenBSD original; the ANSI
+ * prototype lives in strlcpy-internal.h.  Copies at most siz-1 bytes,
+ * always NUL-terminates when siz > 0, and returns strlen(src) so the
+ * caller can detect truncation (retval >= siz).
+ */
+size_t
+_event_strlcpy(dst, src, siz)
+	char *dst;
+	const char *src;
+	size_t siz;
+{
+	register char *d = dst;
+	register const char *s = src;
+	register size_t n = siz;
+
+	/* Copy as many bytes as will fit */
+	if (n != 0 && --n != 0) {
+		do {
+			if ((*d++ = *s++) == 0)
+				break;
+		} while (--n != 0);
+	}
+
+	/* Not enough room in dst, add NUL and traverse rest of src */
+	if (n == 0) {
+		if (siz != 0)
+			*d = '\0';		/* NUL-terminate dst */
+		while (*s++)
+			;	/* advance s to the end so the length is right */
+	}
+
+	return(s - src - 1);	/* count does not include NUL */
+}
+#endif
diff --git a/libevent/test/Makefile.am b/libevent/test/Makefile.am
new file mode 100644
index 00000000000..3558d02fd5a
--- /dev/null
+++ b/libevent/test/Makefile.am
@@ -0,0 +1,35 @@
+AUTOMAKE_OPTIONS = foreign no-dependencies
+
+AM_CFLAGS = -I$(top_srcdir) -I$(top_srcdir)/compat
+
+EXTRA_DIST = regress.rpc regress.gen.h regress.gen.c
+
+noinst_PROGRAMS = test-init test-eof test-weof test-time regress bench
+
+BUILT_SOURCES = regress.gen.c regress.gen.h
+test_init_SOURCES = test-init.c
+test_init_LDADD = ../libevent_core.la
+test_eof_SOURCES = test-eof.c
+test_eof_LDADD = ../libevent_core.la
+test_weof_SOURCES = test-weof.c
+test_weof_LDADD = ../libevent_core.la
+test_time_SOURCES = test-time.c
+test_time_LDADD = ../libevent_core.la
+regress_SOURCES = regress.c regress.h regress_http.c regress_dns.c \
+ regress_rpc.c \
+ regress.gen.c regress.gen.h
+regress_LDADD = ../libevent.la
+bench_SOURCES = bench.c
+bench_LDADD = ../libevent.la
+
+regress.gen.c regress.gen.h: regress.rpc $(top_srcdir)/event_rpcgen.py
+ $(top_srcdir)/event_rpcgen.py $(srcdir)/regress.rpc || echo "No Python installed"
+
+DISTCLEANFILES = *~
+
+test: test-init test-eof test-weof test-time regress
+
+verify: test
+ @$(srcdir)/test.sh
+
+bench test-init test-eof test-weof test-time: ../libevent.la
diff --git a/libevent/test/bench.c b/libevent/test/bench.c
new file mode 100644
index 00000000000..c976932fa80
--- /dev/null
+++ b/libevent/test/bench.c
@@ -0,0 +1,188 @@
+/*
+ * Copyright 2003 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * Mon 03/10/2003 - Modified by Davide Libenzi <davidel@xmailserver.org>
+ *
+ * Added chain event propagation to improve the sensitivity of
+ * the measurement with respect to the event loop efficiency.
+ *
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#ifdef WIN32
+#include <windows.h>
+#else
+#include <sys/socket.h>
+#include <signal.h>
+#include <sys/resource.h>
+#endif
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include <event.h>
+#include <evutil.h>
+
+
+static int count, writes, fired;
+static int *pipes;
+static int num_pipes, num_active, num_writes;
+static struct event *events;
+
+/*
+ * Benchmark read callback: consume one byte from 'fd' and, while
+ * writes remain in this round, forward one byte to the next pipe so
+ * activity chains through the event loop.
+ *
+ * NOTE(review): a failed read() would add -1 to 'count'; harmless for
+ * the benchmark but worth knowing when interpreting results.
+ */
+static void
+read_cb(int fd, short which, void *arg)
+{
+	long idx = (long) arg, widx = idx + 1;	/* arg carries the pipe index */
+	u_char ch;
+
+	count += read(fd, &ch, sizeof(ch));
+	if (writes) {
+		if (widx >= num_pipes)
+			widx -= num_pipes;	/* wrap around to pipe 0 */
+		write(pipes[2 * widx + 1], "e", 1);
+		writes--;
+		fired++;
+	}
+}
+
+/*
+ * One benchmark iteration: (re)register a persistent read event per
+ * pipe, fire 'num_active' initial writes spread evenly across the
+ * pipes, then spin the loop until every expected byte has been read.
+ * Returns a pointer to a static timeval holding the elapsed time of
+ * the dispatch phase (static so the address remains valid after
+ * return).
+ */
+static struct timeval *
+run_once(void)
+{
+	int *cp, space;
+	long i;
+	static struct timeval ts, te;
+
+	for (cp = pipes, i = 0; i < num_pipes; i++, cp += 2) {
+		event_del(&events[i]);
+		event_set(&events[i], cp[0], EV_READ | EV_PERSIST, read_cb, (void *) i);
+		event_add(&events[i], NULL);
+	}
+
+	/* Flush any pending events before timing starts. */
+	event_loop(EVLOOP_ONCE | EVLOOP_NONBLOCK);
+
+	fired = 0;
+	space = num_pipes / num_active;
+	space = space * 2;	/* pipes[] holds 2 fds per pipe */
+	for (i = 0; i < num_active; i++, fired++)
+		write(pipes[i * space + 1], "e", 1);
+
+	count = 0;
+	writes = num_writes;
+	{ int xcount = 0;	/* number of loop passes, for diagnostics */
+	gettimeofday(&ts, NULL);
+	do {
+		event_loop(EVLOOP_ONCE | EVLOOP_NONBLOCK);
+		xcount++;
+	} while (count != fired);
+	gettimeofday(&te, NULL);
+
+	if (xcount != count) fprintf(stderr, "Xcount: %d, Rcount: %d\n", xcount, count);
+	}
+
+	evutil_timersub(&te, &ts, &te);
+
+	return (&te);
+}
+
+/*
+ * Benchmark driver.  Options: -n <pipes>, -a <active initial writers>,
+ * -w <total chained writes>.  Raises RLIMIT_NOFILE to fit all pipes,
+ * creates the pipe/socketpair fds, then runs 25 timed iterations,
+ * printing the elapsed microseconds of each.
+ */
+int
+main (int argc, char **argv)
+{
+#ifndef WIN32
+	struct rlimit rl;
+#endif
+	int i, c;
+	struct timeval *tv;
+	int *cp;
+
+	num_pipes = 100;
+	num_active = 1;
+	num_writes = num_pipes;
+	while ((c = getopt(argc, argv, "n:a:w:")) != -1) {
+		switch (c) {
+		case 'n':
+			num_pipes = atoi(optarg);
+			break;
+		case 'a':
+			num_active = atoi(optarg);
+			break;
+		case 'w':
+			num_writes = atoi(optarg);
+			break;
+		default:
+			fprintf(stderr, "Illegal argument \"%c\"\n", c);
+			exit(1);
+		}
+	}
+
+#ifndef WIN32
+	/* Two fds per pipe plus headroom for stdio/internal sockets. */
+	rl.rlim_cur = rl.rlim_max = num_pipes * 2 + 50;
+	if (setrlimit(RLIMIT_NOFILE, &rl) == -1) {
+		perror("setrlimit");
+		exit(1);
+	}
+#endif
+
+	events = calloc(num_pipes, sizeof(struct event));
+	pipes = calloc(num_pipes * 2, sizeof(int));
+	if (events == NULL || pipes == NULL) {
+		perror("malloc");
+		exit(1);
+	}
+
+	event_init();
+
+	for (cp = pipes, i = 0; i < num_pipes; i++, cp += 2) {
+#ifdef USE_PIPES
+		if (pipe(cp) == -1) {
+#else
+		/* socketpair is the portable default (works on win32 too) */
+		if (evutil_socketpair(AF_UNIX, SOCK_STREAM, 0, cp) == -1) {
+#endif
+			perror("pipe");
+			exit(1);
+		}
+	}
+
+	for (i = 0; i < 25; i++) {
+		tv = run_once();
+		if (tv == NULL)
+			exit(1);
+		fprintf(stdout, "%ld\n",
+			tv->tv_sec * 1000000L + tv->tv_usec);
+	}
+
+	exit(0);
+}
diff --git a/libevent/test/regress.c b/libevent/test/regress.c
new file mode 100644
index 00000000000..0b7517d3aa4
--- /dev/null
+++ b/libevent/test/regress.c
@@ -0,0 +1,1703 @@
+/*
+ * Copyright (c) 2003, 2004 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef WIN32
+#include <winsock2.h>
+#include <windows.h>
+#endif
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#include <sys/queue.h>
+#ifndef WIN32
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <signal.h>
+#include <unistd.h>
+#include <netdb.h>
+#endif
+#include <assert.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+
+#include "event.h"
+#include "evutil.h"
+#include "event-internal.h"
+#include "log.h"
+
+#include "regress.h"
+#ifndef WIN32
+#include "regress.gen.h"
+#endif
+
+int pair[2];
+int test_ok;
+static int called;
+static char wbuf[4096];
+static char rbuf[4096];
+static int woff;
+static int roff;
+static int usepersist;
+static struct timeval tset;
+static struct timeval tcalled;
+static struct event_base *global_base;
+
+#define TEST1 "this is a test"
+#define SECONDS 1
+
+#ifndef SHUT_WR
+#define SHUT_WR 1
+#endif
+
+#ifdef WIN32
+#define write(fd,buf,len) send((fd),(buf),(len),0)
+#define read(fd,buf,len) recv((fd),(buf),(len),0)
+#endif
+
+/*
+ * Read callback shared by several tests: reads into a scratch buffer,
+ * re-adds itself (the event is passed back via 'arg') after the first
+ * call, and sets test_ok when EOF arrives on the second call.  A NULL
+ * arg turns the callback into a counter-only no-op (used by
+ * test_registerfds).
+ */
+static void
+simple_read_cb(int fd, short event, void *arg)
+{
+	char buf[256];
+	int len;
+
+	if (arg == NULL)
+		return;
+
+	len = read(fd, buf, sizeof(buf));
+
+	if (len) {
+		if (!called) {
+			if (event_add(arg, NULL) == -1)
+				exit(1);
+		}
+	} else if (called == 1)	/* len == 0: EOF after exactly one read */
+		test_ok = 1;
+
+	called++;
+}
+
+/*
+ * Write callback: send one TEST1 payload (including its NUL) and
+ * record success or failure in test_ok.  A NULL arg is a no-op.
+ */
+static void
+simple_write_cb(int fd, short event, void *arg)
+{
+	int len;
+
+	if (arg == NULL)
+		return;
+
+	len = write(fd, TEST1, strlen(TEST1) + 1);
+	if (len == -1)
+		test_ok = 0;
+	else
+		test_ok = 1;
+}
+
+static void
+multiple_write_cb(int fd, short event, void *arg)
+{
+ struct event *ev = arg;
+ int len;
+
+ len = 128;
+ if (woff + len >= sizeof(wbuf))
+ len = sizeof(wbuf) - woff;
+
+ len = write(fd, wbuf + woff, len);
+ if (len == -1) {
+ fprintf(stderr, "%s: write\n", __func__);
+ if (usepersist)
+ event_del(ev);
+ return;
+ }
+
+ woff += len;
+
+ if (woff >= sizeof(wbuf)) {
+ shutdown(fd, SHUT_WR);
+ if (usepersist)
+ event_del(ev);
+ return;
+ }
+
+ if (!usepersist) {
+ if (event_add(ev, NULL) == -1)
+ exit(1);
+ }
+}
+
+static void
+multiple_read_cb(int fd, short event, void *arg)
+{
+ struct event *ev = arg;
+ int len;
+
+ len = read(fd, rbuf + roff, sizeof(rbuf) - roff);
+ if (len == -1)
+ fprintf(stderr, "%s: read\n", __func__);
+ if (len <= 0) {
+ if (usepersist)
+ event_del(ev);
+ return;
+ }
+
+ roff += len;
+ if (!usepersist) {
+ if (event_add(ev, NULL) == -1)
+ exit(1);
+ }
+}
+
+static void
+timeout_cb(int fd, short event, void *arg)
+{
+ struct timeval tv;
+ int diff;
+
+ evutil_gettimeofday(&tcalled, NULL);
+ if (evutil_timercmp(&tcalled, &tset, >))
+ evutil_timersub(&tcalled, &tset, &tv);
+ else
+ evutil_timersub(&tset, &tcalled, &tv);
+
+ diff = tv.tv_sec*1000 + tv.tv_usec/1000 - SECONDS * 1000;
+ if (diff < 0)
+ diff = -diff;
+
+ if (diff < 100)
+ test_ok = 1;
+}
+
+#ifndef WIN32
+static void
+signal_cb_sa(int sig)
+{
+ test_ok = 2;
+}
+
+static void
+signal_cb(int fd, short event, void *arg)
+{
+ struct event *ev = arg;
+
+ signal_del(ev);
+ test_ok = 1;
+}
+#endif
+
+struct both {
+ struct event ev;
+ int nread;
+};
+
+static void
+combined_read_cb(int fd, short event, void *arg)
+{
+ struct both *both = arg;
+ char buf[128];
+ int len;
+
+ len = read(fd, buf, sizeof(buf));
+ if (len == -1)
+ fprintf(stderr, "%s: read\n", __func__);
+ if (len <= 0)
+ return;
+
+ both->nread += len;
+ if (event_add(&both->ev, NULL) == -1)
+ exit(1);
+}
+
+static void
+combined_write_cb(int fd, short event, void *arg)
+{
+ struct both *both = arg;
+ char buf[128];
+ int len;
+
+ len = sizeof(buf);
+ if (len > both->nread)
+ len = both->nread;
+
+ len = write(fd, buf, len);
+ if (len == -1)
+ fprintf(stderr, "%s: write\n", __func__);
+ if (len <= 0) {
+ shutdown(fd, SHUT_WR);
+ return;
+ }
+
+ both->nread -= len;
+ if (event_add(&both->ev, NULL) == -1)
+ exit(1);
+}
+
+/* Test infrastructure */
+
+/*
+ * Common test prologue: print the test name, create a connected
+ * socketpair in the global pair[] (non-blocking where fcntl exists),
+ * and reset the test_ok/called globals.  Exits on socketpair failure.
+ */
+static int
+setup_test(const char *name)
+{
+
+	fprintf(stdout, "%s", name);
+
+	if (evutil_socketpair(AF_UNIX, SOCK_STREAM, 0, pair) == -1) {
+		fprintf(stderr, "%s: socketpair\n", __func__);
+		exit(1);
+	}
+
+#ifdef HAVE_FCNTL
+	/* NOTE(review): fcntl failure is reported but not fatal, and the
+	 * message lacks a trailing newline. */
+	if (fcntl(pair[0], F_SETFL, O_NONBLOCK) == -1)
+		fprintf(stderr, "fcntl(O_NONBLOCK)");
+
+	if (fcntl(pair[1], F_SETFL, O_NONBLOCK) == -1)
+		fprintf(stderr, "fcntl(O_NONBLOCK)");
+#endif
+
+	test_ok = 0;
+	called = 0;
+	return (0);
+}
+
+/*
+ * Common test epilogue: close the socketpair, print OK or FAILED based
+ * on test_ok, and exit(1) on failure so the harness stops at the first
+ * broken test.
+ */
+static int
+cleanup_test(void)
+{
+#ifndef WIN32
+	close(pair[0]);
+	close(pair[1]);
+#else
+	CloseHandle((HANDLE)pair[0]);
+	CloseHandle((HANDLE)pair[1]);
+#endif
+	if (test_ok)
+		fprintf(stdout, "OK\n");
+	else {
+		fprintf(stdout, "FAILED\n");
+		exit(1);
+	}
+	test_ok = 0;
+	return (0);
+}
+
+static void
+test_registerfds(void)
+{
+ int i, j;
+ int pair[2];
+ struct event read_evs[512];
+ struct event write_evs[512];
+
+ struct event_base *base = event_base_new();
+
+ fprintf(stdout, "Testing register fds: ");
+
+ for (i = 0; i < 512; ++i) {
+ if (evutil_socketpair(AF_UNIX, SOCK_STREAM, 0, pair) == -1) {
+ /* run up to the limit of file descriptors */
+ break;
+ }
+ event_set(&read_evs[i], pair[0],
+ EV_READ|EV_PERSIST, simple_read_cb, NULL);
+ event_base_set(base, &read_evs[i]);
+ event_add(&read_evs[i], NULL);
+ event_set(&write_evs[i], pair[1],
+ EV_WRITE|EV_PERSIST, simple_write_cb, NULL);
+ event_base_set(base, &write_evs[i]);
+ event_add(&write_evs[i], NULL);
+
+ /* just loop once */
+ event_base_loop(base, EVLOOP_ONCE);
+ }
+
+ /* now delete everything */
+ for (j = 0; j < i; ++j) {
+ event_del(&read_evs[j]);
+ event_del(&write_evs[j]);
+#ifndef WIN32
+ close(read_evs[j].ev_fd);
+ close(write_evs[j].ev_fd);
+#else
+ CloseHandle((HANDLE)read_evs[j].ev_fd);
+ CloseHandle((HANDLE)write_evs[j].ev_fd);
+#endif
+
+ /* just loop once */
+ event_base_loop(base, EVLOOP_ONCE);
+ }
+
+ event_base_free(base);
+
+ fprintf(stdout, "OK\n");
+}
+
+static void
+test_simpleread(void)
+{
+ struct event ev;
+
+ /* Very simple read test */
+ setup_test("Simple read: ");
+
+ write(pair[0], TEST1, strlen(TEST1)+1);
+ shutdown(pair[0], SHUT_WR);
+
+ event_set(&ev, pair[1], EV_READ, simple_read_cb, &ev);
+ if (event_add(&ev, NULL) == -1)
+ exit(1);
+ event_dispatch();
+
+ cleanup_test();
+}
+
+static void
+test_simplewrite(void)
+{
+ struct event ev;
+
+ /* Very simple write test */
+ setup_test("Simple write: ");
+
+ event_set(&ev, pair[0], EV_WRITE, simple_write_cb, &ev);
+ if (event_add(&ev, NULL) == -1)
+ exit(1);
+ event_dispatch();
+
+ cleanup_test();
+}
+
+static void
+test_multiple(void)
+{
+ struct event ev, ev2;
+ int i;
+
+ /* Multiple read and write test */
+ setup_test("Multiple read/write: ");
+ memset(rbuf, 0, sizeof(rbuf));
+ for (i = 0; i < sizeof(wbuf); i++)
+ wbuf[i] = i;
+
+ roff = woff = 0;
+ usepersist = 0;
+
+ event_set(&ev, pair[0], EV_WRITE, multiple_write_cb, &ev);
+ if (event_add(&ev, NULL) == -1)
+ exit(1);
+ event_set(&ev2, pair[1], EV_READ, multiple_read_cb, &ev2);
+ if (event_add(&ev2, NULL) == -1)
+ exit(1);
+ event_dispatch();
+
+ if (roff == woff)
+ test_ok = memcmp(rbuf, wbuf, sizeof(wbuf)) == 0;
+
+ cleanup_test();
+}
+
+static void
+test_persistent(void)
+{
+ struct event ev, ev2;
+ int i;
+
+ /* Multiple read and write test with persist */
+ setup_test("Persist read/write: ");
+ memset(rbuf, 0, sizeof(rbuf));
+ for (i = 0; i < sizeof(wbuf); i++)
+ wbuf[i] = i;
+
+ roff = woff = 0;
+ usepersist = 1;
+
+ event_set(&ev, pair[0], EV_WRITE|EV_PERSIST, multiple_write_cb, &ev);
+ if (event_add(&ev, NULL) == -1)
+ exit(1);
+ event_set(&ev2, pair[1], EV_READ|EV_PERSIST, multiple_read_cb, &ev2);
+ if (event_add(&ev2, NULL) == -1)
+ exit(1);
+ event_dispatch();
+
+ if (roff == woff)
+ test_ok = memcmp(rbuf, wbuf, sizeof(wbuf)) == 0;
+
+ cleanup_test();
+}
+
+static void
+test_combined(void)
+{
+ struct both r1, r2, w1, w2;
+
+ setup_test("Combined read/write: ");
+ memset(&r1, 0, sizeof(r1));
+ memset(&r2, 0, sizeof(r2));
+ memset(&w1, 0, sizeof(w1));
+ memset(&w2, 0, sizeof(w2));
+
+ w1.nread = 4096;
+ w2.nread = 8192;
+
+ event_set(&r1.ev, pair[0], EV_READ, combined_read_cb, &r1);
+ event_set(&w1.ev, pair[0], EV_WRITE, combined_write_cb, &w1);
+ event_set(&r2.ev, pair[1], EV_READ, combined_read_cb, &r2);
+ event_set(&w2.ev, pair[1], EV_WRITE, combined_write_cb, &w2);
+ if (event_add(&r1.ev, NULL) == -1)
+ exit(1);
+ if (event_add(&w1.ev, NULL))
+ exit(1);
+ if (event_add(&r2.ev, NULL))
+ exit(1);
+ if (event_add(&w2.ev, NULL))
+ exit(1);
+
+ event_dispatch();
+
+ if (r1.nread == 8192 && r2.nread == 4096)
+ test_ok = 1;
+
+ cleanup_test();
+}
+
+static void
+test_simpletimeout(void)
+{
+ struct timeval tv;
+ struct event ev;
+
+ setup_test("Simple timeout: ");
+
+ tv.tv_usec = 0;
+ tv.tv_sec = SECONDS;
+ evtimer_set(&ev, timeout_cb, NULL);
+ evtimer_add(&ev, &tv);
+
+ evutil_gettimeofday(&tset, NULL);
+ event_dispatch();
+
+ cleanup_test();
+}
+
+#ifndef WIN32
+extern struct event_base *current_base;
+
+static void
+child_signal_cb(int fd, short event, void *arg)
+{
+ struct timeval tv;
+ int *pint = arg;
+
+ *pint = 1;
+
+ tv.tv_usec = 500000;
+ tv.tv_sec = 0;
+ event_loopexit(&tv);
+}
+
+static void
+test_fork(void)
+{
+ int status, got_sigchld = 0;
+ struct event ev, sig_ev;
+ pid_t pid;
+
+ setup_test("After fork: ");
+
+ write(pair[0], TEST1, strlen(TEST1)+1);
+
+ event_set(&ev, pair[1], EV_READ, simple_read_cb, &ev);
+ if (event_add(&ev, NULL) == -1)
+ exit(1);
+
+ signal_set(&sig_ev, SIGCHLD, child_signal_cb, &got_sigchld);
+ signal_add(&sig_ev, NULL);
+
+ if ((pid = fork()) == 0) {
+ /* in the child */
+ if (event_reinit(current_base) == -1) {
+ fprintf(stderr, "FAILED (reinit)\n");
+ exit(1);
+ }
+
+ signal_del(&sig_ev);
+
+ called = 0;
+
+ event_dispatch();
+
+ /* we do not send an EOF; simple_read_cb requires an EOF
+ * to set test_ok. we just verify that the callback was
+ * called. */
+ exit(test_ok != 0 || called != 2 ? -2 : 76);
+ }
+
+ /* wait for the child to read the data */
+ sleep(1);
+
+ write(pair[0], TEST1, strlen(TEST1)+1);
+
+ if (waitpid(pid, &status, 0) == -1) {
+ fprintf(stderr, "FAILED (fork)\n");
+ exit(1);
+ }
+
+ if (WEXITSTATUS(status) != 76) {
+ fprintf(stderr, "FAILED (exit): %d\n", WEXITSTATUS(status));
+ exit(1);
+ }
+
+ /* test that the current event loop still works */
+ write(pair[0], TEST1, strlen(TEST1)+1);
+ shutdown(pair[0], SHUT_WR);
+
+ event_dispatch();
+
+ if (!got_sigchld) {
+ fprintf(stdout, "FAILED (sigchld)\n");
+ exit(1);
+ }
+
+ signal_del(&sig_ev);
+
+ cleanup_test();
+}
+
+static void
+test_simplesignal(void)
+{
+ struct event ev;
+ struct itimerval itv;
+
+ setup_test("Simple signal: ");
+ signal_set(&ev, SIGALRM, signal_cb, &ev);
+ signal_add(&ev, NULL);
+ /* find bugs in which operations are re-ordered */
+ signal_del(&ev);
+ signal_add(&ev, NULL);
+
+ memset(&itv, 0, sizeof(itv));
+ itv.it_value.tv_sec = 1;
+ if (setitimer(ITIMER_REAL, &itv, NULL) == -1)
+ goto skip_simplesignal;
+
+ event_dispatch();
+ skip_simplesignal:
+ if (signal_del(&ev) == -1)
+ test_ok = 0;
+
+ cleanup_test();
+}
+
+static void
+test_multiplesignal(void)
+{
+ struct event ev_one, ev_two;
+ struct itimerval itv;
+
+ setup_test("Multiple signal: ");
+
+ signal_set(&ev_one, SIGALRM, signal_cb, &ev_one);
+ signal_add(&ev_one, NULL);
+
+ signal_set(&ev_two, SIGALRM, signal_cb, &ev_two);
+ signal_add(&ev_two, NULL);
+
+ memset(&itv, 0, sizeof(itv));
+ itv.it_value.tv_sec = 1;
+ if (setitimer(ITIMER_REAL, &itv, NULL) == -1)
+ goto skip_simplesignal;
+
+ event_dispatch();
+
+ skip_simplesignal:
+ if (signal_del(&ev_one) == -1)
+ test_ok = 0;
+ if (signal_del(&ev_two) == -1)
+ test_ok = 0;
+
+ cleanup_test();
+}
+
+static void
+test_immediatesignal(void)
+{
+ struct event ev;
+
+ test_ok = 0;
+ printf("Immediate signal: ");
+ signal_set(&ev, SIGUSR1, signal_cb, &ev);
+ signal_add(&ev, NULL);
+ raise(SIGUSR1);
+ event_loop(EVLOOP_NONBLOCK);
+ signal_del(&ev);
+ cleanup_test();
+}
+
+static void
+test_signal_dealloc(void)
+{
+ /* make sure that signal_event is event_del'ed and pipe closed */
+ struct event ev;
+ struct event_base *base = event_init();
+ printf("Signal dealloc: ");
+ signal_set(&ev, SIGUSR1, signal_cb, &ev);
+ signal_add(&ev, NULL);
+ signal_del(&ev);
+ event_base_free(base);
+ /* If we got here without asserting, we're fine. */
+ test_ok = 1;
+ cleanup_test();
+}
+
+static void
+test_signal_pipeloss(void)
+{
+ /* make sure that the base1 pipe is closed correctly. */
+ struct event_base *base1, *base2;
+ int pipe1;
+ test_ok = 0;
+ printf("Signal pipeloss: ");
+ base1 = event_init();
+ pipe1 = base1->sig.ev_signal_pair[0];
+ base2 = event_init();
+ event_base_free(base2);
+ event_base_free(base1);
+ if (close(pipe1) != -1 || errno!=EBADF) {
+ /* fd must be closed, so second close gives -1, EBADF */
+ printf("signal pipe not closed. ");
+ test_ok = 0;
+ } else {
+ test_ok = 1;
+ }
+ cleanup_test();
+}
+
+/*
+ * make two bases to catch signals, use both of them. this only works
+ * for event mechanisms that use our signal pipe trick. kqueue handles
+ * signals internally, and all interested kqueues get all the signals.
+ */
+static void
+test_signal_switchbase(void)
+{
+ struct event ev1, ev2;
+ struct event_base *base1, *base2;
+ int is_kqueue;
+ test_ok = 0;
+ printf("Signal switchbase: ");
+ base1 = event_init();
+ base2 = event_init();
+ is_kqueue = !strcmp(event_get_method(),"kqueue");
+ signal_set(&ev1, SIGUSR1, signal_cb, &ev1);
+ signal_set(&ev2, SIGUSR1, signal_cb, &ev2);
+ if (event_base_set(base1, &ev1) ||
+ event_base_set(base2, &ev2) ||
+ event_add(&ev1, NULL) ||
+ event_add(&ev2, NULL)) {
+ fprintf(stderr, "%s: cannot set base, add\n", __func__);
+ exit(1);
+ }
+
+ test_ok = 0;
+ /* can handle signal before loop is called */
+ raise(SIGUSR1);
+ event_base_loop(base2, EVLOOP_NONBLOCK);
+ if (is_kqueue) {
+ if (!test_ok)
+ goto done;
+ test_ok = 0;
+ }
+ event_base_loop(base1, EVLOOP_NONBLOCK);
+ if (test_ok && !is_kqueue) {
+ test_ok = 0;
+
+ /* set base1 to handle signals */
+ event_base_loop(base1, EVLOOP_NONBLOCK);
+ raise(SIGUSR1);
+ event_base_loop(base1, EVLOOP_NONBLOCK);
+ event_base_loop(base2, EVLOOP_NONBLOCK);
+ }
+ done:
+ event_base_free(base1);
+ event_base_free(base2);
+ cleanup_test();
+}
+
+/*
+ * assert that a signal event removed from the event queue really is
+ * removed - with no possibility of its parent handler being fired.
+ */
+static void
+test_signal_assert(void)
+{
+	struct event ev;
+	struct event_base *base = event_init();
+	test_ok = 0;
+	printf("Signal handler assert: ");
+	/* use SIGCONT so we don't kill ourselves when we signal to nowhere */
+	signal_set(&ev, SIGCONT, signal_cb, &ev);
+	signal_add(&ev, NULL);
+	/*
+	 * if signal_del() fails to reset the handler, it's current handler
+	 * will still point to evsignal_handler().
+	 */
+	signal_del(&ev);
+
+	raise(SIGCONT);
+	/* only way to verify we were in evsignal_handler():
+	 * evsignal_caught must stay 0 if the handler was truly removed */
+	if (base->sig.evsignal_caught)
+		test_ok = 0;
+	else
+		test_ok = 1;
+
+	event_base_free(base);
+	cleanup_test();
+	return;
+}
+
+/*
+ * assert that we restore our previous signal handler properly.
+ */
+static void
+test_signal_restore(void)
+{
+ struct event ev;
+ struct event_base *base = event_init();
+#ifdef HAVE_SIGACTION
+ struct sigaction sa;
+#endif
+
+ test_ok = 0;
+ printf("Signal handler restore: ");
+#ifdef HAVE_SIGACTION
+ sa.sa_handler = signal_cb_sa;
+ sa.sa_flags = 0x0;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(SIGUSR1, &sa, NULL) == -1)
+ goto out;
+#else
+ if (signal(SIGUSR1, signal_cb_sa) == SIG_ERR)
+ goto out;
+#endif
+ signal_set(&ev, SIGUSR1, signal_cb, &ev);
+ signal_add(&ev, NULL);
+ signal_del(&ev);
+
+ raise(SIGUSR1);
+ /* 1 == signal_cb, 2 == signal_cb_sa, we want our previous handler */
+ if (test_ok != 2)
+ test_ok = 0;
+out:
+ event_base_free(base);
+ cleanup_test();
+ return;
+}
+
+static void
+signal_cb_swp(int sig, short event, void *arg)
+{
+ called++;
+ if (called < 5)
+ raise(sig);
+ else
+ event_loopexit(NULL);
+}
+static void
+timeout_cb_swp(int fd, short event, void *arg)
+{
+ if (called == -1) {
+ struct timeval tv = {5, 0};
+
+ called = 0;
+ evtimer_add((struct event *)arg, &tv);
+ raise(SIGUSR1);
+ return;
+ }
+ test_ok = 0;
+ event_loopexit(NULL);
+}
+
+static void
+test_signal_while_processing(void)
+{
+ struct event_base *base = event_init();
+ struct event ev, ev_timer;
+ struct timeval tv = {0, 0};
+
+ setup_test("Receiving a signal while processing other signal: ");
+
+ called = -1;
+ test_ok = 1;
+ signal_set(&ev, SIGUSR1, signal_cb_swp, NULL);
+ signal_add(&ev, NULL);
+ evtimer_set(&ev_timer, timeout_cb_swp, &ev_timer);
+ evtimer_add(&ev_timer, &tv);
+ event_dispatch();
+
+ event_base_free(base);
+ cleanup_test();
+ return;
+}
+#endif
+
+static void
+test_free_active_base(void)
+{
+ struct event_base *base1;
+ struct event ev1;
+ setup_test("Free active base: ");
+ base1 = event_init();
+ event_set(&ev1, pair[1], EV_READ, simple_read_cb, &ev1);
+ event_base_set(base1, &ev1);
+ event_add(&ev1, NULL);
+ /* event_del(&ev1); */
+ event_base_free(base1);
+ test_ok = 1;
+ cleanup_test();
+}
+
+static void
+test_event_base_new(void)
+{
+ struct event_base *base;
+ struct event ev1;
+ setup_test("Event base new: ");
+
+ write(pair[0], TEST1, strlen(TEST1)+1);
+ shutdown(pair[0], SHUT_WR);
+
+ base = event_base_new();
+ event_set(&ev1, pair[1], EV_READ, simple_read_cb, &ev1);
+ event_base_set(base, &ev1);
+ event_add(&ev1, NULL);
+
+ event_base_dispatch(base);
+
+ event_base_free(base);
+ test_ok = 1;
+ cleanup_test();
+}
+
+static void
+test_loopexit(void)
+{
+ struct timeval tv, tv_start, tv_end;
+ struct event ev;
+
+ setup_test("Loop exit: ");
+
+ tv.tv_usec = 0;
+ tv.tv_sec = 60*60*24;
+ evtimer_set(&ev, timeout_cb, NULL);
+ evtimer_add(&ev, &tv);
+
+ tv.tv_usec = 0;
+ tv.tv_sec = 1;
+ event_loopexit(&tv);
+
+ evutil_gettimeofday(&tv_start, NULL);
+ event_dispatch();
+ evutil_gettimeofday(&tv_end, NULL);
+ evutil_timersub(&tv_end, &tv_start, &tv_end);
+
+ evtimer_del(&ev);
+
+ if (tv.tv_sec < 2)
+ test_ok = 1;
+
+ cleanup_test();
+}
+
+static void
+test_loopexit_multiple(void)
+{
+ struct timeval tv;
+ struct event_base *base;
+
+ setup_test("Loop Multiple exit: ");
+
+ base = event_base_new();
+
+ tv.tv_usec = 0;
+ tv.tv_sec = 1;
+ event_base_loopexit(base, &tv);
+
+ tv.tv_usec = 0;
+ tv.tv_sec = 2;
+ event_base_loopexit(base, &tv);
+
+ event_base_dispatch(base);
+
+ event_base_free(base);
+
+ test_ok = 1;
+
+ cleanup_test();
+}
+
+static void
+break_cb(int fd, short events, void *arg)
+{
+ test_ok = 1;
+ event_loopbreak();
+}
+
+static void
+fail_cb(int fd, short events, void *arg)
+{
+ test_ok = 0;
+}
+
+static void
+test_loopbreak(void)
+{
+ struct event ev1, ev2;
+ struct timeval tv;
+
+ setup_test("Loop break: ");
+
+ tv.tv_sec = 0;
+ tv.tv_usec = 0;
+ evtimer_set(&ev1, break_cb, NULL);
+ evtimer_add(&ev1, &tv);
+ evtimer_set(&ev2, fail_cb, NULL);
+ evtimer_add(&ev2, &tv);
+
+ event_dispatch();
+
+ evtimer_del(&ev1);
+ evtimer_del(&ev2);
+
+ cleanup_test();
+}
+
+static void
+test_evbuffer(void) {
+
+ struct evbuffer *evb = evbuffer_new();
+ setup_test("Testing Evbuffer: ");
+
+ evbuffer_add_printf(evb, "%s/%d", "hello", 1);
+
+ if (EVBUFFER_LENGTH(evb) == 7 &&
+ strcmp((char*)EVBUFFER_DATA(evb), "hello/1") == 0)
+ test_ok = 1;
+
+ evbuffer_free(evb);
+
+ cleanup_test();
+}
+
+static void
+test_evbuffer_find(void)
+{
+ u_char* p;
+ const char* test1 = "1234567890\r\n";
+ const char* test2 = "1234567890\r";
+#define EVBUFFER_INITIAL_LENGTH 256
+ char test3[EVBUFFER_INITIAL_LENGTH];
+ unsigned int i;
+ struct evbuffer * buf = evbuffer_new();
+
+ /* make sure evbuffer_find doesn't match past the end of the buffer */
+ fprintf(stdout, "Testing evbuffer_find 1: ");
+ evbuffer_add(buf, (u_char*)test1, strlen(test1));
+ evbuffer_drain(buf, strlen(test1));
+ evbuffer_add(buf, (u_char*)test2, strlen(test2));
+ p = evbuffer_find(buf, (u_char*)"\r\n", 2);
+ if (p == NULL) {
+ fprintf(stdout, "OK\n");
+ } else {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ /*
+ * drain the buffer and do another find; in r309 this would
+ * read past the allocated buffer causing a valgrind error.
+ */
+ fprintf(stdout, "Testing evbuffer_find 2: ");
+ evbuffer_drain(buf, strlen(test2));
+ for (i = 0; i < EVBUFFER_INITIAL_LENGTH; ++i)
+ test3[i] = 'a';
+ test3[EVBUFFER_INITIAL_LENGTH - 1] = 'x';
+ evbuffer_add(buf, (u_char *)test3, EVBUFFER_INITIAL_LENGTH);
+ p = evbuffer_find(buf, (u_char *)"xy", 2);
+ if (p == NULL) {
+ printf("OK\n");
+ } else {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ /* simple test for match at end of allocated buffer */
+ fprintf(stdout, "Testing evbuffer_find 3: ");
+ p = evbuffer_find(buf, (u_char *)"ax", 2);
+ if (p != NULL && strncmp((char*)p, "ax", 2) == 0) {
+ printf("OK\n");
+ } else {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ evbuffer_free(buf);
+}
+
+/*
+ * simple bufferevent test
+ */
+
+static void
+readcb(struct bufferevent *bev, void *arg)
+{
+ /* Disable further reads once the full 8333-byte payload written
+  * by test_bufferevent() has accumulated, and count one success. */
+ if (EVBUFFER_LENGTH(bev->input) == 8333) {
+ bufferevent_disable(bev, EV_READ);
+ test_ok++;
+ }
+}
+
+static void
+writecb(struct bufferevent *bev, void *arg)
+{
+ /* Output buffer fully flushed: count one success. */
+ if (EVBUFFER_LENGTH(bev->output) == 0)
+ test_ok++;
+}
+
+static void
+errorcb(struct bufferevent *bev, short what, void *arg)
+{
+ /* Any bufferevent error fails the test with a distinctive value. */
+ test_ok = -2;
+}
+
+static void
+test_bufferevent(void)
+{
+ struct bufferevent *bev1, *bev2;
+ char buffer[8333];
+ int i;
+
+ setup_test("Bufferevent: ");
+
+ /* bev1 writes on pair[0]; bev2 reads on pair[1].  Both share the
+  * readcb/writecb/errorcb callbacks above. */
+ bev1 = bufferevent_new(pair[0], readcb, writecb, errorcb, NULL);
+ bev2 = bufferevent_new(pair[1], readcb, writecb, errorcb, NULL);
+
+ bufferevent_disable(bev1, EV_READ);
+ bufferevent_enable(bev2, EV_READ);
+
+ /* NOTE(review): i is int vs. size_t from sizeof — fine for 8333
+  * but triggers a signed/unsigned comparison warning. */
+ for (i = 0; i < sizeof(buffer); i++)
+ buffer[i] = i;
+
+ bufferevent_write(bev1, buffer, sizeof(buffer));
+
+ event_dispatch();
+
+ bufferevent_free(bev1);
+ bufferevent_free(bev2);
+
+ /* Expect exactly two successes: write flushed + full read. */
+ if (test_ok != 2)
+ test_ok = 0;
+
+ cleanup_test();
+}
+
+/*
+ * test watermarks and bufferevent
+ */
+
+static void
+wm_readcb(struct bufferevent *bev, void *arg)
+{
+ int len = EVBUFFER_LENGTH(bev->input);
+ static int nread;
+
+ /* Watermarks are set to (low=10, high=20): each invocation must
+  * therefore see between 10 and 20 bytes pending. */
+ assert(len >= 10 && len <= 20);
+
+ evbuffer_drain(bev->input, len);
+
+ nread += len;
+ if (nread == 65000) {
+ bufferevent_disable(bev, EV_READ);
+ test_ok++;
+ }
+}
+
+static void
+wm_writecb(struct bufferevent *bev, void *arg)
+{
+ /* Output buffer fully flushed: count one success. */
+ if (EVBUFFER_LENGTH(bev->output) == 0)
+ test_ok++;
+}
+
+static void
+wm_errorcb(struct bufferevent *bev, short what, void *arg)
+{
+ /* Any bufferevent error fails the test with a distinctive value. */
+ test_ok = -2;
+}
+
+static void
+test_bufferevent_watermarks(void)
+{
+ struct bufferevent *bev1, *bev2;
+ char buffer[65000];
+ int i;
+
+ setup_test("Bufferevent Watermarks: ");
+
+ bev1 = bufferevent_new(pair[0], NULL, wm_writecb, wm_errorcb, NULL);
+ bev2 = bufferevent_new(pair[1], wm_readcb, NULL, wm_errorcb, NULL);
+
+ bufferevent_disable(bev1, EV_READ);
+ bufferevent_enable(bev2, EV_READ);
+
+ for (i = 0; i < sizeof(buffer); i++)
+ buffer[i] = i;
+
+ bufferevent_write(bev1, buffer, sizeof(buffer));
+
+ /* limit the reading on the receiving bufferevent */
+ bufferevent_setwatermark(bev2, EV_READ, 10, 20);
+
+ event_dispatch();
+
+ bufferevent_free(bev1);
+ bufferevent_free(bev2);
+
+ /* Expect exactly two successes: writer drained + reader got 65000. */
+ if (test_ok != 2)
+ test_ok = 0;
+
+ cleanup_test();
+}
+
+struct test_pri_event {
+ struct event ev; /* timer that the callback keeps re-arming */
+ int count; /* number of times the callback has fired */
+};
+
+static void
+test_priorities_cb(int fd, short what, void *arg)
+{
+ struct test_pri_event *pri = arg;
+ struct timeval tv;
+
+ /* After three firings, request loop exit instead of re-arming. */
+ if (pri->count == 3) {
+ event_loopexit(NULL);
+ return;
+ }
+
+ pri->count++;
+
+ /* Re-arm with a zero timeout so the event fires again immediately. */
+ evutil_timerclear(&tv);
+ event_add(&pri->ev, &tv);
+}
+
+static void
+test_priorities(int npriorities)
+{
+ char buf[32];
+ struct test_pri_event one, two;
+ struct timeval tv;
+
+ evutil_snprintf(buf, sizeof(buf), "Testing Priorities %d: ", npriorities);
+ setup_test(buf);
+
+ event_base_priority_init(global_base, npriorities);
+
+ memset(&one, 0, sizeof(one));
+ memset(&two, 0, sizeof(two));
+
+ /* "one" runs at the highest priority (0), "two" at the lowest. */
+ timeout_set(&one.ev, test_priorities_cb, &one);
+ if (event_priority_set(&one.ev, 0) == -1) {
+ fprintf(stderr, "%s: failed to set priority", __func__);
+ exit(1);
+ }
+
+ timeout_set(&two.ev, test_priorities_cb, &two);
+ if (event_priority_set(&two.ev, npriorities - 1) == -1) {
+ fprintf(stderr, "%s: failed to set priority", __func__);
+ exit(1);
+ }
+
+ evutil_timerclear(&tv);
+
+ if (event_add(&one.ev, &tv) == -1)
+ exit(1);
+ if (event_add(&two.ev, &tv) == -1)
+ exit(1);
+
+ event_dispatch();
+
+ event_del(&one.ev);
+ event_del(&two.ev);
+
+ /* Expected counts depend on how many priority levels exist:
+  * with 1 level both run equally; with more, the high-priority
+  * event starves the low-priority one. */
+ if (npriorities == 1) {
+ if (one.count == 3 && two.count == 3)
+ test_ok = 1;
+ } else if (npriorities == 2) {
+ /* Two is called once because event_loopexit is priority 1 */
+ if (one.count == 3 && two.count == 1)
+ test_ok = 1;
+ } else {
+ if (one.count == 3 && two.count == 0)
+ test_ok = 1;
+ }
+
+ cleanup_test();
+}
+
+static void
+test_multiple_cb(int fd, short event, void *arg)
+{
+ /* Record which of the two events fired via distinct bits. */
+ if (event & EV_READ)
+ test_ok |= 1;
+ else if (event & EV_WRITE)
+ test_ok |= 2;
+}
+
+static void
+test_multiple_events_for_same_fd(void)
+{
+ struct event e1, e2;
+
+ setup_test("Multiple events for same fd: ");
+
+ /* Register both a read and a write event on the same fd. */
+ event_set(&e1, pair[0], EV_READ, test_multiple_cb, NULL);
+ event_add(&e1, NULL);
+ event_set(&e2, pair[0], EV_WRITE, test_multiple_cb, NULL);
+ event_add(&e2, NULL);
+ event_loop(EVLOOP_ONCE);
+ event_del(&e2);
+ /* NOTE(review): write() return value is unchecked. */
+ write(pair[1], TEST1, strlen(TEST1)+1);
+ event_loop(EVLOOP_ONCE);
+ event_del(&e1);
+
+ /* Both bits must be set: write fired first, then the read. */
+ if (test_ok != 3)
+ test_ok = 0;
+
+ cleanup_test();
+}
+
+int evtag_decode_int(uint32_t *pnumber, struct evbuffer *evbuf);
+int evtag_encode_tag(struct evbuffer *evbuf, uint32_t number);
+int evtag_decode_tag(uint32_t *pnumber, struct evbuffer *evbuf);
+
+static void
+read_once_cb(int fd, short event, void *arg)
+{
+ char buf[256];
+ int len;
+
+ len = read(fd, buf, sizeof(buf));
+
+ /* The event was added without EV_PERSIST, so a second invocation
+  * (tracked via the global 'called') means the test failed. */
+ if (called) {
+ test_ok = 0;
+ } else if (len) {
+ /* Assumes global pair[0] can be used for writing */
+ write(pair[0], TEST1, strlen(TEST1)+1);
+ test_ok = 1;
+ }
+
+ called++;
+}
+
+static void
+test_want_only_once(void)
+{
+ struct event ev;
+ struct timeval tv;
+
+ /* Very simple read test */
+ setup_test("Want read only once: ");
+
+ write(pair[0], TEST1, strlen(TEST1)+1);
+
+ /* Setup the loop termination */
+ evutil_timerclear(&tv);
+ tv.tv_sec = 1;
+ event_loopexit(&tv);
+
+ /* Non-persistent read event: read_once_cb must fire exactly once
+  * even though it writes more data back onto the pair. */
+ event_set(&ev, pair[1], EV_READ, read_once_cb, &ev);
+ if (event_add(&ev, NULL) == -1)
+ exit(1);
+ event_dispatch();
+
+ cleanup_test();
+}
+
+#define TEST_MAX_INT 6
+
+static void
+evtag_int_test(void)
+{
+ struct evbuffer *tmp = evbuffer_new();
+ uint32_t integers[TEST_MAX_INT] = {
+ 0xaf0, 0x1000, 0x1, 0xdeadbeef, 0x00, 0xbef000
+ };
+ uint32_t integer;
+ int i;
+
+ /* Round-trip: encode all sample integers into one buffer... */
+ for (i = 0; i < TEST_MAX_INT; i++) {
+ int oldlen, newlen;
+ oldlen = EVBUFFER_LENGTH(tmp);
+ encode_int(tmp, integers[i]);
+ newlen = EVBUFFER_LENGTH(tmp);
+ fprintf(stdout, "\t\tencoded 0x%08x with %d bytes\n",
+ integers[i], newlen - oldlen);
+ }
+
+ /* ...then decode them back in order and compare. */
+ for (i = 0; i < TEST_MAX_INT; i++) {
+ if (evtag_decode_int(&integer, tmp) == -1) {
+ fprintf(stderr, "decode %d failed", i);
+ exit(1);
+ }
+ if (integer != integers[i]) {
+ fprintf(stderr, "got %x, wanted %x",
+ integer, integers[i]);
+ exit(1);
+ }
+ }
+
+ /* The buffer must be fully consumed after decoding. */
+ if (EVBUFFER_LENGTH(tmp) != 0) {
+ fprintf(stderr, "trailing data");
+ exit(1);
+ }
+ evbuffer_free(tmp);
+
+ fprintf(stdout, "\t%s: OK\n", __func__);
+}
+
+static void
+evtag_fuzz(void)
+{
+ u_char buffer[4096];
+ struct evbuffer *tmp = evbuffer_new();
+ struct timeval tv;
+ int i, j;
+
+ /* Feed 100 random 4 KB blobs to the unmarshaller; nearly all of
+  * them should be rejected.  NOTE(review): rand() is never seeded,
+  * so the "random" input is the same on every run. */
+ int not_failed = 0;
+ for (j = 0; j < 100; j++) {
+ for (i = 0; i < sizeof(buffer); i++)
+ buffer[i] = rand();
+ evbuffer_drain(tmp, -1);
+ evbuffer_add(tmp, buffer, sizeof(buffer));
+
+ if (evtag_unmarshal_timeval(tmp, 0, &tv) != -1)
+ not_failed++;
+ }
+
+ /* The majority of decodes should fail */
+ if (not_failed >= 10) {
+ fprintf(stderr, "evtag_unmarshal should have failed");
+ exit(1);
+ }
+
+ /* Now insert some corruption into the tag length field */
+ evbuffer_drain(tmp, -1);
+ evutil_timerclear(&tv);
+ tv.tv_sec = 1;
+ evtag_marshal_timeval(tmp, 0, &tv);
+ evbuffer_add(tmp, buffer, sizeof(buffer));
+
+ /* Clobber byte 1 (the length field) of an otherwise valid record. */
+ EVBUFFER_DATA(tmp)[1] = 0xff;
+ if (evtag_unmarshal_timeval(tmp, 0, &tv) != -1) {
+ fprintf(stderr, "evtag_unmarshal_timeval should have failed");
+ exit(1);
+ }
+
+ evbuffer_free(tmp);
+
+ fprintf(stdout, "\t%s: OK\n", __func__);
+}
+
+static void
+evtag_tag_encoding(void)
+{
+ struct evbuffer *tmp = evbuffer_new();
+ uint32_t integers[TEST_MAX_INT] = {
+ 0xaf0, 0x1000, 0x1, 0xdeadbeef, 0x00, 0xbef000
+ };
+ uint32_t integer;
+ int i;
+
+ /* Same round-trip scheme as evtag_int_test(), but exercising the
+  * tag-number encoding (evtag_encode_tag/evtag_decode_tag). */
+ for (i = 0; i < TEST_MAX_INT; i++) {
+ int oldlen, newlen;
+ oldlen = EVBUFFER_LENGTH(tmp);
+ evtag_encode_tag(tmp, integers[i]);
+ newlen = EVBUFFER_LENGTH(tmp);
+ fprintf(stdout, "\t\tencoded 0x%08x with %d bytes\n",
+ integers[i], newlen - oldlen);
+ }
+
+ for (i = 0; i < TEST_MAX_INT; i++) {
+ if (evtag_decode_tag(&integer, tmp) == -1) {
+ fprintf(stderr, "decode %d failed", i);
+ exit(1);
+ }
+ if (integer != integers[i]) {
+ fprintf(stderr, "got %x, wanted %x",
+ integer, integers[i]);
+ exit(1);
+ }
+ }
+
+ /* The buffer must be fully consumed after decoding. */
+ if (EVBUFFER_LENGTH(tmp) != 0) {
+ fprintf(stderr, "trailing data");
+ exit(1);
+ }
+ evbuffer_free(tmp);
+
+ fprintf(stdout, "\t%s: OK\n", __func__);
+}
+
+static void
+evtag_test(void)
+{
+ /* Driver for the tag-encoding test group; each sub-test exits
+  * the process on failure, so reaching "OK" implies success. */
+ fprintf(stdout, "Testing Tagging:\n");
+
+ evtag_init();
+ evtag_int_test();
+ evtag_fuzz();
+
+ evtag_tag_encoding();
+
+ fprintf(stdout, "OK\n");
+}
+
+#ifndef WIN32
+static void
+rpc_test(void)
+{
+ struct msg *msg, *msg2;
+ struct kill *attack;
+ struct run *run;
+ struct evbuffer *tmp = evbuffer_new();
+ struct timeval tv_start, tv_end;
+ uint32_t tag;
+ int i;
+
+ fprintf(stdout, "Testing RPC: ");
+
+ /* Build a msg with required string fields, a nested kill message,
+  * and 1000 run sub-messages (generated API from regress.gen.h). */
+ msg = msg_new();
+ EVTAG_ASSIGN(msg, from_name, "niels");
+ EVTAG_ASSIGN(msg, to_name, "phoenix");
+
+ if (EVTAG_GET(msg, attack, &attack) == -1) {
+ fprintf(stderr, "Failed to set kill message.\n");
+ exit(1);
+ }
+
+ EVTAG_ASSIGN(attack, weapon, "feather");
+ EVTAG_ASSIGN(attack, action, "tickle");
+
+ evutil_gettimeofday(&tv_start, NULL);
+ for (i = 0; i < 1000; ++i) {
+ run = EVTAG_ADD(msg, run);
+ if (run == NULL) {
+ fprintf(stderr, "Failed to add run message.\n");
+ exit(1);
+ }
+ EVTAG_ASSIGN(run, how, "very fast but with some data in it");
+ EVTAG_ASSIGN(run, fixed_bytes,
+ (unsigned char*)"012345678901234567890123");
+ }
+
+ if (msg_complete(msg) == -1) {
+ fprintf(stderr, "Failed to make complete message.\n");
+ exit(1);
+ }
+
+ /* Marshal under tag 0xdeaf, then verify the tag can be peeked. */
+ evtag_marshal_msg(tmp, 0xdeaf, msg);
+
+ if (evtag_peek(tmp, &tag) == -1) {
+ fprintf(stderr, "Failed to peak tag.\n");
+ exit (1);
+ }
+
+ if (tag != 0xdeaf) {
+ fprintf(stderr, "Got incorrect tag: %0x.\n", tag);
+ exit (1);
+ }
+
+ /* Unmarshal into a fresh msg and check the round trip. */
+ msg2 = msg_new();
+ if (evtag_unmarshal_msg(tmp, 0xdeaf, msg2) == -1) {
+ fprintf(stderr, "Failed to unmarshal message.\n");
+ exit(1);
+ }
+
+ /* i is still 1000 here (loop bound), used for the per-add timing. */
+ evutil_gettimeofday(&tv_end, NULL);
+ evutil_timersub(&tv_end, &tv_start, &tv_end);
+ fprintf(stderr, "(%.1f us/add) ",
+ (float)tv_end.tv_sec/(float)i * 1000000.0 +
+ tv_end.tv_usec / (float)i);
+
+ if (!EVTAG_HAS(msg2, from_name) ||
+ !EVTAG_HAS(msg2, to_name) ||
+ !EVTAG_HAS(msg2, attack)) {
+ fprintf(stderr, "Missing data structures.\n");
+ exit(1);
+ }
+
+ if (EVTAG_LEN(msg2, run) != i) {
+ fprintf(stderr, "Wrong number of run messages.\n");
+ exit(1);
+ }
+
+ msg_free(msg);
+ msg_free(msg2);
+
+ evbuffer_free(tmp);
+
+ fprintf(stdout, "OK\n");
+}
+#endif
+
+/*
+ * Unit test for evutil_strtoll(): 64-bit decimal parsing beyond the
+ * 32-bit range, negative values, endptr positioning after leading
+ * whitespace, and rejection of non-numeric input.
+ * (Fixes the misspelled test banner "evutil_stroll".)
+ */
+static void
+test_evutil_strtoll(void)
+{
+ const char *s;
+ char *endptr;
+ setup_test("evutil_strtoll: ");
+ test_ok = 0;
+
+ if (evutil_strtoll("5000000000", NULL, 10) != ((ev_int64_t)5000000)*1000)
+ goto err;
+ if (evutil_strtoll("-5000000000", NULL, 10) != ((ev_int64_t)5000000)*-1000)
+ goto err;
+ s = " 99999stuff";
+ if (evutil_strtoll(s, &endptr, 10) != (ev_int64_t)99999)
+ goto err;
+ if (endptr != s+6)
+ goto err;
+ if (evutil_strtoll("foo", NULL, 10) != 0)
+ goto err;
+
+ test_ok = 1;
+ err:
+ cleanup_test();
+}
+
+
+int
+main (int argc, char **argv)
+{
+#ifdef WIN32
+ WORD wVersionRequested;
+ WSADATA wsaData;
+ int err;
+
+ wVersionRequested = MAKEWORD( 2, 2 );
+
+ /* NOTE(review): err is never checked after WSAStartup. */
+ err = WSAStartup( wVersionRequested, &wsaData );
+#endif
+
+#ifndef WIN32
+ /* Ignore SIGPIPE so writes to closed sockets fail with EPIPE
+  * instead of killing the test process. */
+ if (signal(SIGPIPE, SIG_IGN) == SIG_ERR)
+ return (1);
+#endif
+ /* Unbuffered stdout so progress lines appear even on a crash. */
+ setvbuf(stdout, NULL, _IONBF, 0);
+
+ /* Initialize the event library */
+ global_base = event_init();
+
+ test_registerfds();
+
+ test_evutil_strtoll();
+
+ /* use the global event base and need to be called first */
+ test_priorities(1);
+ test_priorities(2);
+ test_priorities(3);
+
+ test_evbuffer();
+ test_evbuffer_find();
+
+ test_bufferevent();
+ test_bufferevent_watermarks();
+
+ test_free_active_base();
+
+ test_event_base_new();
+
+ http_suite();
+
+#ifndef WIN32
+ rpc_suite();
+#endif
+
+ dns_suite();
+
+#ifndef WIN32
+ test_fork();
+#endif
+
+ test_simpleread();
+
+ test_simplewrite();
+
+ test_multiple();
+
+ test_persistent();
+
+ test_combined();
+
+ test_simpletimeout();
+#ifndef WIN32
+ test_simplesignal();
+ test_multiplesignal();
+ test_immediatesignal();
+#endif
+ test_loopexit();
+ test_loopbreak();
+
+ test_loopexit_multiple();
+
+ test_multiple_events_for_same_fd();
+
+ test_want_only_once();
+
+ evtag_test();
+
+#ifndef WIN32
+ rpc_test();
+
+ test_signal_dealloc();
+ test_signal_pipeloss();
+ test_signal_switchbase();
+ test_signal_restore();
+ test_signal_assert();
+ test_signal_while_processing();
+#endif
+
+ return (0);
+}
+
diff --git a/libevent/test/regress.gen.c b/libevent/test/regress.gen.c
new file mode 100644
index 00000000000..ff31096a7c2
--- /dev/null
+++ b/libevent/test/regress.gen.c
@@ -0,0 +1,872 @@
+/*
+ * Automatically generated from ./regress.rpc
+ * by event_rpcgen.py/0.1. DO NOT EDIT THIS FILE.
+ */
+
+#include <sys/types.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <event.h>
+
+
+#include "./regress.gen.h"
+
+void event_err(int eval, const char *fmt, ...);
+void event_warn(const char *fmt, ...);
+void event_errx(int eval, const char *fmt, ...);
+void event_warnx(const char *fmt, ...);
+
+
+/*
+ * Implementation of msg
+ */
+
+static struct msg_access_ __msg_base = {
+ msg_from_name_assign,
+ msg_from_name_get,
+ msg_to_name_assign,
+ msg_to_name_get,
+ msg_attack_assign,
+ msg_attack_get,
+ msg_run_assign,
+ msg_run_get,
+ msg_run_add,
+};
+
+struct msg *
+msg_new(void)
+{
+ struct msg *tmp;
+ if ((tmp = malloc(sizeof(struct msg))) == NULL) {
+ event_warn("%s: malloc", __func__);
+ return (NULL);
+ }
+ tmp->base = &__msg_base;
+
+ tmp->from_name_data = NULL;
+ tmp->from_name_set = 0;
+
+ tmp->to_name_data = NULL;
+ tmp->to_name_set = 0;
+
+ tmp->attack_data = NULL;
+ tmp->attack_set = 0;
+
+ tmp->run_data = NULL;
+ tmp->run_length = 0;
+ tmp->run_num_allocated = 0;
+ tmp->run_set = 0;
+
+ return (tmp);
+}
+
+
+
+
+struct run *
+msg_run_add(struct msg *msg)
+{
+ if (++msg->run_length >= msg->run_num_allocated) {
+ int tobe_allocated = msg->run_num_allocated;
+ struct run ** new_data = NULL;
+ tobe_allocated = !tobe_allocated ? 1 : tobe_allocated << 1;
+ new_data = (struct run **) realloc(msg->run_data,
+ tobe_allocated * sizeof(struct run *));
+ if (new_data == NULL)
+ goto error;
+ msg->run_data = new_data;
+ msg->run_num_allocated = tobe_allocated;
+ }
+ msg->run_data[msg->run_length - 1] = run_new();
+ if (msg->run_data[msg->run_length - 1] == NULL)
+ goto error;
+ msg->run_set = 1;
+ return (msg->run_data[msg->run_length - 1]);
+error:
+ --msg->run_length;
+ return (NULL);
+}
+
+
+int
+msg_from_name_assign(struct msg *msg,
+ const char * value)
+{
+ if (msg->from_name_data != NULL)
+ free(msg->from_name_data);
+ if ((msg->from_name_data = strdup(value)) == NULL)
+ return (-1);
+ msg->from_name_set = 1;
+ return (0);
+}
+
+int
+msg_to_name_assign(struct msg *msg,
+ const char * value)
+{
+ if (msg->to_name_data != NULL)
+ free(msg->to_name_data);
+ if ((msg->to_name_data = strdup(value)) == NULL)
+ return (-1);
+ msg->to_name_set = 1;
+ return (0);
+}
+
+int
+msg_attack_assign(struct msg *msg,
+ const struct kill* value)
+{
+ struct evbuffer *tmp = NULL;
+ if (msg->attack_set) {
+ kill_clear(msg->attack_data);
+ msg->attack_set = 0;
+ } else {
+ msg->attack_data = kill_new();
+ if (msg->attack_data == NULL) {
+ event_warn("%s: kill_new()", __func__);
+ goto error;
+ }
+ }
+ if ((tmp = evbuffer_new()) == NULL) {
+ event_warn("%s: evbuffer_new()", __func__);
+ goto error;
+ }
+ kill_marshal(tmp, value);
+ if (kill_unmarshal(msg->attack_data, tmp) == -1) {
+ event_warnx("%s: kill_unmarshal", __func__);
+ goto error;
+ }
+ msg->attack_set = 1;
+ evbuffer_free(tmp);
+ return (0);
+ error:
+ if (tmp != NULL)
+ evbuffer_free(tmp);
+ if (msg->attack_data != NULL) {
+ kill_free(msg->attack_data);
+ msg->attack_data = NULL;
+ }
+ return (-1);
+}
+
+int
+msg_run_assign(struct msg *msg, int off,
+ const struct run * value)
+{
+ struct evbuffer *tmp = NULL;
+ if (!msg->run_set || off < 0 || off >= msg->run_length)
+ return (-1);
+ run_clear(msg->run_data[off]);
+ if ((tmp = evbuffer_new()) == NULL) {
+ event_warn("%s: evbuffer_new()", __func__);
+ goto error;
+ }
+ run_marshal(tmp, value);
+ if (run_unmarshal(msg->run_data[off], tmp) == -1) {
+ event_warnx("%s: run_unmarshal", __func__);
+ goto error;
+ }
+ evbuffer_free(tmp);
+ return (0);
+error:
+ if (tmp != NULL)
+ evbuffer_free(tmp);
+ run_clear(msg->run_data[off]);
+ return (-1);
+}
+
+int
+msg_from_name_get(struct msg *msg, char * *value)
+{
+ if (msg->from_name_set != 1)
+ return (-1);
+ *value = msg->from_name_data;
+ return (0);
+}
+
+int
+msg_to_name_get(struct msg *msg, char * *value)
+{
+ if (msg->to_name_set != 1)
+ return (-1);
+ *value = msg->to_name_data;
+ return (0);
+}
+
+int
+msg_attack_get(struct msg *msg, struct kill* *value)
+{
+ if (msg->attack_set != 1) {
+ msg->attack_data = kill_new();
+ if (msg->attack_data == NULL)
+ return (-1);
+ msg->attack_set = 1;
+ }
+ *value = msg->attack_data;
+ return (0);
+}
+
+int
+msg_run_get(struct msg *msg, int offset,
+ struct run * *value)
+{
+ if (!msg->run_set || offset < 0 || offset >= msg->run_length)
+ return (-1);
+ *value = msg->run_data[offset];
+ return (0);
+}
+
+void
+msg_clear(struct msg *tmp)
+{
+ if (tmp->from_name_set == 1) {
+ free (tmp->from_name_data);
+ tmp->from_name_data = NULL;
+ tmp->from_name_set = 0;
+ }
+ if (tmp->to_name_set == 1) {
+ free (tmp->to_name_data);
+ tmp->to_name_data = NULL;
+ tmp->to_name_set = 0;
+ }
+ if (tmp->attack_set == 1) {
+ kill_free(tmp->attack_data);
+ tmp->attack_data = NULL;
+ tmp->attack_set = 0;
+ }
+ if (tmp->run_set == 1) {
+ int i;
+ for (i = 0; i < tmp->run_length; ++i) {
+ run_free(tmp->run_data[i]);
+ }
+ free(tmp->run_data);
+ tmp->run_data = NULL;
+ tmp->run_set = 0;
+ tmp->run_length = 0;
+ tmp->run_num_allocated = 0;
+ }
+}
+
+void
+msg_free(struct msg *tmp)
+{
+ if (tmp->from_name_data != NULL)
+ free (tmp->from_name_data);
+ if (tmp->to_name_data != NULL)
+ free (tmp->to_name_data);
+ if (tmp->attack_data != NULL)
+ kill_free(tmp->attack_data);
+ if (tmp->run_data != NULL) {
+ int i;
+ for (i = 0; i < tmp->run_length; ++i) {
+ run_free(tmp->run_data[i]);
+ tmp->run_data[i] = NULL;
+ }
+ free(tmp->run_data);
+ tmp->run_data = NULL;
+ tmp->run_length = 0;
+ tmp->run_num_allocated = 0;
+ }
+ free(tmp);
+}
+
+void
+msg_marshal(struct evbuffer *evbuf, const struct msg *tmp){
+ evtag_marshal_string(evbuf, MSG_FROM_NAME, tmp->from_name_data);
+ evtag_marshal_string(evbuf, MSG_TO_NAME, tmp->to_name_data);
+ if (tmp->attack_set) {
+ evtag_marshal_kill(evbuf, MSG_ATTACK, tmp->attack_data);
+ }
+ {
+ int i;
+ for (i = 0; i < tmp->run_length; ++i) {
+ evtag_marshal_run(evbuf, MSG_RUN, tmp->run_data[i]);
+ }
+ }
+}
+
+int
+msg_unmarshal(struct msg *tmp, struct evbuffer *evbuf)
+{
+ uint32_t tag;
+ while (EVBUFFER_LENGTH(evbuf) > 0) {
+ if (evtag_peek(evbuf, &tag) == -1)
+ return (-1);
+ switch (tag) {
+
+ case MSG_FROM_NAME:
+
+ if (tmp->from_name_set)
+ return (-1);
+ if (evtag_unmarshal_string(evbuf, MSG_FROM_NAME, &tmp->from_name_data) == -1) {
+ event_warnx("%s: failed to unmarshal from_name", __func__);
+ return (-1);
+ }
+ tmp->from_name_set = 1;
+ break;
+
+ case MSG_TO_NAME:
+
+ if (tmp->to_name_set)
+ return (-1);
+ if (evtag_unmarshal_string(evbuf, MSG_TO_NAME, &tmp->to_name_data) == -1) {
+ event_warnx("%s: failed to unmarshal to_name", __func__);
+ return (-1);
+ }
+ tmp->to_name_set = 1;
+ break;
+
+ case MSG_ATTACK:
+
+ if (tmp->attack_set)
+ return (-1);
+ tmp->attack_data = kill_new();
+ if (tmp->attack_data == NULL)
+ return (-1);
+ if (evtag_unmarshal_kill(evbuf, MSG_ATTACK, tmp->attack_data) == -1) {
+ event_warnx("%s: failed to unmarshal attack", __func__);
+ return (-1);
+ }
+ tmp->attack_set = 1;
+ break;
+
+ case MSG_RUN:
+
+ if (msg_run_add(tmp) == NULL)
+ return (-1);
+ if (evtag_unmarshal_run(evbuf, MSG_RUN,
+ tmp->run_data[tmp->run_length - 1]) == -1) {
+ --tmp->run_length;
+ event_warnx("%s: failed to unmarshal run", __func__);
+ return (-1);
+ }
+ tmp->run_set = 1;
+ break;
+
+ default:
+ return -1;
+ }
+ }
+
+ if (msg_complete(tmp) == -1)
+ return (-1);
+ return (0);
+}
+
+int
+msg_complete(struct msg *msg)
+{
+ if (!msg->from_name_set)
+ return (-1);
+ if (!msg->to_name_set)
+ return (-1);
+ if (msg->attack_set && kill_complete(msg->attack_data) == -1)
+ return (-1);
+ {
+ int i;
+ for (i = 0; i < msg->run_length; ++i) {
+ if (run_complete(msg->run_data[i]) == -1)
+ return (-1);
+ }
+ }
+ return (0);
+}
+
+int
+evtag_unmarshal_msg(struct evbuffer *evbuf, uint32_t need_tag, struct msg *msg)
+{
+ uint32_t tag;
+ int res = -1;
+
+ struct evbuffer *tmp = evbuffer_new();
+
+ if (evtag_unmarshal(evbuf, &tag, tmp) == -1 || tag != need_tag)
+ goto error;
+
+ if (msg_unmarshal(msg, tmp) == -1)
+ goto error;
+
+ res = 0;
+
+ error:
+ evbuffer_free(tmp);
+ return (res);
+}
+
+void
+evtag_marshal_msg(struct evbuffer *evbuf, uint32_t tag, const struct msg *msg)
+{
+ struct evbuffer *_buf = evbuffer_new();
+ assert(_buf != NULL);
+ evbuffer_drain(_buf, -1);
+ msg_marshal(_buf, msg);
+ evtag_marshal(evbuf, tag, EVBUFFER_DATA(_buf), EVBUFFER_LENGTH(_buf));
+ evbuffer_free(_buf);
+}
+
+/*
+ * Implementation of kill
+ */
+
+static struct kill_access_ __kill_base = {
+ kill_weapon_assign,
+ kill_weapon_get,
+ kill_action_assign,
+ kill_action_get,
+ kill_how_often_assign,
+ kill_how_often_get,
+};
+
+struct kill *
+kill_new(void)
+{
+ struct kill *tmp;
+ if ((tmp = malloc(sizeof(struct kill))) == NULL) {
+ event_warn("%s: malloc", __func__);
+ return (NULL);
+ }
+ tmp->base = &__kill_base;
+
+ tmp->weapon_data = NULL;
+ tmp->weapon_set = 0;
+
+ tmp->action_data = NULL;
+ tmp->action_set = 0;
+
+ tmp->how_often_data = 0;
+ tmp->how_often_set = 0;
+
+ return (tmp);
+}
+
+
+
+
+int
+kill_weapon_assign(struct kill *msg,
+ const char * value)
+{
+ if (msg->weapon_data != NULL)
+ free(msg->weapon_data);
+ if ((msg->weapon_data = strdup(value)) == NULL)
+ return (-1);
+ msg->weapon_set = 1;
+ return (0);
+}
+
+int
+kill_action_assign(struct kill *msg,
+ const char * value)
+{
+ if (msg->action_data != NULL)
+ free(msg->action_data);
+ if ((msg->action_data = strdup(value)) == NULL)
+ return (-1);
+ msg->action_set = 1;
+ return (0);
+}
+
+int
+kill_how_often_assign(struct kill *msg, const uint32_t value)
+{
+ msg->how_often_set = 1;
+ msg->how_often_data = value;
+ return (0);
+}
+
+int
+kill_weapon_get(struct kill *msg, char * *value)
+{
+ if (msg->weapon_set != 1)
+ return (-1);
+ *value = msg->weapon_data;
+ return (0);
+}
+
+int
+kill_action_get(struct kill *msg, char * *value)
+{
+ if (msg->action_set != 1)
+ return (-1);
+ *value = msg->action_data;
+ return (0);
+}
+
+int
+kill_how_often_get(struct kill *msg, uint32_t *value)
+{
+ if (msg->how_often_set != 1)
+ return (-1);
+ *value = msg->how_often_data;
+ return (0);
+}
+
+void
+kill_clear(struct kill *tmp)
+{
+ if (tmp->weapon_set == 1) {
+ free (tmp->weapon_data);
+ tmp->weapon_data = NULL;
+ tmp->weapon_set = 0;
+ }
+ if (tmp->action_set == 1) {
+ free (tmp->action_data);
+ tmp->action_data = NULL;
+ tmp->action_set = 0;
+ }
+ tmp->how_often_set = 0;
+}
+
+void
+kill_free(struct kill *tmp)
+{
+ if (tmp->weapon_data != NULL)
+ free (tmp->weapon_data);
+ if (tmp->action_data != NULL)
+ free (tmp->action_data);
+ free(tmp);
+}
+
+void
+kill_marshal(struct evbuffer *evbuf, const struct kill *tmp){
+ evtag_marshal_string(evbuf, KILL_WEAPON, tmp->weapon_data);
+ evtag_marshal_string(evbuf, KILL_ACTION, tmp->action_data);
+ if (tmp->how_often_set) {
+ evtag_marshal_int(evbuf, KILL_HOW_OFTEN, tmp->how_often_data);
+ }
+}
+
+int
+kill_unmarshal(struct kill *tmp, struct evbuffer *evbuf)
+{
+ uint32_t tag;
+ while (EVBUFFER_LENGTH(evbuf) > 0) {
+ if (evtag_peek(evbuf, &tag) == -1)
+ return (-1);
+ switch (tag) {
+
+ case KILL_WEAPON:
+
+ if (tmp->weapon_set)
+ return (-1);
+ if (evtag_unmarshal_string(evbuf, KILL_WEAPON, &tmp->weapon_data) == -1) {
+ event_warnx("%s: failed to unmarshal weapon", __func__);
+ return (-1);
+ }
+ tmp->weapon_set = 1;
+ break;
+
+ case KILL_ACTION:
+
+ if (tmp->action_set)
+ return (-1);
+ if (evtag_unmarshal_string(evbuf, KILL_ACTION, &tmp->action_data) == -1) {
+ event_warnx("%s: failed to unmarshal action", __func__);
+ return (-1);
+ }
+ tmp->action_set = 1;
+ break;
+
+ case KILL_HOW_OFTEN:
+
+ if (tmp->how_often_set)
+ return (-1);
+ if (evtag_unmarshal_int(evbuf, KILL_HOW_OFTEN, &tmp->how_often_data) == -1) {
+ event_warnx("%s: failed to unmarshal how_often", __func__);
+ return (-1);
+ }
+ tmp->how_often_set = 1;
+ break;
+
+ default:
+ return -1;
+ }
+ }
+
+ if (kill_complete(tmp) == -1)
+ return (-1);
+ return (0);
+}
+
+int
+kill_complete(struct kill *msg)
+{
+ if (!msg->weapon_set)
+ return (-1);
+ if (!msg->action_set)
+ return (-1);
+ return (0);
+}
+
+int
+evtag_unmarshal_kill(struct evbuffer *evbuf, uint32_t need_tag, struct kill *msg)
+{
+ uint32_t tag;
+ int res = -1;
+
+ struct evbuffer *tmp = evbuffer_new();
+
+ if (evtag_unmarshal(evbuf, &tag, tmp) == -1 || tag != need_tag)
+ goto error;
+
+ if (kill_unmarshal(msg, tmp) == -1)
+ goto error;
+
+ res = 0;
+
+ error:
+ evbuffer_free(tmp);
+ return (res);
+}
+
+void
+evtag_marshal_kill(struct evbuffer *evbuf, uint32_t tag, const struct kill *msg)
+{
+ struct evbuffer *_buf = evbuffer_new();
+ assert(_buf != NULL);
+ evbuffer_drain(_buf, -1);
+ kill_marshal(_buf, msg);
+ evtag_marshal(evbuf, tag, EVBUFFER_DATA(_buf), EVBUFFER_LENGTH(_buf));
+ evbuffer_free(_buf);
+}
+
+/*
+ * Implementation of run
+ */
+
+static struct run_access_ __run_base = {
+ run_how_assign,
+ run_how_get,
+ run_some_bytes_assign,
+ run_some_bytes_get,
+ run_fixed_bytes_assign,
+ run_fixed_bytes_get,
+};
+
+struct run *
+run_new(void)
+{
+ struct run *tmp;
+ if ((tmp = malloc(sizeof(struct run))) == NULL) {
+ event_warn("%s: malloc", __func__);
+ return (NULL);
+ }
+ tmp->base = &__run_base;
+
+ tmp->how_data = NULL;
+ tmp->how_set = 0;
+
+ tmp->some_bytes_data = NULL;
+ tmp->some_bytes_length = 0;
+ tmp->some_bytes_set = 0;
+
+ memset(tmp->fixed_bytes_data, 0, sizeof(tmp->fixed_bytes_data));
+ tmp->fixed_bytes_set = 0;
+
+ return (tmp);
+}
+
+
+
+
+int
+run_how_assign(struct run *msg,
+ const char * value)
+{
+ if (msg->how_data != NULL)
+ free(msg->how_data);
+ if ((msg->how_data = strdup(value)) == NULL)
+ return (-1);
+ msg->how_set = 1;
+ return (0);
+}
+
+int
+run_some_bytes_assign(struct run *msg, const uint8_t * value, uint32_t len)
+{
+ if (msg->some_bytes_data != NULL)
+ free (msg->some_bytes_data);
+ msg->some_bytes_data = malloc(len);
+ if (msg->some_bytes_data == NULL)
+ return (-1);
+ msg->some_bytes_set = 1;
+ msg->some_bytes_length = len;
+ memcpy(msg->some_bytes_data, value, len);
+ return (0);
+}
+
+int
+run_fixed_bytes_assign(struct run *msg, const uint8_t *value)
+{
+ msg->fixed_bytes_set = 1;
+ memcpy(msg->fixed_bytes_data, value, 24);
+ return (0);
+}
+
+int
+run_how_get(struct run *msg, char * *value)
+{
+ if (msg->how_set != 1)
+ return (-1);
+ *value = msg->how_data;
+ return (0);
+}
+
+int
+run_some_bytes_get(struct run *msg, uint8_t * *value, uint32_t *plen)
+{
+ if (msg->some_bytes_set != 1)
+ return (-1);
+ *value = msg->some_bytes_data;
+ *plen = msg->some_bytes_length;
+ return (0);
+}
+
+int
+run_fixed_bytes_get(struct run *msg, uint8_t **value)
+{
+ if (msg->fixed_bytes_set != 1)
+ return (-1);
+ *value = msg->fixed_bytes_data;
+ return (0);
+}
+
+void
+run_clear(struct run *tmp)
+{
+ if (tmp->how_set == 1) {
+ free (tmp->how_data);
+ tmp->how_data = NULL;
+ tmp->how_set = 0;
+ }
+ if (tmp->some_bytes_set == 1) {
+ free (tmp->some_bytes_data);
+ tmp->some_bytes_data = NULL;
+ tmp->some_bytes_length = 0;
+ tmp->some_bytes_set = 0;
+ }
+ tmp->fixed_bytes_set = 0;
+ memset(tmp->fixed_bytes_data, 0, sizeof(tmp->fixed_bytes_data));
+}
+
+void
+run_free(struct run *tmp)
+{
+ if (tmp->how_data != NULL)
+ free (tmp->how_data);
+ if (tmp->some_bytes_data != NULL)
+ free (tmp->some_bytes_data);
+ free(tmp);
+}
+
+void
+run_marshal(struct evbuffer *evbuf, const struct run *tmp){
+ evtag_marshal_string(evbuf, RUN_HOW, tmp->how_data);
+ if (tmp->some_bytes_set) {
+ evtag_marshal(evbuf, RUN_SOME_BYTES, tmp->some_bytes_data, tmp->some_bytes_length);
+ }
+ evtag_marshal(evbuf, RUN_FIXED_BYTES, tmp->fixed_bytes_data, sizeof(tmp->fixed_bytes_data));
+}
+
+int
+run_unmarshal(struct run *tmp, struct evbuffer *evbuf)
+{
+ uint32_t tag;
+ while (EVBUFFER_LENGTH(evbuf) > 0) {
+ if (evtag_peek(evbuf, &tag) == -1)
+ return (-1);
+ switch (tag) {
+
+ case RUN_HOW:
+
+ if (tmp->how_set)
+ return (-1);
+ if (evtag_unmarshal_string(evbuf, RUN_HOW, &tmp->how_data) == -1) {
+ event_warnx("%s: failed to unmarshal how", __func__);
+ return (-1);
+ }
+ tmp->how_set = 1;
+ break;
+
+ case RUN_SOME_BYTES:
+
+ if (tmp->some_bytes_set)
+ return (-1);
+ if (evtag_payload_length(evbuf, &tmp->some_bytes_length) == -1)
+ return (-1);
+ if (tmp->some_bytes_length > EVBUFFER_LENGTH(evbuf))
+ return (-1);
+ if ((tmp->some_bytes_data = malloc(tmp->some_bytes_length)) == NULL)
+ return (-1);
+ if (evtag_unmarshal_fixed(evbuf, RUN_SOME_BYTES, tmp->some_bytes_data, tmp->some_bytes_length) == -1) {
+ event_warnx("%s: failed to unmarshal some_bytes", __func__);
+ return (-1);
+ }
+ tmp->some_bytes_set = 1;
+ break;
+
+ case RUN_FIXED_BYTES:
+
+ if (tmp->fixed_bytes_set)
+ return (-1);
+ if (evtag_unmarshal_fixed(evbuf, RUN_FIXED_BYTES, tmp->fixed_bytes_data, sizeof(tmp->fixed_bytes_data)) == -1) {
+ event_warnx("%s: failed to unmarshal fixed_bytes", __func__);
+ return (-1);
+ }
+ tmp->fixed_bytes_set = 1;
+ break;
+
+ default:
+ return -1;
+ }
+ }
+
+ if (run_complete(tmp) == -1)
+ return (-1);
+ return (0);
+}
+
+int
+run_complete(struct run *msg)
+{
+ if (!msg->how_set)
+ return (-1);
+ if (!msg->fixed_bytes_set)
+ return (-1);
+ return (0);
+}
+
+int
+evtag_unmarshal_run(struct evbuffer *evbuf, uint32_t need_tag, struct run *msg)
+{
+ uint32_t tag;
+ int res = -1;
+
+ struct evbuffer *tmp = evbuffer_new();
+
+ if (evtag_unmarshal(evbuf, &tag, tmp) == -1 || tag != need_tag)
+ goto error;
+
+ if (run_unmarshal(msg, tmp) == -1)
+ goto error;
+
+ res = 0;
+
+ error:
+ evbuffer_free(tmp);
+ return (res);
+}
+
+void
+evtag_marshal_run(struct evbuffer *evbuf, uint32_t tag, const struct run *msg)
+{
+ struct evbuffer *_buf = evbuffer_new();
+ assert(_buf != NULL);
+ evbuffer_drain(_buf, -1);
+ run_marshal(_buf, msg);
+ evtag_marshal(evbuf, tag, EVBUFFER_DATA(_buf), EVBUFFER_LENGTH(_buf));
+ evbuffer_free(_buf);
+}
+
diff --git a/libevent/test/regress.gen.h b/libevent/test/regress.gen.h
new file mode 100644
index 00000000000..09591f0584b
--- /dev/null
+++ b/libevent/test/regress.gen.h
@@ -0,0 +1,183 @@
+/*
+ * Automatically generated from ./regress.rpc
+ */
+/* NOTE(review): generated file -- do not hand-edit; change regress.rpc
+ * and regenerate.  The guard name ___REGRESS_RPC_ uses a reserved
+ * identifier prefix; fixing that belongs in the generator. */
+
+#ifndef ___REGRESS_RPC_
+#define ___REGRESS_RPC_
+
+#include <event-config.h>
+#ifdef _EVENT_HAVE_STDINT_H
+#include <stdint.h>
+#endif
+#define EVTAG_HAS(msg, member) ((msg)->member##_set == 1)
+#ifdef __GNUC__
+#define EVTAG_ASSIGN(msg, member, args...) (*(msg)->base->member##_assign)(msg, ## args)
+#define EVTAG_GET(msg, member, args...) (*(msg)->base->member##_get)(msg, ## args)
+#else
+#define EVTAG_ASSIGN(msg, member, ...) (*(msg)->base->member##_assign)(msg, ## __VA_ARGS__)
+#define EVTAG_GET(msg, member, ...) (*(msg)->base->member##_get)(msg, ## __VA_ARGS__)
+#endif
+#define EVTAG_ADD(msg, member) (*(msg)->base->member##_add)(msg)
+#define EVTAG_LEN(msg, member) ((msg)->member##_length)
+
+struct msg;
+struct kill;
+struct run;
+
+/* Tag definition for msg */
+enum msg_ {
+  MSG_FROM_NAME=1,
+  MSG_TO_NAME=2,
+  MSG_ATTACK=3,
+  MSG_RUN=4,
+  MSG_MAX_TAGS
+};
+
+/* Structure declaration for msg */
+struct msg_access_ {
+  int (*from_name_assign)(struct msg *, const char *);
+  int (*from_name_get)(struct msg *, char * *);
+  int (*to_name_assign)(struct msg *, const char *);
+  int (*to_name_get)(struct msg *, char * *);
+  int (*attack_assign)(struct msg *, const struct kill*);
+  int (*attack_get)(struct msg *, struct kill* *);
+  int (*run_assign)(struct msg *, int, const struct run *);
+  int (*run_get)(struct msg *, int, struct run * *);
+  struct run * (*run_add)(struct msg *);
+};
+
+struct msg {
+  struct msg_access_ *base;
+
+  char *from_name_data;
+  char *to_name_data;
+  struct kill* attack_data;
+  struct run **run_data;
+  int run_length;
+  int run_num_allocated;
+
+  uint8_t from_name_set;
+  uint8_t to_name_set;
+  uint8_t attack_set;
+  uint8_t run_set;
+};
+
+struct msg *msg_new(void);
+void msg_free(struct msg *);
+void msg_clear(struct msg *);
+void msg_marshal(struct evbuffer *, const struct msg *);
+int msg_unmarshal(struct msg *, struct evbuffer *);
+int msg_complete(struct msg *);
+void evtag_marshal_msg(struct evbuffer *, uint32_t,
+    const struct msg *);
+int evtag_unmarshal_msg(struct evbuffer *, uint32_t,
+    struct msg *);
+int msg_from_name_assign(struct msg *, const char *);
+int msg_from_name_get(struct msg *, char * *);
+int msg_to_name_assign(struct msg *, const char *);
+int msg_to_name_get(struct msg *, char * *);
+int msg_attack_assign(struct msg *, const struct kill*);
+int msg_attack_get(struct msg *, struct kill* *);
+int msg_run_assign(struct msg *, int, const struct run *);
+int msg_run_get(struct msg *, int, struct run * *);
+struct run * msg_run_add(struct msg *);
+/* --- msg done --- */
+
+/* Tag definition for kill */
+enum kill_ {
+  KILL_WEAPON=65825, /* 0x10121 in regress.rpc: exercises multi-byte tag encoding */
+  KILL_ACTION=2,
+  KILL_HOW_OFTEN=3,
+  KILL_MAX_TAGS
+};
+
+/* Structure declaration for kill */
+struct kill_access_ {
+  int (*weapon_assign)(struct kill *, const char *);
+  int (*weapon_get)(struct kill *, char * *);
+  int (*action_assign)(struct kill *, const char *);
+  int (*action_get)(struct kill *, char * *);
+  int (*how_often_assign)(struct kill *, const uint32_t);
+  int (*how_often_get)(struct kill *, uint32_t *);
+};
+
+struct kill {
+  struct kill_access_ *base;
+
+  char *weapon_data;
+  char *action_data;
+  uint32_t how_often_data;
+
+  uint8_t weapon_set;
+  uint8_t action_set;
+  uint8_t how_often_set;
+};
+
+struct kill *kill_new(void);
+void kill_free(struct kill *);
+void kill_clear(struct kill *);
+void kill_marshal(struct evbuffer *, const struct kill *);
+int kill_unmarshal(struct kill *, struct evbuffer *);
+int kill_complete(struct kill *);
+void evtag_marshal_kill(struct evbuffer *, uint32_t,
+    const struct kill *);
+int evtag_unmarshal_kill(struct evbuffer *, uint32_t,
+    struct kill *);
+int kill_weapon_assign(struct kill *, const char *);
+int kill_weapon_get(struct kill *, char * *);
+int kill_action_assign(struct kill *, const char *);
+int kill_action_get(struct kill *, char * *);
+int kill_how_often_assign(struct kill *, const uint32_t);
+int kill_how_often_get(struct kill *, uint32_t *);
+/* --- kill done --- */
+
+/* Tag definition for run */
+enum run_ {
+  RUN_HOW=1,
+  RUN_SOME_BYTES=2,
+  RUN_FIXED_BYTES=3,
+  RUN_MAX_TAGS
+};
+
+/* Structure declaration for run */
+struct run_access_ {
+  int (*how_assign)(struct run *, const char *);
+  int (*how_get)(struct run *, char * *);
+  int (*some_bytes_assign)(struct run *, const uint8_t *, uint32_t);
+  int (*some_bytes_get)(struct run *, uint8_t * *, uint32_t *);
+  int (*fixed_bytes_assign)(struct run *, const uint8_t *);
+  int (*fixed_bytes_get)(struct run *, uint8_t **);
+};
+
+struct run {
+  struct run_access_ *base;
+
+  char *how_data;
+  uint8_t *some_bytes_data;
+  uint32_t some_bytes_length;
+  uint8_t fixed_bytes_data[24]; /* fixed length from "bytes fixed_bytes[24]" */
+
+  uint8_t how_set;
+  uint8_t some_bytes_set;
+  uint8_t fixed_bytes_set;
+};
+
+struct run *run_new(void);
+void run_free(struct run *);
+void run_clear(struct run *);
+void run_marshal(struct evbuffer *, const struct run *);
+int run_unmarshal(struct run *, struct evbuffer *);
+int run_complete(struct run *);
+void evtag_marshal_run(struct evbuffer *, uint32_t,
+    const struct run *);
+int evtag_unmarshal_run(struct evbuffer *, uint32_t,
+    struct run *);
+int run_how_assign(struct run *, const char *);
+int run_how_get(struct run *, char * *);
+int run_some_bytes_assign(struct run *, const uint8_t *, uint32_t);
+int run_some_bytes_get(struct run *, uint8_t * *, uint32_t *);
+int run_fixed_bytes_assign(struct run *, const uint8_t *);
+int run_fixed_bytes_get(struct run *, uint8_t **);
+/* --- run done --- */
+
+#endif /* ___REGRESS_RPC_ */
diff --git a/libevent/test/regress.h b/libevent/test/regress.h
new file mode 100644
index 00000000000..4060ff5c6ac
--- /dev/null
+++ b/libevent/test/regress.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2000-2004 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _REGRESS_H_
+#define _REGRESS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Entry points for the libevent regression sub-suites; each runs its
+ * tests synchronously and exits the process on failure. */
+void http_suite(void);
+void http_basic_test(void);
+
+void rpc_suite(void);
+
+void dns_suite(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _REGRESS_H_ */
diff --git a/libevent/test/regress.rpc b/libevent/test/regress.rpc
new file mode 100644
index 00000000000..65ca95de4cf
--- /dev/null
+++ b/libevent/test/regress.rpc
@@ -0,0 +1,20 @@
+/* tests data packing and unpacking */
+/* Input spec for the event_rpcgen code generator; regress.gen.c/h are
+ * produced from this file.  Tag numbers become the wire identifiers. */
+
+struct msg {
+  string from_name = 1;
+  string to_name = 2;
+  optional struct[kill] attack = 3;
+  array struct[run] run = 4;
+}
+
+struct kill {
+  /* deliberately large tag to exercise multi-byte tag encoding */
+  string weapon = 0x10121;
+  string action = 2;
+  optional int how_often = 3;
+}
+
+struct run {
+  string how = 1;
+  optional bytes some_bytes = 2;
+  bytes fixed_bytes[24] = 3;
+}
diff --git a/libevent/test/regress_dns.c b/libevent/test/regress_dns.c
new file mode 100644
index 00000000000..129cdad498f
--- /dev/null
+++ b/libevent/test/regress_dns.c
@@ -0,0 +1,376 @@
+/*
+ * Copyright (c) 2003-2006 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef WIN32
+#include <winsock2.h>
+#include <windows.h>
+#endif
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#include <sys/queue.h>
+#ifndef WIN32
+#include <sys/socket.h>
+#include <signal.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#endif
+#ifdef HAVE_NETINET_IN6_H
+#include <netinet/in6.h>
+#endif
+#ifdef HAVE_NETDB_H
+#include <netdb.h>
+#endif
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+
+#include "event.h"
+#include "evdns.h"
+#include "log.h"
+
+static int dns_ok = 0;
+static int dns_err = 0;
+
+void dns_suite(void);
+
+/* Shared evdns callback for the forward/reverse lookup tests.  Records
+ * the answer type in dns_ok (0 on failure), a timeout in dns_err, and
+ * always stops the event loop so the calling test can inspect them. */
+static void
+dns_gethostbyname_cb(int result, char type, int count, int ttl,
+    void *addresses, void *arg)
+{
+	dns_ok = dns_err = 0;
+
+	if (result == DNS_ERR_TIMEOUT) {
+		fprintf(stdout, "[Timed out] ");
+		dns_err = result;
+		goto out;
+	}
+
+	if (result != DNS_ERR_NONE) {
+		fprintf(stdout, "[Error code %d] ", result);
+		goto out;
+	}
+
+	fprintf(stderr, "type: %d, count: %d, ttl: %d: ", type, count, ttl);
+
+	switch (type) {
+	case DNS_IPv6_AAAA: {
+#if defined(HAVE_STRUCT_IN6_ADDR) && defined(HAVE_INET_NTOP) && defined(INET6_ADDRSTRLEN)
+		struct in6_addr *in6_addrs = addresses;
+		char buf[INET6_ADDRSTRLEN+1];
+		int i;
+		/* a resolution that's not valid does not help */
+		if (ttl < 0)
+			goto out;
+		for (i = 0; i < count; ++i) {
+			const char *b = inet_ntop(AF_INET6, &in6_addrs[i], buf,sizeof(buf));
+			if (b)
+				fprintf(stderr, "%s ", b);
+			else
+				fprintf(stderr, "%s ", strerror(errno));
+		}
+#endif
+		break;
+	}
+	case DNS_IPv4_A: {
+		struct in_addr *in_addrs = addresses;
+		int i;
+		/* a resolution that's not valid does not help */
+		if (ttl < 0)
+			goto out;
+		for (i = 0; i < count; ++i)
+			fprintf(stderr, "%s ", inet_ntoa(in_addrs[i]));
+		break;
+	}
+	case DNS_PTR:
+		/* may get at most one PTR */
+		if (count != 1)
+			goto out;
+
+		fprintf(stderr, "%s ", *(char **)addresses);
+		break;
+	default:
+		goto out;
+	}
+
+	/* success: remember which record type we decoded */
+	dns_ok = type;
+
+out:
+	event_loopexit(NULL);
+}
+
+/* Forward IPv4 lookup against a live resolver; exits the process on
+ * failure.  NOTE(review): depends on external DNS for www.monkey.org. */
+static void
+dns_gethostbyname(void)
+{
+	fprintf(stdout, "Simple DNS resolve: ");
+	dns_ok = 0;
+	evdns_resolve_ipv4("www.monkey.org", 0, dns_gethostbyname_cb, NULL);
+	event_dispatch();
+
+	if (dns_ok == DNS_IPv4_A) {
+		fprintf(stdout, "OK\n");
+	} else {
+		fprintf(stdout, "FAILED\n");
+		exit(1);
+	}
+}
+
+/* Forward IPv6 (AAAA) lookup; a timeout is tolerated as SKIPPED since
+ * many test hosts lack IPv6 connectivity, any other failure aborts. */
+static void
+dns_gethostbyname6(void)
+{
+	fprintf(stdout, "IPv6 DNS resolve: ");
+	dns_ok = 0;
+	evdns_resolve_ipv6("www.ietf.org", 0, dns_gethostbyname_cb, NULL);
+	event_dispatch();
+
+	if (dns_ok == DNS_IPv6_AAAA) {
+		fprintf(stdout, "OK\n");
+	} else if (!dns_ok && dns_err == DNS_ERR_TIMEOUT) {
+		fprintf(stdout, "SKIPPED\n");
+	} else {
+		fprintf(stdout, "FAILED (%d)\n", dns_ok);
+		exit(1);
+	}
+}
+
+/* Reverse (PTR) lookup of 127.0.0.1; exits the process on failure. */
+static void
+dns_gethostbyaddr(void)
+{
+	struct in_addr in;
+	in.s_addr = htonl(0x7f000001ul); /* 127.0.0.1 */
+	fprintf(stdout, "Simple reverse DNS resolve: ");
+	dns_ok = 0;
+	evdns_resolve_reverse(&in, 0, dns_gethostbyname_cb, NULL);
+	event_dispatch();
+
+	if (dns_ok == DNS_PTR) {
+		fprintf(stdout, "OK\n");
+	} else {
+		fprintf(stdout, "FAILED\n");
+		exit(1);
+	}
+}
+
+/* Counts replies seen by dns_server_gethostbyname_cb; the loop exits
+ * after the third one. */
+static int n_server_responses = 0;
+
+/* Server-side handler for the in-process test nameserver: answers the
+ * three expected questions (A/AAAA for zz.example.com, PTR for
+ * 192.168.11.11) with fixed records and clears dns_ok on anything
+ * unexpected or on a failed send. */
+static void
+dns_server_request_cb(struct evdns_server_request *req, void *data)
+{
+	int i, r;
+	const char TEST_ARPA[] = "11.11.168.192.in-addr.arpa";
+	for (i = 0; i < req->nquestions; ++i) {
+		struct in_addr ans;
+		ans.s_addr = htonl(0xc0a80b0bUL); /* 192.168.11.11 */
+		if (req->questions[i]->type == EVDNS_TYPE_A &&
+			req->questions[i]->dns_question_class == EVDNS_CLASS_INET &&
+			!strcmp(req->questions[i]->name, "zz.example.com")) {
+			r = evdns_server_request_add_a_reply(req, "zz.example.com",
+			    1, &ans.s_addr, 12345);
+			if (r<0)
+				dns_ok = 0;
+		} else if (req->questions[i]->type == EVDNS_TYPE_AAAA &&
+			req->questions[i]->dns_question_class == EVDNS_CLASS_INET &&
+			!strcmp(req->questions[i]->name, "zz.example.com")) {
+			char addr6[17] = "abcdefghijklmnop";
+			r = evdns_server_request_add_aaaa_reply(req, "zz.example.com",
+			    1, addr6, 123);
+			if (r<0)
+				dns_ok = 0;
+		} else if (req->questions[i]->type == EVDNS_TYPE_PTR &&
+			req->questions[i]->dns_question_class == EVDNS_CLASS_INET &&
+			!strcmp(req->questions[i]->name, TEST_ARPA)) {
+			r = evdns_server_request_add_ptr_reply(req, NULL, TEST_ARPA,
+			    "ZZ.EXAMPLE.COM", 54321);
+			if (r<0)
+				dns_ok = 0;
+		} else {
+			fprintf(stdout, "Unexpected question %d %d \"%s\" ",
+				req->questions[i]->type,
+				req->questions[i]->dns_question_class,
+				req->questions[i]->name);
+			dns_ok = 0;
+		}
+	}
+	r = evdns_server_request_respond(req, 0);
+	if (r<0) {
+		fprintf(stdout, "Couldn't send reply. ");
+		dns_ok = 0;
+	}
+}
+
+/* Client-side callback for the test nameserver: verifies each answer
+ * (address bytes and TTL) against the fixed records served by
+ * dns_server_request_cb, clearing dns_ok on any mismatch.  Exits the
+ * event loop once all three expected responses have arrived. */
+static void
+dns_server_gethostbyname_cb(int result, char type, int count, int ttl,
+    void *addresses, void *arg)
+{
+	if (result != DNS_ERR_NONE) {
+		fprintf(stdout, "Unexpected result %d. ", result);
+		dns_ok = 0;
+		goto out;
+	}
+	if (count != 1) {
+		fprintf(stdout, "Unexpected answer count %d. ", count);
+		dns_ok = 0;
+		goto out;
+	}
+	switch (type) {
+	case DNS_IPv4_A: {
+		struct in_addr *in_addrs = addresses;
+		if (in_addrs[0].s_addr != htonl(0xc0a80b0bUL) || ttl != 12345) {
+			fprintf(stdout, "Bad IPv4 response \"%s\" %d. ",
+					inet_ntoa(in_addrs[0]), ttl);
+			dns_ok = 0;
+			goto out;
+		}
+		break;
+	}
+	case DNS_IPv6_AAAA: {
+#if defined (HAVE_STRUCT_IN6_ADDR) && defined(HAVE_INET_NTOP) && defined(INET6_ADDRSTRLEN)
+		struct in6_addr *in6_addrs = addresses;
+		char buf[INET6_ADDRSTRLEN+1];
+		if (memcmp(&in6_addrs[0].s6_addr, "abcdefghijklmnop", 16)
+		    || ttl != 123) {
+			const char *b = inet_ntop(AF_INET6, &in6_addrs[0],buf,sizeof(buf));
+			fprintf(stdout, "Bad IPv6 response \"%s\" %d. ", b, ttl);
+			dns_ok = 0;
+			goto out;
+		}
+#endif
+		break;
+	}
+	case DNS_PTR: {
+		char **addrs = addresses;
+		if (strcmp(addrs[0], "ZZ.EXAMPLE.COM") || ttl != 54321) {
+			fprintf(stdout, "Bad PTR response \"%s\" %d. ",
+					addrs[0], ttl);
+			dns_ok = 0;
+			goto out;
+		}
+		break;
+	}
+	default:
+		fprintf(stdout, "Bad response type %d. ", type);
+		dns_ok = 0;
+	}
+
+ out:
+	if (++n_server_responses == 3) {
+		event_loopexit(NULL);
+	}
+}
+
+/* End-to-end test of the evdns server API: binds a UDP nameserver on
+ * 127.0.0.1:35353, points the resolver at it, issues A/AAAA/PTR
+ * queries, and checks the canned answers round-trip.  Exits the
+ * process on failure; tears the server and resolver back down on
+ * success. */
+static void
+dns_server(void)
+{
+	int sock;
+	struct sockaddr_in my_addr;
+	struct evdns_server_port *port;
+	struct in_addr resolve_addr;
+
+	dns_ok = 1;
+	fprintf(stdout, "DNS server support: ");
+
+	/* Add ourself as the only nameserver, and make sure we really are
+	 * the only nameserver. */
+	evdns_nameserver_ip_add("127.0.0.1:35353");
+	if (evdns_count_nameservers() != 1) {
+		fprintf(stdout, "Couldn't set up.\n");
+		exit(1);
+	}
+
+	/* Now configure a nameserver port. */
+	sock = socket(AF_INET, SOCK_DGRAM, 0);
+	if (sock == -1) {
+		perror("socket");
+		exit(1);
+	}
+#ifdef WIN32
+	{
+		u_long nonblocking = 1;
+		ioctlsocket(sock, FIONBIO, &nonblocking);
+	}
+#else
+	fcntl(sock, F_SETFL, O_NONBLOCK);
+#endif
+	memset(&my_addr, 0, sizeof(my_addr));
+	my_addr.sin_family = AF_INET;
+	my_addr.sin_port = htons(35353);
+	my_addr.sin_addr.s_addr = htonl(0x7f000001UL);
+	if (bind(sock, (struct sockaddr*)&my_addr, sizeof(my_addr)) < 0) {
+		perror("bind");
+		exit (1);
+	}
+	port = evdns_add_server_port(sock, 0, dns_server_request_cb, NULL);
+
+	/* Send two queries. */
+	evdns_resolve_ipv4("zz.example.com", DNS_QUERY_NO_SEARCH,
+					   dns_server_gethostbyname_cb, NULL);
+	evdns_resolve_ipv6("zz.example.com", DNS_QUERY_NO_SEARCH,
+					   dns_server_gethostbyname_cb, NULL);
+	resolve_addr.s_addr = htonl(0xc0a80b0bUL); /* 192.168.11.11 */
+	evdns_resolve_reverse(&resolve_addr, 0,
+	    dns_server_gethostbyname_cb, NULL);
+
+	event_dispatch();
+
+	if (dns_ok) {
+		fprintf(stdout, "OK\n");
+	} else {
+		fprintf(stdout, "FAILED\n");
+		exit(1);
+	}
+
+	evdns_close_server_port(port);
+	evdns_shutdown(0); /* remove ourself as nameserver. */
+#ifdef WIN32
+	closesocket(sock);
+#else
+	close(sock);
+#endif
+}
+
+/* Run all DNS regression tests: the local-server test first (before
+ * evdns_init installs system nameservers), then the live lookups. */
+void
+dns_suite(void)
+{
+	dns_server(); /* Do this before we call evdns_init. */
+
+	evdns_init();
+	dns_gethostbyname();
+	dns_gethostbyname6();
+	dns_gethostbyaddr();
+
+	evdns_shutdown(0);
+}
diff --git a/libevent/test/regress_http.c b/libevent/test/regress_http.c
new file mode 100644
index 00000000000..1e2a1eb062a
--- /dev/null
+++ b/libevent/test/regress_http.c
@@ -0,0 +1,1476 @@
+/*
+ * Copyright (c) 2003-2006 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef WIN32
+#include <winsock2.h>
+#include <windows.h>
+#endif
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#include <sys/queue.h>
+#ifndef WIN32
+#include <sys/socket.h>
+#include <signal.h>
+#include <unistd.h>
+#include <netdb.h>
+#endif
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+
+#include "event.h"
+#include "evhttp.h"
+#include "log.h"
+#include "http-internal.h"
+
+extern int pair[];
+extern int test_ok;
+
+static struct evhttp *http;
+/* set if a test needs to call loopexit on a base */
+static struct event_base *base;
+
+void http_suite(void);
+
+void http_basic_cb(struct evhttp_request *req, void *arg);
+static void http_chunked_cb(struct evhttp_request *req, void *arg);
+void http_post_cb(struct evhttp_request *req, void *arg);
+void http_dispatcher_cb(struct evhttp_request *req, void *arg);
+static void http_large_delay_cb(struct evhttp_request *req, void *arg);
+
+/* Create an evhttp server bound to 127.0.0.1 on the first free port in
+ * [8080, 8129], register the test URI callbacks, and return it.  The
+ * chosen port is stored through *pport; fatal error if no port binds. */
+static struct evhttp *
+http_setup(short *pport, struct event_base *base)
+{
+	int i;
+	struct evhttp *myhttp;
+	short port = -1;
+
+	/* Try a few different ports */
+	myhttp = evhttp_new(base);
+	for (i = 0; i < 50; ++i) {
+		if (evhttp_bind_socket(myhttp, "127.0.0.1", 8080 + i) != -1) {
+			port = 8080 + i;
+			break;
+		}
+	}
+
+	if (port == -1)
+		event_errx(1, "Could not start web server");
+
+	/* Register a callback for certain types of requests */
+	evhttp_set_cb(myhttp, "/test", http_basic_cb, NULL);
+	evhttp_set_cb(myhttp, "/chunked", http_chunked_cb, NULL);
+	evhttp_set_cb(myhttp, "/postit", http_post_cb, NULL);
+	evhttp_set_cb(myhttp, "/largedelay", http_large_delay_cb, NULL);
+	evhttp_set_cb(myhttp, "/", http_dispatcher_cb, NULL);
+
+	*pport = port;
+	return (myhttp);
+}
+
+#ifndef NI_MAXSERV
+#define NI_MAXSERV 1024
+#endif
+
+/* Open a blocking TCP connection to address:port and return the socket
+ * descriptor.  Uses gethostbyname on Windows and getaddrinfo elsewhere;
+ * any failure to connect is fatal (event_err exits). */
+static int
+http_connect(const char *address, u_short port)
+{
+	/* Stupid code for connecting */
+#ifdef WIN32
+	struct hostent *he;
+	struct sockaddr_in sin;
+#else
+	struct addrinfo ai, *aitop;
+	char strport[NI_MAXSERV];
+#endif
+	struct sockaddr *sa;
+	int slen;
+	int fd;
+
+#ifdef WIN32
+	if (!(he = gethostbyname(address))) {
+		event_warn("gethostbyname");
+	}
+	memcpy(&sin.sin_addr, he->h_addr_list[0], he->h_length);
+	sin.sin_family = AF_INET;
+	sin.sin_port = htons(port);
+	slen = sizeof(struct sockaddr_in);
+	sa = (struct sockaddr*)&sin;
+#else
+	memset(&ai, 0, sizeof (ai));
+	ai.ai_family = AF_INET;
+	ai.ai_socktype = SOCK_STREAM;
+	snprintf(strport, sizeof (strport), "%d", port);
+	if (getaddrinfo(address, strport, &ai, &aitop) != 0) {
+		event_warn("getaddrinfo");
+		return (-1);
+	}
+	sa = aitop->ai_addr;
+	slen = aitop->ai_addrlen;
+#endif
+
+	fd = socket(AF_INET, SOCK_STREAM, 0);
+	if (fd == -1)
+		event_err(1, "socket failed");
+
+	if (connect(fd, sa, slen) == -1)
+		event_err(1, "connect failed");
+
+#ifndef WIN32
+	freeaddrinfo(aitop);
+#endif
+
+	return (fd);
+}
+
+/* bufferevent read callback for the raw-socket HTTP tests: once the
+ * expected body text appears in the input, parse the buffered response
+ * line and headers, bump test_ok when a Content-Type header is present,
+ * and stop the event loop. */
+static void
+http_readcb(struct bufferevent *bev, void *arg)
+{
+	const char *what = "This is funny";
+
+	event_debug(("%s: %s\n", __func__, EVBUFFER_DATA(bev->input)));
+
+	if (evbuffer_find(bev->input,
+		(const unsigned char*) what, strlen(what)) != NULL) {
+		struct evhttp_request *req = evhttp_request_new(NULL, NULL);
+		enum message_read_status done;
+
+		req->kind = EVHTTP_RESPONSE;
+		done = evhttp_parse_firstline(req, bev->input);
+		if (done != ALL_DATA_READ)
+			goto out;
+
+		done = evhttp_parse_headers(req, bev->input);
+		if (done != ALL_DATA_READ)
+			goto out;
+
+		/* NOTE(review): at this point done == ALL_DATA_READ, so the
+		 * "done == 1" comparison below relies on ALL_DATA_READ's enum
+		 * value -- comparing against the enum would be clearer. */
+		if (done == 1 &&
+		    evhttp_find_header(req->input_headers,
+			"Content-Type") != NULL)
+			test_ok++;
+
+	 out:
+		evhttp_request_free(req);
+		bufferevent_disable(bev, EV_READ);
+		if (base)
+			event_base_loopexit(base, NULL);
+		else
+			event_loopexit(NULL);
+	}
+}
+
+/* bufferevent write callback: once the request has fully drained,
+ * switch the connection to reading the reply and count a success. */
+static void
+http_writecb(struct bufferevent *bev, void *arg)
+{
+	if (EVBUFFER_LENGTH(bev->output) == 0) {
+		/* enable reading of the reply */
+		bufferevent_enable(bev, EV_READ);
+		test_ok++;
+	}
+}
+
+/* bufferevent error callback: flag the test as failed (-2) and stop
+ * the event loop. */
+static void
+http_errorcb(struct bufferevent *bev, short what, void *arg)
+{
+	test_ok = -2;
+	event_loopexit(NULL);
+}
+
+/* Server handler for "/test": replies "This is funny" (or an empty
+ * body when an "Empty" request header is present), scores the
+ * multi-line-header test via X-multi/X-Last, and can inject a bogus
+ * negative Content-Length when asked to via X-Negative. */
+void
+http_basic_cb(struct evhttp_request *req, void *arg)
+{
+	struct evbuffer *evb = evbuffer_new();
+	int empty = evhttp_find_header(req->input_headers, "Empty") != NULL;
+	event_debug(("%s: called\n", __func__));
+	evbuffer_add_printf(evb, "This is funny");
+
+	/* For multi-line headers test */
+	{
+		const char *multi =
+		    evhttp_find_header(req->input_headers,"X-multi");
+		if (multi) {
+			if (strcmp("END", multi + strlen(multi) - 3) == 0)
+				test_ok++;
+			if (evhttp_find_header(req->input_headers, "X-Last"))
+				test_ok++;
+		}
+	}
+
+	/* injecting a bad content-length */
+	if (evhttp_find_header(req->input_headers, "X-Negative"))
+		evhttp_add_header(req->output_headers,
+		    "Content-Length", "-100");
+
+	/* allow sending of an empty reply */
+	evhttp_send_reply(req, HTTP_OK, "Everything is fine",
+	    !empty ? evb : NULL);
+
+	evbuffer_free(evb);
+}
+
+/* The chunk bodies the chunked-reply test trickles out, one per timer
+ * tick. */
+static char const* const CHUNKS[] = {
+	"This is funny",
+	"but not hilarious.",
+	"bwv 1052"
+};
+
+/* Per-request progress for the chunked reply: the request being served
+ * and the index of the next chunk to send. */
+struct chunk_req_state {
+	struct evhttp_request *req;
+	int i;
+};
+
+/* Timer callback: send CHUNKS[i] as one reply chunk; reschedule itself
+ * until all chunks are out, then end the reply and free the state. */
+static void
+http_chunked_trickle_cb(int fd, short events, void *arg)
+{
+	struct evbuffer *evb = evbuffer_new();
+	struct chunk_req_state *state = arg;
+	struct timeval when = { 0, 0 };
+
+	evbuffer_add_printf(evb, "%s", CHUNKS[state->i]);
+	evhttp_send_reply_chunk(state->req, evb);
+	evbuffer_free(evb);
+
+	if (++state->i < sizeof(CHUNKS)/sizeof(CHUNKS[0])) {
+		event_once(-1, EV_TIMEOUT,
+		    http_chunked_trickle_cb, state, &when);
+	} else {
+		evhttp_send_reply_end(state->req);
+		free(state);
+	}
+}
+
+/* Server handler for "/chunked": start a chunked reply and hand off to
+ * http_chunked_trickle_cb so chunks arrive over several loop
+ * iterations rather than all at once.
+ * NOTE(review): the malloc result is used unchecked -- test-only code,
+ * but a NULL check would be cheap. */
+static void
+http_chunked_cb(struct evhttp_request *req, void *arg)
+{
+	struct timeval when = { 0, 0 };
+	struct chunk_req_state *state = malloc(sizeof(struct chunk_req_state));
+	event_debug(("%s: called\n", __func__));
+
+	memset(state, 0, sizeof(struct chunk_req_state));
+	state->req = req;
+
+	/* generate a chunked reply */
+	evhttp_send_reply_start(req, HTTP_OK, "Everything is fine");
+
+	/* but trickle it across several iterations to ensure we're not
+	 * assuming it comes all at once */
+	event_once(-1, EV_TIMEOUT, http_chunked_trickle_cb, state, &when);
+}
+
+/* Timer callback: write the second half of the split request started
+ * in http_basic_test ("Host: some" + "host\r\n...") to exercise the
+ * server's incremental request parsing. */
+static void
+http_complete_write(int fd, short what, void *arg)
+{
+	struct bufferevent *bev = arg;
+	const char *http_request = "host\r\n"
+	    "Connection: close\r\n"
+	    "\r\n";
+	bufferevent_write(bev, http_request, strlen(http_request));
+}
+
+/* Basic HTTP server test: start a server on two ports, send one
+ * request split across two writes (exercising partial parsing), then a
+ * complete request on the second port, and verify via test_ok counts
+ * accumulated by the read/write callbacks.  Exits the process on
+ * failure. */
+static void
+http_basic_test(void)
+{
+	struct timeval tv;
+	struct bufferevent *bev;
+	int fd;
+	const char *http_request;
+	short port = -1;
+
+	test_ok = 0;
+	fprintf(stdout, "Testing Basic HTTP Server: ");
+
+	http = http_setup(&port, NULL);
+
+	/* bind to a second socket */
+	if (evhttp_bind_socket(http, "127.0.0.1", port + 1) == -1) {
+		fprintf(stdout, "FAILED (bind)\n");
+		exit(1);
+	}
+
+	fd = http_connect("127.0.0.1", port);
+
+	/* Stupid thing to send a request */
+	bev = bufferevent_new(fd, http_readcb, http_writecb,
+	    http_errorcb, NULL);
+
+	/* first half of the http request */
+	http_request =
+	    "GET /test HTTP/1.1\r\n"
+	    "Host: some";
+
+	bufferevent_write(bev, http_request, strlen(http_request));
+	timerclear(&tv);
+	tv.tv_usec = 10000;	/* finish the request 10ms later */
+	event_once(-1, EV_TIMEOUT, http_complete_write, bev, &tv);
+
+	event_dispatch();
+
+	if (test_ok != 3) {
+		fprintf(stdout, "FAILED\n");
+		exit(1);
+	}
+
+	/* connect to the second port */
+	bufferevent_free(bev);
+	EVUTIL_CLOSESOCKET(fd);
+
+	fd = http_connect("127.0.0.1", port + 1);
+
+	/* Stupid thing to send a request */
+	bev = bufferevent_new(fd, http_readcb, http_writecb,
+	    http_errorcb, NULL);
+
+	http_request =
+	    "GET /test HTTP/1.1\r\n"
+	    "Host: somehost\r\n"
+	    "Connection: close\r\n"
+	    "\r\n";
+
+	bufferevent_write(bev, http_request, strlen(http_request));
+
+	event_dispatch();
+
+	bufferevent_free(bev);
+	EVUTIL_CLOSESOCKET(fd);
+
+	evhttp_free(http);
+
+	if (test_ok != 5) {
+		fprintf(stdout, "FAILED\n");
+		exit(1);
+	}
+
+	fprintf(stdout, "OK\n");
+}
+
+/* Client connection that http_large_delay_cb deliberately fails; set
+ * by the test that drives the /largedelay handler. */
+static struct evhttp_connection *delayed_client;
+
+/* Timer callback: send the (intentionally late) OK reply for the
+ * /largedelay request and count it. */
+static void
+http_delay_reply(int fd, short what, void *arg)
+{
+	struct evhttp_request *req = arg;
+
+	evhttp_send_reply(req, HTTP_OK, "Everything is fine", NULL);
+
+	++test_ok;
+}
+
+/* Server handler for "/largedelay": schedule the reply 3 seconds out,
+ * then immediately fail the client connection so the test can observe
+ * an EOF while a reply is still pending. */
+static void
+http_large_delay_cb(struct evhttp_request *req, void *arg)
+{
+	struct timeval tv;
+	timerclear(&tv);
+	tv.tv_sec = 3;
+
+	event_once(-1, EV_TIMEOUT, http_delay_reply, req, &tv);
+
+	/* here we close the client connection which will cause an EOF */
+	evhttp_connection_fail(delayed_client, EVCON_HTTP_EOF);
+}
+
+void http_request_done(struct evhttp_request *, void *);
+void http_request_empty_done(struct evhttp_request *, void *);
+
+/* Pipeline three requests over one evhttp_connection: two normal GETs
+ * (optionally with "Connection: close" when !persistent) and one
+ * empty-body request, verifying each completion via test_ok.  Request
+ * ownership passes to the connection on evhttp_make_request.  Exits
+ * the process on failure. */
+static void
+http_connection_test(int persistent)
+{
+	short port = -1;
+	struct evhttp_connection *evcon = NULL;
+	struct evhttp_request *req = NULL;
+
+	test_ok = 0;
+	fprintf(stdout, "Testing Request Connection Pipeline %s: ",
+	    persistent ? "(persistent)" : "");
+
+	http = http_setup(&port, NULL);
+
+	evcon = evhttp_connection_new("127.0.0.1", port);
+	if (evcon == NULL) {
+		fprintf(stdout, "FAILED\n");
+		exit(1);
+	}
+
+	/*
+	 * At this point, we want to schedule a request to the HTTP
+	 * server using our make request method.
+	 */
+
+	req = evhttp_request_new(http_request_done, NULL);
+
+	/* Add the information that we care about */
+	evhttp_add_header(req->output_headers, "Host", "somehost");
+
+	/* We give ownership of the request to the connection */
+	if (evhttp_make_request(evcon, req, EVHTTP_REQ_GET, "/test") == -1) {
+		fprintf(stdout, "FAILED\n");
+		exit(1);
+	}
+
+	event_dispatch();
+
+	if (test_ok != 1) {
+		fprintf(stdout, "FAILED\n");
+		exit(1);
+	}
+
+	/* try to make another request over the same connection */
+	test_ok = 0;
+
+	req = evhttp_request_new(http_request_done, NULL);
+
+	/* Add the information that we care about */
+	evhttp_add_header(req->output_headers, "Host", "somehost");
+
+	/*
+	 * if our connections are not supposed to be persistent; request
+	 * a close from the server.
+	 */
+	if (!persistent)
+		evhttp_add_header(req->output_headers, "Connection", "close");
+
+	/* We give ownership of the request to the connection */
+	if (evhttp_make_request(evcon, req, EVHTTP_REQ_GET, "/test") == -1) {
+		fprintf(stdout, "FAILED\n");
+		exit(1);
+	}
+
+	event_dispatch();
+
+	/* make another request: request empty reply */
+	test_ok = 0;
+
+	req = evhttp_request_new(http_request_empty_done, NULL);
+
+	/* Add the information that we care about */
+	evhttp_add_header(req->output_headers, "Empty", "itis");
+
+	/* We give ownership of the request to the connection */
+	if (evhttp_make_request(evcon, req, EVHTTP_REQ_GET, "/test") == -1) {
+		fprintf(stdout, "FAILED\n");
+		exit(1);
+	}
+
+	event_dispatch();
+
+	if (test_ok != 1) {
+		fprintf(stdout, "FAILED\n");
+		exit(1);
+	}
+
+	evhttp_connection_free(evcon);
+	evhttp_free(http);
+
+	fprintf(stdout, "OK\n");
+}
+
+/* Completion callback for the /test GETs: require HTTP 200, a
+ * Content-Type header, and an exact "This is funny" body, then set
+ * test_ok and stop the loop.  Exits the process on any mismatch. */
+void
+http_request_done(struct evhttp_request *req, void *arg)
+{
+	const char *what = "This is funny";
+
+	if (req->response_code != HTTP_OK) {
+		fprintf(stderr, "FAILED\n");
+		exit(1);
+	}
+
+	if (evhttp_find_header(req->input_headers, "Content-Type") == NULL) {
+		fprintf(stderr, "FAILED\n");
+		exit(1);
+	}
+
+	if (EVBUFFER_LENGTH(req->input_buffer) != strlen(what)) {
+		fprintf(stderr, "FAILED\n");
+		exit(1);
+	}
+
+	if (memcmp(EVBUFFER_DATA(req->input_buffer), what, strlen(what)) != 0) {
+		fprintf(stderr, "FAILED\n");
+		exit(1);
+	}
+
+	test_ok = 1;
+	event_loopexit(NULL);
+}
+
+/* test date header and content length */
+
+/* Completion callback for the empty-reply request: require HTTP 200,
+ * a Date header, "Content-Length: 0", and an empty body, then set
+ * test_ok and stop the loop.  Exits the process on any mismatch. */
+void
+http_request_empty_done(struct evhttp_request *req, void *arg)
+{
+	if (req->response_code != HTTP_OK) {
+		fprintf(stderr, "FAILED\n");
+		exit(1);
+	}
+
+	if (evhttp_find_header(req->input_headers, "Date") == NULL) {
+		fprintf(stderr, "FAILED\n");
+		exit(1);
+	}
+
+
+	if (evhttp_find_header(req->input_headers, "Content-Length") == NULL) {
+		fprintf(stderr, "FAILED\n");
+		exit(1);
+	}
+
+	if (strcmp(evhttp_find_header(req->input_headers, "Content-Length"),
+		"0")) {
+		fprintf(stderr, "FAILED\n");
+		exit(1);
+	}
+
+	if (EVBUFFER_LENGTH(req->input_buffer) != 0) {
+		fprintf(stderr, "FAILED\n");
+		exit(1);
+	}
+
+	test_ok = 1;
+	event_loopexit(NULL);
+}
+
+/*
+ * HTTP DISPATCHER test
+ */
+
+/* Catch-all server handler ("/"): always replies 200 with the body
+ * "DISPATCHER_TEST", regardless of the query string. */
+void
+http_dispatcher_cb(struct evhttp_request *req, void *arg)
+{
+
+	struct evbuffer *evb = evbuffer_new();
+	event_debug(("%s: called\n", __func__));
+	evbuffer_add_printf(evb, "DISPATCHER_TEST");
+
+	evhttp_send_reply(req, HTTP_OK, "Everything is fine", evb);
+
+	evbuffer_free(evb);
+}
+
+static void
+http_dispatcher_test_done(struct evhttp_request *req, void *arg)
+{
+ const char *what = "DISPATCHER_TEST";
+
+ if (req->response_code != HTTP_OK) {
+ fprintf(stderr, "FAILED\n");
+ exit(1);
+ }
+
+ if (evhttp_find_header(req->input_headers, "Content-Type") == NULL) {
+ fprintf(stderr, "FAILED (content type)\n");
+ exit(1);
+ }
+
+ if (EVBUFFER_LENGTH(req->input_buffer) != strlen(what)) {
+ fprintf(stderr, "FAILED (length %zu vs %zu)\n",
+ EVBUFFER_LENGTH(req->input_buffer), strlen(what));
+ exit(1);
+ }
+
+ if (memcmp(EVBUFFER_DATA(req->input_buffer), what, strlen(what)) != 0) {
+ fprintf(stderr, "FAILED (data)\n");
+ exit(1);
+ }
+
+ test_ok = 1;
+ event_loopexit(NULL);
+}
+
+static void
+http_dispatcher_test(void)
+{
+ short port = -1;
+ struct evhttp_connection *evcon = NULL;
+ struct evhttp_request *req = NULL;
+
+ test_ok = 0;
+ fprintf(stdout, "Testing HTTP Dispatcher: ");
+
+ http = http_setup(&port, NULL);
+
+ evcon = evhttp_connection_new("127.0.0.1", port);
+ if (evcon == NULL) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ /* also bind to local host */
+ evhttp_connection_set_local_address(evcon, "127.0.0.1");
+
+ /*
+	 * At this point, we want to schedule an HTTP GET request to the
+	 * server using our make request method.
+ */
+
+ req = evhttp_request_new(http_dispatcher_test_done, NULL);
+ if (req == NULL) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ /* Add the information that we care about */
+ evhttp_add_header(req->output_headers, "Host", "somehost");
+
+ if (evhttp_make_request(evcon, req, EVHTTP_REQ_GET, "/?arg=val") == -1) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ event_dispatch();
+
+ evhttp_connection_free(evcon);
+ evhttp_free(http);
+
+ if (test_ok != 1) {
+ fprintf(stdout, "FAILED: %d\n", test_ok);
+ exit(1);
+ }
+
+ fprintf(stdout, "OK\n");
+}
+
+/*
+ * HTTP POST test.
+ */
+
+void http_postrequest_done(struct evhttp_request *, void *);
+
+#define POST_DATA "Okay. Not really printf"
+
+static void
+http_post_test(void)
+{
+ short port = -1;
+ struct evhttp_connection *evcon = NULL;
+ struct evhttp_request *req = NULL;
+
+ test_ok = 0;
+ fprintf(stdout, "Testing HTTP POST Request: ");
+
+ http = http_setup(&port, NULL);
+
+ evcon = evhttp_connection_new("127.0.0.1", port);
+ if (evcon == NULL) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ /*
+	 * At this point, we want to schedule an HTTP POST request to the
+	 * server using our make request method.
+ */
+
+ req = evhttp_request_new(http_postrequest_done, NULL);
+ if (req == NULL) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ /* Add the information that we care about */
+ evhttp_add_header(req->output_headers, "Host", "somehost");
+ evbuffer_add_printf(req->output_buffer, POST_DATA);
+
+ if (evhttp_make_request(evcon, req, EVHTTP_REQ_POST, "/postit") == -1) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ event_dispatch();
+
+ evhttp_connection_free(evcon);
+ evhttp_free(http);
+
+ if (test_ok != 1) {
+ fprintf(stdout, "FAILED: %d\n", test_ok);
+ exit(1);
+ }
+
+ fprintf(stdout, "OK\n");
+}
+
+void
+http_post_cb(struct evhttp_request *req, void *arg)
+{
+ struct evbuffer *evb;
+ event_debug(("%s: called\n", __func__));
+
+ /* Yes, we are expecting a post request */
+ if (req->type != EVHTTP_REQ_POST) {
+ fprintf(stdout, "FAILED (post type)\n");
+ exit(1);
+ }
+
+ if (EVBUFFER_LENGTH(req->input_buffer) != strlen(POST_DATA)) {
+ fprintf(stdout, "FAILED (length: %zu vs %zu)\n",
+ EVBUFFER_LENGTH(req->input_buffer), strlen(POST_DATA));
+ exit(1);
+ }
+
+ if (memcmp(EVBUFFER_DATA(req->input_buffer), POST_DATA,
+ strlen(POST_DATA))) {
+ fprintf(stdout, "FAILED (data)\n");
+ fprintf(stdout, "Got :%s\n", EVBUFFER_DATA(req->input_buffer));
+ fprintf(stdout, "Want:%s\n", POST_DATA);
+ exit(1);
+ }
+
+ evb = evbuffer_new();
+ evbuffer_add_printf(evb, "This is funny");
+
+ evhttp_send_reply(req, HTTP_OK, "Everything is fine", evb);
+
+ evbuffer_free(evb);
+}
+
+void
+http_postrequest_done(struct evhttp_request *req, void *arg)
+{
+ const char *what = "This is funny";
+
+ if (req == NULL) {
+ fprintf(stderr, "FAILED (timeout)\n");
+ exit(1);
+ }
+
+ if (req->response_code != HTTP_OK) {
+
+ fprintf(stderr, "FAILED (response code)\n");
+ exit(1);
+ }
+
+ if (evhttp_find_header(req->input_headers, "Content-Type") == NULL) {
+ fprintf(stderr, "FAILED (content type)\n");
+ exit(1);
+ }
+
+ if (EVBUFFER_LENGTH(req->input_buffer) != strlen(what)) {
+ fprintf(stderr, "FAILED (length %zu vs %zu)\n",
+ EVBUFFER_LENGTH(req->input_buffer), strlen(what));
+ exit(1);
+ }
+
+ if (memcmp(EVBUFFER_DATA(req->input_buffer), what, strlen(what)) != 0) {
+ fprintf(stderr, "FAILED (data)\n");
+ exit(1);
+ }
+
+ test_ok = 1;
+ event_loopexit(NULL);
+}
+
+static void
+http_failure_readcb(struct bufferevent *bev, void *arg)
+{
+ const char *what = "400 Bad Request";
+ if (evbuffer_find(bev->input, (const unsigned char*) what, strlen(what)) != NULL) {
+ test_ok = 2;
+ bufferevent_disable(bev, EV_READ);
+ event_loopexit(NULL);
+ }
+}
+
+/*
+ * Testing that the HTTP server can deal with a malformed request.
+ */
+static void
+http_failure_test(void)
+{
+ struct bufferevent *bev;
+ int fd;
+ const char *http_request;
+ short port = -1;
+
+ test_ok = 0;
+ fprintf(stdout, "Testing Bad HTTP Request: ");
+
+ http = http_setup(&port, NULL);
+
+ fd = http_connect("127.0.0.1", port);
+
+ /* Stupid thing to send a request */
+ bev = bufferevent_new(fd, http_failure_readcb, http_writecb,
+ http_errorcb, NULL);
+
+ http_request = "illegal request\r\n";
+
+ bufferevent_write(bev, http_request, strlen(http_request));
+
+ event_dispatch();
+
+ bufferevent_free(bev);
+ EVUTIL_CLOSESOCKET(fd);
+
+ evhttp_free(http);
+
+ if (test_ok != 2) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ fprintf(stdout, "OK\n");
+}
+
+static void
+close_detect_done(struct evhttp_request *req, void *arg)
+{
+ struct timeval tv;
+ if (req == NULL || req->response_code != HTTP_OK) {
+
+ fprintf(stderr, "FAILED\n");
+ exit(1);
+ }
+
+ test_ok = 1;
+
+ timerclear(&tv);
+ tv.tv_sec = 3; /* longer than the http time out */
+
+ event_loopexit(&tv);
+}
+
+static void
+close_detect_launch(int fd, short what, void *arg)
+{
+ struct evhttp_connection *evcon = arg;
+ struct evhttp_request *req;
+
+ req = evhttp_request_new(close_detect_done, NULL);
+
+ /* Add the information that we care about */
+ evhttp_add_header(req->output_headers, "Host", "somehost");
+
+ /* We give ownership of the request to the connection */
+ if (evhttp_make_request(evcon, req, EVHTTP_REQ_GET, "/test") == -1) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+}
+
+static void
+close_detect_cb(struct evhttp_request *req, void *arg)
+{
+ struct evhttp_connection *evcon = arg;
+ struct timeval tv;
+
+ if (req != NULL && req->response_code != HTTP_OK) {
+
+ fprintf(stderr, "FAILED\n");
+ exit(1);
+ }
+
+ timerclear(&tv);
+ tv.tv_sec = 3; /* longer than the http time out */
+
+	/* launch a new request on the persistent connection in 3 seconds */
+ event_once(-1, EV_TIMEOUT, close_detect_launch, evcon, &tv);
+}
+
+
+static void
+http_close_detection(int with_delay)
+{
+ short port = -1;
+ struct evhttp_connection *evcon = NULL;
+ struct evhttp_request *req = NULL;
+
+ test_ok = 0;
+ fprintf(stdout, "Testing Connection Close Detection%s: ",
+ with_delay ? " (with delay)" : "");
+
+ http = http_setup(&port, NULL);
+
+ /* 2 second timeout */
+ evhttp_set_timeout(http, 2);
+
+ evcon = evhttp_connection_new("127.0.0.1", port);
+ if (evcon == NULL) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ delayed_client = evcon;
+
+ /*
+ * At this point, we want to schedule a request to the HTTP
+ * server using our make request method.
+ */
+
+ req = evhttp_request_new(close_detect_cb, evcon);
+
+ /* Add the information that we care about */
+ evhttp_add_header(req->output_headers, "Host", "somehost");
+
+ /* We give ownership of the request to the connection */
+ if (evhttp_make_request(evcon,
+ req, EVHTTP_REQ_GET, with_delay ? "/largedelay" : "/test") == -1) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ event_dispatch();
+
+ if (test_ok != 1) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ /* at this point, the http server should have no connection */
+ if (TAILQ_FIRST(&http->connections) != NULL) {
+ fprintf(stdout, "FAILED (left connections)\n");
+ exit(1);
+ }
+
+ evhttp_connection_free(evcon);
+ evhttp_free(http);
+
+ fprintf(stdout, "OK\n");
+}
+
+static void
+http_highport_test(void)
+{
+ int i = -1;
+ struct evhttp *myhttp = NULL;
+
+ fprintf(stdout, "Testing HTTP Server with high port: ");
+
+ /* Try a few different ports */
+ for (i = 0; i < 50; ++i) {
+ myhttp = evhttp_start("127.0.0.1", 65535 - i);
+ if (myhttp != NULL) {
+ fprintf(stdout, "OK\n");
+ evhttp_free(myhttp);
+ return;
+ }
+ }
+
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+}
+
+static void
+http_bad_header_test(void)
+{
+ struct evkeyvalq headers;
+
+ fprintf(stdout, "Testing HTTP Header filtering: ");
+
+ TAILQ_INIT(&headers);
+
+ if (evhttp_add_header(&headers, "One", "Two") != 0)
+ goto fail;
+
+ if (evhttp_add_header(&headers, "One\r", "Two") != -1)
+ goto fail;
+ if (evhttp_add_header(&headers, "One", "Two") != 0)
+ goto fail;
+ if (evhttp_add_header(&headers, "One", "Two\r\n Three") != 0)
+ goto fail;
+ if (evhttp_add_header(&headers, "One\r", "Two") != -1)
+ goto fail;
+ if (evhttp_add_header(&headers, "One\n", "Two") != -1)
+ goto fail;
+ if (evhttp_add_header(&headers, "One", "Two\r") != -1)
+ goto fail;
+ if (evhttp_add_header(&headers, "One", "Two\n") != -1)
+ goto fail;
+
+ evhttp_clear_headers(&headers);
+
+ fprintf(stdout, "OK\n");
+ return;
+fail:
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+}
+
+static int validate_header(
+ const struct evkeyvalq* headers,
+ const char *key, const char *value)
+{
+ const char *real_val = evhttp_find_header(headers, key);
+ if (real_val == NULL)
+ return (-1);
+ if (strcmp(real_val, value) != 0)
+ return (-1);
+ return (0);
+}
+
+static void
+http_parse_query_test(void)
+{
+ struct evkeyvalq headers;
+
+ fprintf(stdout, "Testing HTTP query parsing: ");
+
+ TAILQ_INIT(&headers);
+
+ evhttp_parse_query("http://www.test.com/?q=test", &headers);
+ if (validate_header(&headers, "q", "test") != 0)
+ goto fail;
+ evhttp_clear_headers(&headers);
+
+ evhttp_parse_query("http://www.test.com/?q=test&foo=bar", &headers);
+ if (validate_header(&headers, "q", "test") != 0)
+ goto fail;
+ if (validate_header(&headers, "foo", "bar") != 0)
+ goto fail;
+ evhttp_clear_headers(&headers);
+
+ evhttp_parse_query("http://www.test.com/?q=test+foo", &headers);
+ if (validate_header(&headers, "q", "test foo") != 0)
+ goto fail;
+ evhttp_clear_headers(&headers);
+
+ evhttp_parse_query("http://www.test.com/?q=test%0Afoo", &headers);
+ if (validate_header(&headers, "q", "test\nfoo") != 0)
+ goto fail;
+ evhttp_clear_headers(&headers);
+
+ evhttp_parse_query("http://www.test.com/?q=test%0Dfoo", &headers);
+ if (validate_header(&headers, "q", "test\rfoo") != 0)
+ goto fail;
+ evhttp_clear_headers(&headers);
+
+ fprintf(stdout, "OK\n");
+ return;
+fail:
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+}
+
+static void
+http_base_test(void)
+{
+ struct bufferevent *bev;
+ int fd;
+ const char *http_request;
+ short port = -1;
+
+ test_ok = 0;
+ fprintf(stdout, "Testing HTTP Server Event Base: ");
+
+ base = event_init();
+
+ /*
+	 * create another bogus base - which is being used by all subsequent
+ * tests - yuck!
+ */
+ event_init();
+
+ http = http_setup(&port, base);
+
+ fd = http_connect("127.0.0.1", port);
+
+ /* Stupid thing to send a request */
+ bev = bufferevent_new(fd, http_readcb, http_writecb,
+ http_errorcb, NULL);
+ bufferevent_base_set(base, bev);
+
+ http_request =
+ "GET /test HTTP/1.1\r\n"
+ "Host: somehost\r\n"
+ "Connection: close\r\n"
+ "\r\n";
+
+ bufferevent_write(bev, http_request, strlen(http_request));
+
+ event_base_dispatch(base);
+
+ bufferevent_free(bev);
+ EVUTIL_CLOSESOCKET(fd);
+
+ evhttp_free(http);
+
+ event_base_free(base);
+ base = NULL;
+
+ if (test_ok != 2) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ fprintf(stdout, "OK\n");
+}
+
+/*
+ * the server is going to reply with chunked data.
+ */
+
+static void
+http_chunked_readcb(struct bufferevent *bev, void *arg)
+{
+ /* nothing here */
+}
+
+static void
+http_chunked_errorcb(struct bufferevent *bev, short what, void *arg)
+{
+ if (!test_ok)
+ goto out;
+
+ test_ok = -1;
+
+ if ((what & EVBUFFER_EOF) != 0) {
+ struct evhttp_request *req = evhttp_request_new(NULL, NULL);
+ const char *header;
+ enum message_read_status done;
+
+ req->kind = EVHTTP_RESPONSE;
+ done = evhttp_parse_firstline(req, EVBUFFER_INPUT(bev));
+ if (done != ALL_DATA_READ)
+ goto out;
+
+ done = evhttp_parse_headers(req, EVBUFFER_INPUT(bev));
+ if (done != ALL_DATA_READ)
+ goto out;
+
+ header = evhttp_find_header(req->input_headers, "Transfer-Encoding");
+ if (header == NULL || strcmp(header, "chunked"))
+ goto out;
+
+ header = evhttp_find_header(req->input_headers, "Connection");
+ if (header == NULL || strcmp(header, "close"))
+ goto out;
+
+ header = evbuffer_readline(EVBUFFER_INPUT(bev));
+ if (header == NULL)
+ goto out;
+ /* 13 chars */
+ if (strcmp(header, "d"))
+ goto out;
+ free((char*)header);
+
+ if (strncmp((char *)EVBUFFER_DATA(EVBUFFER_INPUT(bev)),
+ "This is funny", 13))
+ goto out;
+
+ evbuffer_drain(EVBUFFER_INPUT(bev), 13 + 2);
+
+ header = evbuffer_readline(EVBUFFER_INPUT(bev));
+ if (header == NULL)
+ goto out;
+ /* 18 chars */
+ if (strcmp(header, "12"))
+ goto out;
+ free((char *)header);
+
+ if (strncmp((char *)EVBUFFER_DATA(EVBUFFER_INPUT(bev)),
+ "but not hilarious.", 18))
+ goto out;
+
+ evbuffer_drain(EVBUFFER_INPUT(bev), 18 + 2);
+
+ header = evbuffer_readline(EVBUFFER_INPUT(bev));
+ if (header == NULL)
+ goto out;
+ /* 8 chars */
+ if (strcmp(header, "8"))
+ goto out;
+ free((char *)header);
+
+ if (strncmp((char *)EVBUFFER_DATA(EVBUFFER_INPUT(bev)),
+ "bwv 1052.", 8))
+ goto out;
+
+ evbuffer_drain(EVBUFFER_INPUT(bev), 8 + 2);
+
+ header = evbuffer_readline(EVBUFFER_INPUT(bev));
+ if (header == NULL)
+ goto out;
+ /* 0 chars */
+ if (strcmp(header, "0"))
+ goto out;
+ free((char *)header);
+
+ test_ok = 2;
+ }
+
+out:
+ event_loopexit(NULL);
+}
+
+static void
+http_chunked_writecb(struct bufferevent *bev, void *arg)
+{
+ if (EVBUFFER_LENGTH(EVBUFFER_OUTPUT(bev)) == 0) {
+ /* enable reading of the reply */
+ bufferevent_enable(bev, EV_READ);
+ test_ok++;
+ }
+}
+
+static void
+http_chunked_request_done(struct evhttp_request *req, void *arg)
+{
+ if (req->response_code != HTTP_OK) {
+ fprintf(stderr, "FAILED\n");
+ exit(1);
+ }
+
+ if (evhttp_find_header(req->input_headers,
+ "Transfer-Encoding") == NULL) {
+ fprintf(stderr, "FAILED\n");
+ exit(1);
+ }
+
+ if (EVBUFFER_LENGTH(req->input_buffer) != 13 + 18 + 8) {
+ fprintf(stderr, "FAILED\n");
+ exit(1);
+ }
+
+ if (strncmp((char *)EVBUFFER_DATA(req->input_buffer),
+ "This is funnybut not hilarious.bwv 1052",
+ 13 + 18 + 8)) {
+ fprintf(stderr, "FAILED\n");
+ exit(1);
+ }
+
+ test_ok = 1;
+ event_loopexit(NULL);
+}
+
+static void
+http_chunked_test(void)
+{
+ struct bufferevent *bev;
+ int fd;
+ const char *http_request;
+ short port = -1;
+ struct timeval tv_start, tv_end;
+ struct evhttp_connection *evcon = NULL;
+ struct evhttp_request *req = NULL;
+ int i;
+
+ test_ok = 0;
+ fprintf(stdout, "Testing Chunked HTTP Reply: ");
+
+ http = http_setup(&port, NULL);
+
+ fd = http_connect("127.0.0.1", port);
+
+ /* Stupid thing to send a request */
+ bev = bufferevent_new(fd,
+ http_chunked_readcb, http_chunked_writecb,
+ http_chunked_errorcb, NULL);
+
+ http_request =
+ "GET /chunked HTTP/1.1\r\n"
+ "Host: somehost\r\n"
+ "Connection: close\r\n"
+ "\r\n";
+
+ bufferevent_write(bev, http_request, strlen(http_request));
+
+ evutil_gettimeofday(&tv_start, NULL);
+
+ event_dispatch();
+
+ evutil_gettimeofday(&tv_end, NULL);
+ evutil_timersub(&tv_end, &tv_start, &tv_end);
+
+ if (tv_end.tv_sec >= 1) {
+ fprintf(stdout, "FAILED (time)\n");
+ exit (1);
+ }
+
+
+ if (test_ok != 2) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ /* now try again with the regular connection object */
+ evcon = evhttp_connection_new("127.0.0.1", port);
+ if (evcon == NULL) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ /* make two requests to check the keepalive behavior */
+ for (i = 0; i < 2; i++) {
+ test_ok = 0;
+ req = evhttp_request_new(http_chunked_request_done, NULL);
+
+ /* Add the information that we care about */
+ evhttp_add_header(req->output_headers, "Host", "somehost");
+
+ /* We give ownership of the request to the connection */
+ if (evhttp_make_request(evcon, req,
+ EVHTTP_REQ_GET, "/chunked") == -1) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ event_dispatch();
+
+ if (test_ok != 1) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+ }
+
+ evhttp_connection_free(evcon);
+ evhttp_free(http);
+
+ fprintf(stdout, "OK\n");
+}
+
+static void
+http_multi_line_header_test(void)
+{
+ struct bufferevent *bev;
+ int fd;
+ const char *http_start_request;
+ short port = -1;
+
+ test_ok = 0;
+ fprintf(stdout, "Testing HTTP Server with multi line: ");
+
+ http = http_setup(&port, NULL);
+
+ fd = http_connect("127.0.0.1", port);
+
+ /* Stupid thing to send a request */
+ bev = bufferevent_new(fd, http_readcb, http_writecb,
+ http_errorcb, NULL);
+
+ http_start_request =
+ "GET /test HTTP/1.1\r\n"
+ "Host: somehost\r\n"
+ "Connection: close\r\n"
+ "X-Multi: aaaaaaaa\r\n"
+ " a\r\n"
+ "\tEND\r\n"
+ "X-Last: last\r\n"
+ "\r\n";
+
+ bufferevent_write(bev, http_start_request, strlen(http_start_request));
+
+ event_dispatch();
+
+ bufferevent_free(bev);
+ EVUTIL_CLOSESOCKET(fd);
+
+ evhttp_free(http);
+
+ if (test_ok != 4) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ fprintf(stdout, "OK\n");
+}
+
+static void
+http_request_bad(struct evhttp_request *req, void *arg)
+{
+ if (req != NULL) {
+ fprintf(stderr, "FAILED\n");
+ exit(1);
+ }
+
+ test_ok = 1;
+ event_loopexit(NULL);
+}
+
+static void
+http_negative_content_length_test(void)
+{
+ short port = -1;
+ struct evhttp_connection *evcon = NULL;
+ struct evhttp_request *req = NULL;
+
+ test_ok = 0;
+ fprintf(stdout, "Testing HTTP Negative Content Length: ");
+
+ http = http_setup(&port, NULL);
+
+ evcon = evhttp_connection_new("127.0.0.1", port);
+ if (evcon == NULL) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ /*
+ * At this point, we want to schedule a request to the HTTP
+ * server using our make request method.
+ */
+
+ req = evhttp_request_new(http_request_bad, NULL);
+
+ /* Cause the response to have a negative content-length */
+ evhttp_add_header(req->output_headers, "X-Negative", "makeitso");
+
+ /* We give ownership of the request to the connection */
+ if (evhttp_make_request(evcon, req, EVHTTP_REQ_GET, "/test") == -1) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ event_dispatch();
+
+ evhttp_free(http);
+
+ if (test_ok != 1) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ fprintf(stdout, "OK\n");
+}
+
+void
+http_suite(void)
+{
+ http_base_test();
+ http_bad_header_test();
+ http_parse_query_test();
+ http_basic_test();
+ http_connection_test(0 /* not-persistent */);
+ http_connection_test(1 /* persistent */);
+	http_close_detection(0 /* without delay */);
+ http_close_detection(1 /* with delay */);
+ http_post_test();
+ http_failure_test();
+ http_highport_test();
+ http_dispatcher_test();
+
+ http_multi_line_header_test();
+ http_negative_content_length_test();
+
+ http_chunked_test();
+}
diff --git a/libevent/test/regress_rpc.c b/libevent/test/regress_rpc.c
new file mode 100644
index 00000000000..760934766a1
--- /dev/null
+++ b/libevent/test/regress_rpc.c
@@ -0,0 +1,631 @@
+/*
+ * Copyright (c) 2003-2006 Niels Provos <provos@citi.umich.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef WIN32
+#include <winsock2.h>
+#include <windows.h>
+#endif
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#include <sys/queue.h>
+#ifndef WIN32
+#include <sys/socket.h>
+#include <signal.h>
+#include <unistd.h>
+#include <netdb.h>
+#endif
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+
+#include "event.h"
+#include "evhttp.h"
+#include "log.h"
+#include "evrpc.h"
+
+#include "regress.gen.h"
+
+void rpc_suite(void);
+
+extern int test_ok;
+
+static struct evhttp *
+http_setup(short *pport)
+{
+ int i;
+ struct evhttp *myhttp;
+ short port = -1;
+
+ /* Try a few different ports */
+ for (i = 0; i < 50; ++i) {
+ myhttp = evhttp_start("127.0.0.1", 8080 + i);
+ if (myhttp != NULL) {
+ port = 8080 + i;
+ break;
+ }
+ }
+
+ if (port == -1)
+ event_errx(1, "Could not start web server");
+
+ *pport = port;
+ return (myhttp);
+}
+
+EVRPC_HEADER(Message, msg, kill);
+EVRPC_HEADER(NeverReply, msg, kill);
+
+EVRPC_GENERATE(Message, msg, kill);
+EVRPC_GENERATE(NeverReply, msg, kill);
+
+static int need_input_hook = 0;
+static int need_output_hook = 0;
+
+static void
+MessageCb(EVRPC_STRUCT(Message)* rpc, void *arg)
+{
+ struct kill* kill_reply = rpc->reply;
+
+ if (need_input_hook) {
+ struct evhttp_request* req = EVRPC_REQUEST_HTTP(rpc);
+ const char *header = evhttp_find_header(
+ req->input_headers, "X-Hook");
+ assert(strcmp(header, "input") == 0);
+ }
+
+ /* we just want to fill in some non-sense */
+ EVTAG_ASSIGN(kill_reply, weapon, "dagger");
+ EVTAG_ASSIGN(kill_reply, action, "wave around like an idiot");
+
+ /* no reply to the RPC */
+ EVRPC_REQUEST_DONE(rpc);
+}
+
+static EVRPC_STRUCT(NeverReply) *saved_rpc;
+
+static void
+NeverReplyCb(EVRPC_STRUCT(NeverReply)* rpc, void *arg)
+{
+ test_ok += 1;
+ saved_rpc = rpc;
+}
+
+static void
+rpc_setup(struct evhttp **phttp, short *pport, struct evrpc_base **pbase)
+{
+ short port;
+ struct evhttp *http = NULL;
+ struct evrpc_base *base = NULL;
+
+ http = http_setup(&port);
+ base = evrpc_init(http);
+
+ EVRPC_REGISTER(base, Message, msg, kill, MessageCb, NULL);
+ EVRPC_REGISTER(base, NeverReply, msg, kill, NeverReplyCb, NULL);
+
+ *phttp = http;
+ *pport = port;
+ *pbase = base;
+
+ need_input_hook = 0;
+ need_output_hook = 0;
+}
+
+static void
+rpc_teardown(struct evrpc_base *base)
+{
+ assert(EVRPC_UNREGISTER(base, Message) == 0);
+ assert(EVRPC_UNREGISTER(base, NeverReply) == 0);
+
+ evrpc_free(base);
+}
+
+static void
+rpc_postrequest_failure(struct evhttp_request *req, void *arg)
+{
+ if (req->response_code != HTTP_SERVUNAVAIL) {
+
+ fprintf(stderr, "FAILED (response code)\n");
+ exit(1);
+ }
+
+ test_ok = 1;
+ event_loopexit(NULL);
+}
+
+/*
+ * Test a malformed payload submitted as an RPC
+ */
+
+static void
+rpc_basic_test(void)
+{
+ short port;
+ struct evhttp *http = NULL;
+ struct evrpc_base *base = NULL;
+ struct evhttp_connection *evcon = NULL;
+ struct evhttp_request *req = NULL;
+
+ fprintf(stdout, "Testing Basic RPC Support: ");
+
+ rpc_setup(&http, &port, &base);
+
+ evcon = evhttp_connection_new("127.0.0.1", port);
+ if (evcon == NULL) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ /*
+	 * At this point, we want to schedule an HTTP POST request to the
+	 * server using our make request method.
+ */
+
+ req = evhttp_request_new(rpc_postrequest_failure, NULL);
+ if (req == NULL) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ /* Add the information that we care about */
+ evhttp_add_header(req->output_headers, "Host", "somehost");
+ evbuffer_add_printf(req->output_buffer, "Some Nonsense");
+
+ if (evhttp_make_request(evcon, req,
+ EVHTTP_REQ_POST,
+ "/.rpc.Message") == -1) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ test_ok = 0;
+
+ event_dispatch();
+
+ evhttp_connection_free(evcon);
+
+ rpc_teardown(base);
+
+ if (test_ok != 1) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ fprintf(stdout, "OK\n");
+
+ evhttp_free(http);
+}
+
+static void
+rpc_postrequest_done(struct evhttp_request *req, void *arg)
+{
+ struct kill* kill_reply = NULL;
+
+ if (req->response_code != HTTP_OK) {
+
+ fprintf(stderr, "FAILED (response code)\n");
+ exit(1);
+ }
+
+ kill_reply = kill_new();
+
+ if ((kill_unmarshal(kill_reply, req->input_buffer)) == -1) {
+ fprintf(stderr, "FAILED (unmarshal)\n");
+ exit(1);
+ }
+
+ kill_free(kill_reply);
+
+ test_ok = 1;
+ event_loopexit(NULL);
+}
+
+static void
+rpc_basic_message(void)
+{
+ short port;
+ struct evhttp *http = NULL;
+ struct evrpc_base *base = NULL;
+ struct evhttp_connection *evcon = NULL;
+ struct evhttp_request *req = NULL;
+ struct msg *msg;
+
+ fprintf(stdout, "Testing Good RPC Post: ");
+
+ rpc_setup(&http, &port, &base);
+
+ evcon = evhttp_connection_new("127.0.0.1", port);
+ if (evcon == NULL) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ /*
+	 * At this point, we want to schedule an HTTP POST request to the
+	 * server using our make request method.
+ */
+
+ req = evhttp_request_new(rpc_postrequest_done, NULL);
+ if (req == NULL) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ /* Add the information that we care about */
+ evhttp_add_header(req->output_headers, "Host", "somehost");
+
+ /* set up the basic message */
+ msg = msg_new();
+ EVTAG_ASSIGN(msg, from_name, "niels");
+ EVTAG_ASSIGN(msg, to_name, "tester");
+ msg_marshal(req->output_buffer, msg);
+ msg_free(msg);
+
+ if (evhttp_make_request(evcon, req,
+ EVHTTP_REQ_POST,
+ "/.rpc.Message") == -1) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ test_ok = 0;
+
+ event_dispatch();
+
+ evhttp_connection_free(evcon);
+
+ rpc_teardown(base);
+
+ if (test_ok != 1) {
+ fprintf(stdout, "FAILED\n");
+ exit(1);
+ }
+
+ fprintf(stdout, "OK\n");
+
+ evhttp_free(http);
+}
+
+static struct evrpc_pool *
+rpc_pool_with_connection(short port)
+{
+ struct evhttp_connection *evcon;
+ struct evrpc_pool *pool;
+
+ pool = evrpc_pool_new(NULL);
+ assert(pool != NULL);
+
+ evcon = evhttp_connection_new("127.0.0.1", port);
+ assert(evcon != NULL);
+
+ evrpc_pool_add_connection(pool, evcon);
+
+ return (pool);
+}
+
+static void
+GotKillCb(struct evrpc_status *status,
+ struct msg *msg, struct kill *kill, void *arg)
+{
+ char *weapon;
+ char *action;
+
+ if (need_output_hook) {
+ struct evhttp_request *req = status->http_req;
+ const char *header = evhttp_find_header(
+ req->input_headers, "X-Pool-Hook");
+ assert(strcmp(header, "ran") == 0);
+ }
+
+ if (status->error != EVRPC_STATUS_ERR_NONE)
+ goto done;
+
+ if (EVTAG_GET(kill, weapon, &weapon) == -1) {
+ fprintf(stderr, "get weapon\n");
+ goto done;
+ }
+ if (EVTAG_GET(kill, action, &action) == -1) {
+ fprintf(stderr, "get action\n");
+ goto done;
+ }
+
+ if (strcmp(weapon, "dagger"))
+ goto done;
+
+ if (strcmp(action, "wave around like an idiot"))
+ goto done;
+
+ test_ok += 1;
+
+done:
+ event_loopexit(NULL);
+}
+
+static void
+GotKillCbTwo(struct evrpc_status *status,
+ struct msg *msg, struct kill *kill, void *arg)
+{
+ char *weapon;
+ char *action;
+
+ if (status->error != EVRPC_STATUS_ERR_NONE)
+ goto done;
+
+ if (EVTAG_GET(kill, weapon, &weapon) == -1) {
+ fprintf(stderr, "get weapon\n");
+ goto done;
+ }
+ if (EVTAG_GET(kill, action, &action) == -1) {
+ fprintf(stderr, "get action\n");
+ goto done;
+ }
+
+ if (strcmp(weapon, "dagger"))
+ goto done;
+
+ if (strcmp(action, "wave around like an idiot"))
+ goto done;
+
+ test_ok += 1;
+
+done:
+ if (test_ok == 2)
+ event_loopexit(NULL);
+}
+
+static int
+rpc_hook_add_header(struct evhttp_request *req,
+ struct evbuffer *evbuf, void *arg)
+{
+ const char *hook_type = arg;
+ if (strcmp("input", hook_type) == 0)
+ evhttp_add_header(req->input_headers, "X-Hook", hook_type);
+ else
+ evhttp_add_header(req->output_headers, "X-Hook", hook_type);
+ return (0);
+}
+
+static int
+rpc_hook_remove_header(struct evhttp_request *req,
+ struct evbuffer *evbuf, void *arg)
+{
+ const char *header = evhttp_find_header(req->input_headers, "X-Hook");
+ assert(header != NULL);
+ assert(strcmp(header, arg) == 0);
+ evhttp_remove_header(req->input_headers, "X-Hook");
+ evhttp_add_header(req->input_headers, "X-Pool-Hook", "ran");
+
+ return (0);
+}
+
+static void
+rpc_basic_client(void)
+{
+ short port;
+ struct evhttp *http = NULL;
+ struct evrpc_base *base = NULL;
+ struct evrpc_pool *pool = NULL;
+ struct msg *msg;
+ struct kill *kill;
+
+ fprintf(stdout, "Testing RPC Client: ");
+
+ rpc_setup(&http, &port, &base);
+
+ need_input_hook = 1;
+ need_output_hook = 1;
+
+ assert(evrpc_add_hook(base, EVRPC_INPUT, rpc_hook_add_header, (void*)"input")
+ != NULL);
+ assert(evrpc_add_hook(base, EVRPC_OUTPUT, rpc_hook_add_header, (void*)"output")
+ != NULL);
+
+ pool = rpc_pool_with_connection(port);
+
+ assert(evrpc_add_hook(pool, EVRPC_INPUT, rpc_hook_remove_header, (void*)"output"));
+
+ /* set up the basic message */
+ msg = msg_new();
+ EVTAG_ASSIGN(msg, from_name, "niels");
+ EVTAG_ASSIGN(msg, to_name, "tester");
+
+ kill = kill_new();
+
+ EVRPC_MAKE_REQUEST(Message, pool, msg, kill, GotKillCb, NULL);
+
+ test_ok = 0;
+
+ event_dispatch();
+
+ if (test_ok != 1) {
+ fprintf(stdout, "FAILED (1)\n");
+ exit(1);
+ }
+
+ /* we do it twice to make sure that reuse works correctly */
+ kill_clear(kill);
+
+ EVRPC_MAKE_REQUEST(Message, pool, msg, kill, GotKillCb, NULL);
+
+ event_dispatch();
+
+ rpc_teardown(base);
+
+ if (test_ok != 2) {
+ fprintf(stdout, "FAILED (2)\n");
+ exit(1);
+ }
+
+ fprintf(stdout, "OK\n");
+
+ msg_free(msg);
+ kill_free(kill);
+
+ evrpc_pool_free(pool);
+ evhttp_free(http);
+}
+
+/*
+ * We are testing that the second request gets sent over the same
+ * connection after the first RPC completes.
+ */
+static void
+rpc_basic_queued_client(void)
+{
+ short port;
+ struct evhttp *http = NULL;
+ struct evrpc_base *base = NULL;
+ struct evrpc_pool *pool = NULL;
+ struct msg *msg;
+ struct kill *kill_one, *kill_two;
+
+ fprintf(stdout, "Testing RPC (Queued) Client: ");
+
+ rpc_setup(&http, &port, &base);
+
+ pool = rpc_pool_with_connection(port);
+
+ /* set up the basic message */
+ msg = msg_new();
+ EVTAG_ASSIGN(msg, from_name, "niels");
+ EVTAG_ASSIGN(msg, to_name, "tester");
+
+ kill_one = kill_new();
+ kill_two = kill_new();
+
+ EVRPC_MAKE_REQUEST(Message, pool, msg, kill_one, GotKillCbTwo, NULL);
+ EVRPC_MAKE_REQUEST(Message, pool, msg, kill_two, GotKillCb, NULL);
+
+ test_ok = 0;
+
+ event_dispatch();
+
+ rpc_teardown(base);
+
+ if (test_ok != 2) {
+ fprintf(stdout, "FAILED (1)\n");
+ exit(1);
+ }
+
+ fprintf(stdout, "OK\n");
+
+ msg_free(msg);
+ kill_free(kill_one);
+ kill_free(kill_two);
+
+ evrpc_pool_free(pool);
+ evhttp_free(http);
+}
+
+static void
+GotErrorCb(struct evrpc_status *status,
+ struct msg *msg, struct kill *kill, void *arg)
+{
+ if (status->error != EVRPC_STATUS_ERR_TIMEOUT)
+ goto done;
+
+ /* should never be complete but just to check */
+ if (kill_complete(kill) == 0)
+ goto done;
+
+ test_ok += 1;
+
+done:
+ event_loopexit(NULL);
+}
+
+static void
+rpc_client_timeout(void)
+{
+ short port;
+ struct evhttp *http = NULL;
+ struct evrpc_base *base = NULL;
+ struct evrpc_pool *pool = NULL;
+ struct msg *msg;
+ struct kill *kill;
+
+ fprintf(stdout, "Testing RPC Client Timeout: ");
+
+ rpc_setup(&http, &port, &base);
+
+ pool = rpc_pool_with_connection(port);
+
+ /* set the timeout to 5 seconds */
+ evrpc_pool_set_timeout(pool, 5);
+
+ /* set up the basic message */
+ msg = msg_new();
+ EVTAG_ASSIGN(msg, from_name, "niels");
+ EVTAG_ASSIGN(msg, to_name, "tester");
+
+ kill = kill_new();
+
+ EVRPC_MAKE_REQUEST(NeverReply, pool, msg, kill, GotErrorCb, NULL);
+
+ test_ok = 0;
+
+ event_dispatch();
+
+ /* free the saved RPC structure up */
+ EVRPC_REQUEST_DONE(saved_rpc);
+
+ rpc_teardown(base);
+
+ if (test_ok != 2) {
+ fprintf(stdout, "FAILED (1)\n");
+ exit(1);
+ }
+
+ fprintf(stdout, "OK\n");
+
+ msg_free(msg);
+ kill_free(kill);
+
+ evrpc_pool_free(pool);
+ evhttp_free(http);
+}
+
+void
+rpc_suite(void)
+{
+ rpc_basic_test();
+ rpc_basic_message();
+ rpc_basic_client();
+ rpc_basic_queued_client();
+ rpc_client_timeout();
+}
diff --git a/libevent/test/test-eof.c b/libevent/test/test-eof.c
new file mode 100644
index 00000000000..4fc1a19f224
--- /dev/null
+++ b/libevent/test/test-eof.c
@@ -0,0 +1,82 @@
+/*
+ * Compile with:
+ * cc -I/usr/local/include -o time-test time-test.c -L/usr/local/lib -levent
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#ifdef WIN32
+#include <winsock2.h>
+#endif
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#ifdef HAVE_SYS_SOCKET_H
+#include <sys/socket.h>
+#endif
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include <event.h>
+#include <evutil.h>
+
+int test_okay = 1;
+int called = 0;
+
+static void
+read_cb(int fd, short event, void *arg)
+{
+	/*
+	 * Read callback for the EOF test.  arg is the struct event
+	 * itself; the event is not persistent, so it must be re-added
+	 * to receive further callbacks.  A read length of 0 means the
+	 * peer shut down its end (EOF).
+	 */
+	char buf[256];
+	int len;
+
+	len = read(fd, buf, sizeof(buf));
+
+	printf("%s: read %d%s\n", __func__,
+	    len, len ? "" : " - means EOF");
+
+	/*
+	 * NOTE(review): a read error (len < 0) takes the same branch
+	 * as data (len > 0) here - confirm that is intended.
+	 */
+	if (len) {
+		if (!called)
+			event_add(arg, NULL);
+	} else if (called == 1)
+		test_okay = 0;	/* EOF must arrive on the second callback */
+
+	called++;
+}
+
+#ifndef SHUT_WR
+#define SHUT_WR 1
+#endif
+
+int
+main (int argc, char **argv)
+{
+	/*
+	 * test-eof: write a string into one end of a socketpair, shut
+	 * down the writing side, and verify that read_cb observes the
+	 * data first and then EOF.  Exit status 0 means success
+	 * (read_cb clears test_okay when EOF is seen on the second
+	 * callback).
+	 */
+	struct event ev;
+	const char *test = "test string";
+	int pair[2];
+
+	if (evutil_socketpair(AF_UNIX, SOCK_STREAM, 0, pair) == -1)
+		return (1);
+
+	/* NOTE(review): the write() return value is ignored here */
+	write(pair[0], test, strlen(test)+1);
+	shutdown(pair[0], SHUT_WR);
+
+	/* Initialize the event library */
+	event_init();
+
+	/* Initialize one event */
+	event_set(&ev, pair[1], EV_READ, read_cb, &ev);
+
+	event_add(&ev, NULL);
+
+	event_dispatch();
+
+	return (test_okay);
+}
+
diff --git a/libevent/test/test-init.c b/libevent/test/test-init.c
new file mode 100644
index 00000000000..c368715fd67
--- /dev/null
+++ b/libevent/test/test-init.c
@@ -0,0 +1,33 @@
+/*
+ * Compile with:
+ * cc -I/usr/local/include -o time-test time-test.c -L/usr/local/lib -levent
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#ifdef HAVE_SYS_SOCKET_H
+#include <sys/socket.h>
+#endif
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include <event.h>
+
+int
+main(int argc, char **argv)
+{
+	/*
+	 * test-init: smoke test that simply checks event_init() can be
+	 * called without crashing.
+	 */
+	/* Initialize the event library */
+	event_init();
+
+	return (0);
+}
+
diff --git a/libevent/test/test-time.c b/libevent/test/test-time.c
new file mode 100644
index 00000000000..a847d55ef38
--- /dev/null
+++ b/libevent/test/test-time.c
@@ -0,0 +1,82 @@
+/*
+ * Compile with:
+ * cc -I/usr/local/include -o time-test time-test.c -L/usr/local/lib -levent
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include <event.h>
+
+int called = 0;
+
+#define NEVENT 20000
+
+struct event *ev[NEVENT];
+
+/* Return a pseudo-random integer in [0, n). */
+static int
+rand_int(int n)
+{
+#ifdef WIN32
+	/*
+	 * NOTE(review): rand() * n overflows and does not reduce into
+	 * [0, n); (rand() % n) was probably intended - confirm.
+	 */
+	return (int)(rand() * n);
+#else
+	return (int)(random() % n);
+#endif
+}
+
+static void
+time_cb(int fd, short event, void *arg)
+{
+	/*
+	 * Timer callback: until 10*NEVENT callbacks have fired, keep
+	 * the timer population churning by re-adding or deleting ten
+	 * randomly chosen timers with small random timeouts.
+	 */
+	struct timeval tv;
+	int i, j;
+
+	called++;
+
+	if (called < 10*NEVENT) {
+		for (i = 0; i < 10; i++) {
+			j = rand_int(NEVENT);
+			tv.tv_sec = 0;
+			tv.tv_usec = rand_int(50000);
+			/* roughly half the picks re-arm, the rest cancel */
+			if (tv.tv_usec % 2)
+				evtimer_add(ev[j], &tv);
+			else
+				evtimer_del(ev[j]);
+		}
+	}
+}
+
+int
+main (int argc, char **argv)
+{
+	/*
+	 * test-time: stress the timer queue with NEVENT timers that
+	 * constantly re-add/delete one another (see time_cb).  Exit
+	 * status 0 means at least NEVENT callbacks ran.
+	 */
+	struct timeval tv;
+	int i;
+
+	/* Initialize the event library */
+	event_init();
+
+	for (i = 0; i < NEVENT; i++) {
+		/* NOTE(review): malloc() result is not checked */
+		ev[i] = malloc(sizeof(struct event));
+
+		/* Initialize one event */
+		evtimer_set(ev[i], time_cb, ev[i]);
+		tv.tv_sec = 0;
+		tv.tv_usec = rand_int(50000);
+		evtimer_add(ev[i], &tv);
+	}
+
+	event_dispatch();
+
+	return (called < NEVENT);
+}
+
diff --git a/libevent/test/test-weof.c b/libevent/test/test-weof.c
new file mode 100644
index 00000000000..5d87ceb8eb7
--- /dev/null
+++ b/libevent/test/test-weof.c
@@ -0,0 +1,80 @@
+/*
+ * Compile with:
+ * cc -I/usr/local/include -o time-test time-test.c -L/usr/local/lib -levent
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+
+#ifdef WIN32
+#include <winsock2.h>
+#endif
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#ifdef HAVE_SYS_SOCKET_H
+#include <sys/socket.h>
+#endif
+#include <fcntl.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include <event.h>
+#include <evutil.h>
+
+int pair[2];
+int test_okay = 1;
+int called = 0;
+
+static void
+write_cb(int fd, short event, void *arg)
+{
+	/*
+	 * Write callback for the write-EOF test.  After the first
+	 * successful write the read end (pair[0]) is closed, so a
+	 * later write must fail; the second callback is expected to
+	 * observe that failure.
+	 */
+	const char *test = "test string";
+	int len;
+
+	len = write(fd, test, strlen(test) + 1);
+
+	printf("%s: write %d%s\n", __func__,
+	    len, len ? "" : " - means EOF");
+
+	if (len > 0) {
+		if (!called)
+			event_add(arg, NULL);
+		close(pair[0]);
+	} else if (called == 1)
+		test_okay = 0;	/* failed write on second callback = pass */
+
+	called++;
+}
+
+int
+main (int argc, char **argv)
+{
+	/*
+	 * test-weof: write to a socketpair whose read end is closed
+	 * after the first write, and verify the second write fails.
+	 * SIGPIPE is ignored so the failed write returns an error
+	 * instead of killing the process.  Exit status 0 = success.
+	 */
+	struct event ev;
+
+#ifndef WIN32
+	if (signal(SIGPIPE, SIG_IGN) == SIG_ERR)
+		return (1);
+#endif
+
+	if (evutil_socketpair(AF_UNIX, SOCK_STREAM, 0, pair) == -1)
+		return (1);
+
+	/* Initialize the event library */
+	event_init();
+
+	/* Initialize one event */
+	event_set(&ev, pair[1], EV_WRITE, write_cb, &ev);
+
+	event_add(&ev, NULL);
+
+	event_dispatch();
+
+	return (test_okay);
+}
+
diff --git a/libevent/test/test.sh b/libevent/test/test.sh
new file mode 100644
index 00000000000..506a1988c34
--- /dev/null
+++ b/libevent/test/test.sh
@@ -0,0 +1,91 @@
+#!/bin/sh
+
+# Disable every event backend; the per-backend runs below re-enable
+# exactly one by unsetting its EVENT_NO* variable.
+setup () {
+	EVENT_NOKQUEUE=yes; export EVENT_NOKQUEUE
+	EVENT_NODEVPOLL=yes; export EVENT_NODEVPOLL
+	EVENT_NOPOLL=yes; export EVENT_NOPOLL
+	EVENT_NOSELECT=yes; export EVENT_NOSELECT
+	EVENT_NOEPOLL=yes; export EVENT_NOEPOLL
+	EVENT_NOEVPORT=yes; export EVENT_NOEVPORT
+}
+
+# Run the test binaries with the currently enabled backend, printing
+# OKAY/FAILED per binary.  If test-init fails, the backend is assumed
+# unavailable and the whole run is skipped.
+# NOTE(review): this redefines the shell builtin/command `test'; any
+# later use of `test' or `[' in this script would invoke this function
+# instead - confirm that is intended.
+test () {
+	if ./test-init 2>/dev/null ;
+	then
+		true
+	else
+		echo Skipping test
+		return
+	fi
+
+echo -n " test-eof: "
+if ./test-eof >/dev/null ;
+then
+	echo OKAY ;
+else
+	echo FAILED ;
+fi
+echo -n " test-weof: "
+if ./test-weof >/dev/null ;
+then
+	echo OKAY ;
+else
+	echo FAILED ;
+fi
+echo -n " test-time: "
+if ./test-time >/dev/null ;
+then
+	echo OKAY ;
+else
+	echo FAILED ;
+fi
+echo -n " regress: "
+if ./regress >/dev/null ;
+then
+	echo OKAY ;
+else
+	echo FAILED ;
+fi
+}
+
+echo "Running tests:"
+
+# Need to do this by hand?
+# Each stanza below disables all backends via setup, then re-enables
+# exactly one by unsetting its EVENT_NO* variable, and runs the suite
+# against that backend.
+setup
+unset EVENT_NOKQUEUE
+export EVENT_NOKQUEUE
+echo "KQUEUE"
+test
+
+setup
+unset EVENT_NODEVPOLL
+export EVENT_NODEVPOLL
+echo "DEVPOLL"
+test
+
+setup
+unset EVENT_NOPOLL
+export EVENT_NOPOLL
+echo "POLL"
+test
+
+setup
+unset EVENT_NOSELECT
+export EVENT_NOSELECT
+echo "SELECT"
+test
+
+setup
+unset EVENT_NOEPOLL
+export EVENT_NOEPOLL
+echo "EPOLL"
+test
+
+setup
+unset EVENT_NOEVPORT
+export EVENT_NOEVPORT
+echo "EVPORT"
+test
+
+
+
diff --git a/libmysql/CMakeLists.txt b/libmysql/CMakeLists.txt
index 9996b167323..e178d546590 100644
--- a/libmysql/CMakeLists.txt
+++ b/libmysql/CMakeLists.txt
@@ -324,7 +324,8 @@ SET(CLIENT_SOURCES
../sql-common/client.c
../sql-common/mysql_async.c
../sql-common/my_time.c
- ../sql-common/client_plugin.c
+ ../sql-common/client_plugin.c
+ ../sql-common/client_authentication.cc
../sql/net_serv.cc
../sql-common/pack.c
../sql/password.c
@@ -334,7 +335,7 @@ ADD_CONVENIENCE_LIBRARY(clientlib ${CLIENT_SOURCES})
DTRACE_INSTRUMENT(clientlib)
ADD_DEPENDENCIES(clientlib GenError)
-SET(LIBS clientlib dbug strings vio mysys ${ZLIB_LIBRARY} ${SSL_LIBRARIES} ${LIBDL})
+SET(LIBS clientlib dbug strings vio mysys mysys_ssl ${ZLIB_LIBRARY} ${SSL_LIBRARIES} ${LIBDL})
# Merge several convenience libraries into one big mysqlclient
# and link them together into shared library.
diff --git a/libmysql/errmsg.c b/libmysql/errmsg.c
index 4c4485f7ec4..9985fa2233c 100644
--- a/libmysql/errmsg.c
+++ b/libmysql/errmsg.c
@@ -85,6 +85,8 @@ const char *client_errors[]=
"The number of columns in the result set differs from the number of bound buffers. You must reset the statement, rebind the result set columns, and execute the statement again",
"This handle is already connected. Use a separate handle for each connection.",
"Authentication plugin '%s' cannot be loaded: %s",
+ "There is an attribute with the same name already",
+ "Authentication plugin '%s' reported error: %s",
""
};
diff --git a/libmysql/libmysql.c b/libmysql/libmysql.c
index 69fce429ab9..251c8f29b70 100644
--- a/libmysql/libmysql.c
+++ b/libmysql/libmysql.c
@@ -1139,7 +1139,7 @@ void my_net_local_init(NET *net)
my_net_set_read_timeout(net, CLIENT_NET_READ_TIMEOUT);
my_net_set_write_timeout(net, CLIENT_NET_WRITE_TIMEOUT);
net->retry_count= 1;
- net->max_packet_size= max(net_buffer_length, max_allowed_packet);
+ net->max_packet_size= MY_MAX(net_buffer_length, max_allowed_packet);
}
/*
@@ -3228,7 +3228,7 @@ static void fetch_string_with_conversion(MYSQL_BIND *param, char *value,
copy_length= end - start;
/* We've got some data beyond offset: copy up to buffer_length bytes */
if (param->buffer_length)
- memcpy(buffer, start, min(copy_length, param->buffer_length));
+ memcpy(buffer, start, MY_MIN(copy_length, param->buffer_length));
}
else
copy_length= 0;
@@ -3455,7 +3455,7 @@ static void fetch_float_with_conversion(MYSQL_BIND *param, MYSQL_FIELD *field,
size_t len;
if (field->decimals >= NOT_FIXED_DEC)
len= my_gcvt(value, type,
- (int) min(sizeof(buff)-1, param->buffer_length),
+ (int) MY_MIN(sizeof(buff)-1, param->buffer_length),
buff, NULL);
else
len= my_fcvt(value, (int) field->decimals, buff, NULL);
@@ -3765,7 +3765,7 @@ static void fetch_result_bin(MYSQL_BIND *param,
uchar **row)
{
ulong length= net_field_length(row);
- ulong copy_length= min(length, param->buffer_length);
+ ulong copy_length= MY_MIN(length, param->buffer_length);
memcpy(param->buffer, (char *)*row, copy_length);
*param->length= length;
*param->error= copy_length < length;
@@ -3777,7 +3777,7 @@ static void fetch_result_str(MYSQL_BIND *param,
uchar **row)
{
ulong length= net_field_length(row);
- ulong copy_length= min(length, param->buffer_length);
+ ulong copy_length= MY_MIN(length, param->buffer_length);
memcpy(param->buffer, (char *)*row, copy_length);
/* Add an end null if there is room in the buffer */
if (copy_length != param->buffer_length)
diff --git a/mysys/CMakeLists.txt b/mysys/CMakeLists.txt
index 05606942d8e..a1aa5e38580 100644
--- a/mysys/CMakeLists.txt
+++ b/mysys/CMakeLists.txt
@@ -15,15 +15,14 @@
INCLUDE_DIRECTORIES(${ZLIB_INCLUDE_DIR} ${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/mysys)
-SET(MYSYS_SOURCES array.c charset-def.c charset.c checksum.c default.c
+SET(MYSYS_SOURCES array.c charset-def.c charset.c checksum.c my_default.c
errors.c hash.c list.c
- md5.c md5_compute.cc
mf_cache.c mf_dirname.c mf_fn_ext.c
mf_format.c mf_getdate.c mf_iocache.c mf_iocache2.c mf_keycache.c
mf_keycaches.c mf_loadpath.c mf_pack.c mf_path.c mf_qsort.c mf_qsort2.c
mf_radix.c mf_same.c mf_sort.c mf_soundex.c mf_arr_appstr.c mf_tempdir.c
mf_tempfile.c mf_unixpath.c mf_wcomp.c mulalloc.c my_access.c
- my_aes.c my_alloc.c my_bit.c my_bitmap.c my_chsize.c
+ my_alloc.c my_bit.c my_bitmap.c my_chsize.c
my_compress.c my_copy.c my_create.c my_delete.c
my_div.c my_error.c my_file.c my_fopen.c my_fstream.c
my_gethwaddr.c my_getopt.c my_getsystime.c my_getwd.c my_compare.c my_init.c
@@ -33,7 +32,7 @@ SET(MYSYS_SOURCES array.c charset-def.c charset.c checksum.c default.c
my_static.c my_symlink.c my_symlink2.c my_sync.c my_thr_init.c
my_basename.c
my_write.c ptr_cmp.c queues.c stacktrace.c
- rijndael.c sha1.c string.c thr_alarm.c thr_lock.c thr_mutex.c
+ string.c thr_alarm.c thr_lock.c thr_mutex.c
thr_rwlock.c tree.c typelib.c base64.c my_memmem.c my_getpagesize.c
lf_alloc-pin.c lf_dynarray.c lf_hash.c
safemalloc.c my_new.cc
diff --git a/mysys/array.c b/mysys/array.c
index 60f2202f5b3..cf377f77676 100644
--- a/mysys/array.c
+++ b/mysys/array.c
@@ -48,7 +48,7 @@ my_bool my_init_dynamic_array2(DYNAMIC_ARRAY *array, uint element_size,
DBUG_ENTER("my_init_dynamic_array2");
if (!alloc_increment)
{
- alloc_increment=max((8192-MALLOC_OVERHEAD)/element_size,16);
+ alloc_increment=MY_MAX((8192-MALLOC_OVERHEAD)/element_size,16);
if (init_alloc > 8 && alloc_increment > init_alloc * 2)
alloc_increment=init_alloc*2;
}
@@ -333,7 +333,7 @@ void delete_dynamic_element(DYNAMIC_ARRAY *array, uint idx)
void freeze_size(DYNAMIC_ARRAY *array)
{
- uint elements=max(array->elements,1);
+ uint elements=MY_MAX(array->elements,1);
/*
Do nothing if we are using a static buffer
diff --git a/mysys/lf_alloc-pin.c b/mysys/lf_alloc-pin.c
index 6ab6ba3aae0..788537ec87b 100644
--- a/mysys/lf_alloc-pin.c
+++ b/mysys/lf_alloc-pin.c
@@ -287,7 +287,7 @@ struct st_harvester {
static int harvest_pins(LF_PINS *el, struct st_harvester *hv)
{
int i;
- LF_PINS *el_end= el+min(hv->npins, LF_DYNARRAY_LEVEL_LENGTH);
+ LF_PINS *el_end= el+MY_MIN(hv->npins, LF_DYNARRAY_LEVEL_LENGTH);
for (; el < el_end; el++)
{
for (i= 0; i < LF_PINBOX_PINS; i++)
diff --git a/mysys/lf_dynarray.c b/mysys/lf_dynarray.c
index 3d072fd063e..16a77c0fa1a 100644
--- a/mysys/lf_dynarray.c
+++ b/mysys/lf_dynarray.c
@@ -124,7 +124,7 @@ void *_lf_dynarray_lvalue(LF_DYNARRAY *array, uint idx)
{
uchar *alloc, *data;
alloc= my_malloc(LF_DYNARRAY_LEVEL_LENGTH * array->size_of_element +
- max(array->size_of_element, sizeof(void *)),
+ MY_MAX(array->size_of_element, sizeof(void *)),
MYF(MY_WME|MY_ZEROFILL));
if (unlikely(!alloc))
return(NULL);
diff --git a/mysys/ma_dyncol.c b/mysys/ma_dyncol.c
index 8a224f1c5e8..33627b85f92 100644
--- a/mysys/ma_dyncol.c
+++ b/mysys/ma_dyncol.c
@@ -3853,20 +3853,19 @@ mariadb_dyncol_val_str(DYNAMIC_STRING *str, DYNAMIC_COLUMN_VALUE *val,
if (!quote)
{
/* convert to the destination */
- str->length+= copy_and_convert_extended(str->str, bufflen,
- cs,
- from, (uint32)len,
- val->x.string.charset,
- &dummy_errors);
+ str->length+= my_convert(str->str, bufflen,
+ cs,
+ from, (uint32)len,
+ val->x.string.charset,
+ &dummy_errors);
return ER_DYNCOL_OK;
}
if ((alloc= (char *)my_malloc(bufflen, MYF(0))))
{
- len=
- copy_and_convert_extended(alloc, bufflen, cs,
- from, (uint32)len,
- val->x.string.charset,
- &dummy_errors);
+ len= my_convert(alloc, bufflen, cs,
+ from, (uint32)len,
+ val->x.string.charset,
+ &dummy_errors);
from= alloc;
}
else
diff --git a/mysys/mf_dirname.c b/mysys/mf_dirname.c
index 569293f5401..bc827f60d44 100644
--- a/mysys/mf_dirname.c
+++ b/mysys/mf_dirname.c
@@ -78,7 +78,7 @@ size_t dirname_part(char *to, const char *name, size_t *to_res_length)
SYNPOSIS
convert_dirname()
to Store result here. Must be at least of size
- min(FN_REFLEN, strlen(from) + 1) to make room
+ MY_MIN(FN_REFLEN, strlen(from) + 1) to make room
for adding FN_LIBCHAR at the end.
from Original filename. May be == to
from_end Pointer at end of filename (normally end \0)
diff --git a/mysys/mf_format.c b/mysys/mf_format.c
index 2b2356c08df..3b5b0aa8a4a 100644
--- a/mysys/mf_format.c
+++ b/mysys/mf_format.c
@@ -85,7 +85,7 @@ char * fn_format(char * to, const char *name, const char *dir,
tmp_length= strlength(startpos);
DBUG_PRINT("error",("dev: '%s' ext: '%s' length: %u",dev,ext,
(uint) length));
- (void) strmake(to,startpos,min(tmp_length,FN_REFLEN-1));
+ (void) strmake(to,startpos,MY_MIN(tmp_length,FN_REFLEN-1));
}
else
{
diff --git a/mysys/mf_iocache.c b/mysys/mf_iocache.c
index 02e5c5373ae..3fa6ec28f7d 100644
--- a/mysys/mf_iocache.c
+++ b/mysys/mf_iocache.c
@@ -1127,7 +1127,7 @@ static void copy_to_read_buffer(IO_CACHE *write_cache,
*/
while (write_length)
{
- size_t copy_length= min(write_length, write_cache->buffer_length);
+ size_t copy_length= MY_MIN(write_length, write_cache->buffer_length);
int __attribute__((unused)) rc;
rc= lock_io_cache(write_cache, write_cache->pos_in_file);
@@ -1285,7 +1285,7 @@ read_append_buffer:
TODO: figure out if the assert below is needed or correct.
*/
DBUG_ASSERT(pos_in_file == info->end_of_file);
- copy_len=min(Count, len_in_buff);
+ copy_len=MY_MIN(Count, len_in_buff);
memcpy(Buffer, info->append_read_pos, copy_len);
info->append_read_pos += copy_len;
Count -= copy_len;
@@ -1394,7 +1394,7 @@ int _my_b_async_read(register IO_CACHE *info, uchar *Buffer, size_t Count)
}
#endif
/* Copy found bytes to buffer */
- length=min(Count,read_length);
+ length=MY_MIN(Count,read_length);
memcpy(Buffer,info->read_pos,(size_t) length);
Buffer+=length;
Count-=length;
@@ -1428,7 +1428,7 @@ int _my_b_async_read(register IO_CACHE *info, uchar *Buffer, size_t Count)
if ((read_length=mysql_file_read(info->file,info->request_pos,
read_length, info->myflags)) == (size_t) -1)
return info->error= -1;
- use_length=min(Count,read_length);
+ use_length=MY_MIN(Count,read_length);
memcpy(Buffer,info->request_pos,(size_t) use_length);
info->read_pos=info->request_pos+Count;
info->read_end=info->request_pos+read_length;
diff --git a/mysys/my_aes.c b/mysys/my_aes.c
deleted file mode 100644
index 575d4702dee..00000000000
--- a/mysys/my_aes.c
+++ /dev/null
@@ -1,227 +0,0 @@
-/* Copyright (C) 2002 MySQL AB
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; version 2 of the License.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
-
-
-/*
- Implementation of AES Encryption for MySQL
- Initial version by Peter Zaitsev June 2002
-*/
-
-
-#include <my_global.h>
-#include <m_string.h>
-#include "my_aes.h"
-
-enum encrypt_dir { AES_ENCRYPT, AES_DECRYPT };
-
-#define AES_BLOCK_SIZE 16 /* Block size in bytes */
-
-#define AES_BAD_DATA -1 /* If bad data discovered during decoding */
-
-
-/* The structure for key information */
-typedef struct {
- int nr; /* Number of rounds */
- uint32 rk[4*(AES_MAXNR + 1)]; /* key schedule */
-} KEYINSTANCE;
-
-
-/*
- This is internal function just keeps joint code of Key generation
-
- SYNOPSIS
- my_aes_create_key()
- aes_key Address of Key Instance to be created
- direction Direction (are we encoding or decoding)
- key Key to use for real key creation
- key_length Length of the key
-
- DESCRIPTION
-
- RESULT
- 0 ok
- -1 Error Note: The current impementation never returns this
-*/
-
-static int my_aes_create_key(KEYINSTANCE *aes_key,
- enum encrypt_dir direction, const char *key,
- int key_length)
-{
- uint8 rkey[AES_KEY_LENGTH/8]; /* The real key to be used for encryption */
- uint8 *rkey_end=rkey+AES_KEY_LENGTH/8; /* Real key boundary */
- uint8 *ptr; /* Start of the real key*/
- const char *sptr; /* Start of the working key */
- const char *key_end=key+key_length; /* Working key boundary*/
-
- bzero((char*) rkey,AES_KEY_LENGTH/8); /* Set initial key */
-
- for (ptr= rkey, sptr= key; sptr < key_end; ptr++,sptr++)
- {
- if (ptr == rkey_end)
- ptr= rkey; /* Just loop over tmp_key until we used all key */
- *ptr^= (uint8) *sptr;
- }
-#ifdef AES_USE_KEY_BITS
- /*
- This block is intended to allow more weak encryption if application
- build with libmysqld needs to correspond to export regulations
- It should be never used in normal distribution as does not give
- any speed improvement.
- To get worse security define AES_USE_KEY_BITS to number of bits
- you want key to be. It should be divisible by 8
-
- WARNING: Changing this value results in changing of enryption for
- all key lengths so altering this value will result in impossibility
- to decrypt data encrypted with previous value
- */
-#define AES_USE_KEY_BYTES (AES_USE_KEY_BITS/8)
- /*
- To get weaker key we use first AES_USE_KEY_BYTES bytes of created key
- and cyclically copy them until we created all required key length
- */
- for (ptr= rkey+AES_USE_KEY_BYTES, sptr=rkey ; ptr < rkey_end;
- ptr++,sptr++)
- {
- if (sptr == rkey+AES_USE_KEY_BYTES)
- sptr=rkey;
- *ptr=*sptr;
- }
-#endif
- if (direction == AES_DECRYPT)
- aes_key->nr = rijndaelKeySetupDec(aes_key->rk, rkey, AES_KEY_LENGTH);
- else
- aes_key->nr = rijndaelKeySetupEnc(aes_key->rk, rkey, AES_KEY_LENGTH);
- return 0;
-}
-
-
-/*
- Crypt buffer with AES encryption algorithm.
-
- SYNOPSIS
- my_aes_encrypt()
- source Pointer to data for encryption
- source_length Size of encryption data
- dest Buffer to place encrypted data (must be large enough)
- key Key to be used for encryption
- key_length Length of the key. Will handle keys of any length
-
- RETURN
- >= 0 Size of encrypted data
- < 0 Error
-*/
-
-int my_aes_encrypt(const char* source, int source_length, char* dest,
- const char* key, int key_length)
-{
- KEYINSTANCE aes_key;
- uint8 block[AES_BLOCK_SIZE]; /* 128 bit block used for padding */
- int rc; /* result codes */
- int num_blocks; /* number of complete blocks */
- char pad_len; /* pad size for the last block */
- int i;
-
- if ((rc= my_aes_create_key(&aes_key,AES_ENCRYPT,key,key_length)))
- return rc;
-
- num_blocks = source_length/AES_BLOCK_SIZE;
-
- for (i = num_blocks; i > 0; i--) /* Encode complete blocks */
- {
- rijndaelEncrypt(aes_key.rk, aes_key.nr, (const uint8*) source,
- (uint8*) dest);
- source+= AES_BLOCK_SIZE;
- dest+= AES_BLOCK_SIZE;
- }
-
- /* Encode the rest. We always have incomplete block */
- pad_len = AES_BLOCK_SIZE - (source_length - AES_BLOCK_SIZE*num_blocks);
- memcpy(block, source, 16 - pad_len);
- bfill(block + AES_BLOCK_SIZE - pad_len, pad_len, pad_len);
- rijndaelEncrypt(aes_key.rk, aes_key.nr, block, (uint8*) dest);
- return AES_BLOCK_SIZE*(num_blocks + 1);
-}
-
-
-/*
- DeCrypt buffer with AES encryption algorithm.
-
- SYNOPSIS
- my_aes_decrypt()
- source Pointer to data for decryption
- source_length Size of encrypted data
- dest Buffer to place decrypted data (must be large enough)
- key Key to be used for decryption
- key_length Length of the key. Will handle keys of any length
-
- RETURN
- >= 0 Size of encrypted data
- < 0 Error
-*/
-
-int my_aes_decrypt(const char *source, int source_length, char *dest,
- const char *key, int key_length)
-{
- KEYINSTANCE aes_key;
- uint8 block[AES_BLOCK_SIZE]; /* 128 bit block used for padding */
- int rc; /* Result codes */
- int num_blocks; /* Number of complete blocks */
- uint pad_len; /* Pad size for the last block */
- int i;
-
- if ((rc=my_aes_create_key(&aes_key,AES_DECRYPT,key,key_length)))
- return rc;
-
- num_blocks = source_length/AES_BLOCK_SIZE;
-
- if ((source_length != num_blocks*AES_BLOCK_SIZE) || num_blocks ==0 )
- return AES_BAD_DATA; /* Input size has to be even and at least one block */
-
- for (i = num_blocks-1; i > 0; i--) /* Decode all but last blocks */
- {
- rijndaelDecrypt(aes_key.rk, aes_key.nr, (const uint8*) source,
- (uint8*) dest);
- source+= AES_BLOCK_SIZE;
- dest+= AES_BLOCK_SIZE;
- }
-
- rijndaelDecrypt(aes_key.rk, aes_key.nr, (const uint8*) source, block);
- /* Use last char in the block as size */
- pad_len = (uint) (uchar) block[AES_BLOCK_SIZE-1];
-
- if (pad_len > AES_BLOCK_SIZE)
- return AES_BAD_DATA;
- /* We could also check whole padding but we do not really need this */
-
- memcpy(dest, block, AES_BLOCK_SIZE - pad_len);
- return AES_BLOCK_SIZE*num_blocks - pad_len;
-}
-
-
-/*
- Get size of buffer which will be large enough for encrypted data
-
- SYNOPSIS
- my_aes_get_size()
- source_length Length of data to be encrypted
-
- RETURN
- Size of buffer required to store encrypted data
-*/
-
-int my_aes_get_size(int source_length)
-{
- return AES_BLOCK_SIZE*(source_length/AES_BLOCK_SIZE)+AES_BLOCK_SIZE;
-}
diff --git a/mysys/my_alloc.c b/mysys/my_alloc.c
index 6c8a73df4a7..d61c7e171d0 100644
--- a/mysys/my_alloc.c
+++ b/mysys/my_alloc.c
@@ -228,7 +228,7 @@ void *alloc_root(MEM_ROOT *mem_root, size_t length)
{ /* Time to alloc new block */
block_size= (mem_root->block_size & ~1) * (mem_root->block_num >> 2);
get_size= length+ALIGN_SIZE(sizeof(USED_MEM));
- get_size= max(get_size, block_size);
+ get_size= MY_MAX(get_size, block_size);
if (!(next = (USED_MEM*) my_malloc(get_size,
MYF(MY_WME | ME_FATALERROR |
diff --git a/mysys/my_bitmap.c b/mysys/my_bitmap.c
index 83d03177eba..8b4dd83ab21 100644
--- a/mysys/my_bitmap.c
+++ b/mysys/my_bitmap.c
@@ -405,7 +405,7 @@ void bitmap_intersect(MY_BITMAP *map, const MY_BITMAP *map2)
DBUG_ASSERT(map->bitmap && map2->bitmap);
- end= to+min(len,len2);
+ end= to+MY_MIN(len,len2);
while (to < end)
*to++ &= *from++;
diff --git a/mysys/my_compare.c b/mysys/my_compare.c
index 9e192e52fb7..670d377d4a4 100644
--- a/mysys/my_compare.c
+++ b/mysys/my_compare.c
@@ -36,7 +36,7 @@ static int compare_bin(const uchar *a, uint a_length,
const uchar *b, uint b_length,
my_bool part_key, my_bool skip_end_space)
{
- uint length= min(a_length,b_length);
+ uint length= MY_MIN(a_length,b_length);
const uchar *end= a+ length;
int flag;
@@ -171,7 +171,7 @@ int ha_key_cmp(HA_KEYSEG *keyseg, const uchar *a,
continue; /* To next key part */
}
}
- end= a+ min(keyseg->length,key_length);
+ end= a+ MY_MIN(keyseg->length,key_length);
next_key_length=key_length-keyseg->length;
switch ((enum ha_base_keytype) keyseg->type) {
diff --git a/mysys/my_compress.c b/mysys/my_compress.c
index ea56900db05..6b223d2b354 100644
--- a/mysys/my_compress.c
+++ b/mysys/my_compress.c
@@ -311,7 +311,7 @@ int unpackfrm(uchar **unpack_data, size_t *unpack_len,
if (ver != 1)
DBUG_RETURN(1);
- if (!(data= my_malloc(max(orglen, complen), MYF(MY_WME))))
+ if (!(data= my_malloc(MY_MAX(orglen, complen), MYF(MY_WME))))
DBUG_RETURN(2);
memcpy(data, pack_data + BLOB_HEADER, complen);
diff --git a/mysys/my_conio.c b/mysys/my_conio.c
index 5dbd31193a9..67b1a319f57 100644
--- a/mysys/my_conio.c
+++ b/mysys/my_conio.c
@@ -165,13 +165,13 @@ char* my_cgets(char *buffer, size_t clen, size_t* plen)
though it is known it should not be more than 64K
so we cut 64K and try first size of screen buffer
if it is still to large we cut half of it and try again
- later we may want to cycle from min(clen, 65535) to allowed size
+ later we may want to cycle from MY_MIN(clen, 65535) to allowed size
with small decrement to determine exact allowed buffer
*/
- clen= min(clen, 65535);
+ clen= MY_MIN(clen, 65535);
do
{
- clen= min(clen, (size_t) csbi.dwSize.X*csbi.dwSize.Y);
+ clen= MY_MIN(clen, (size_t) csbi.dwSize.X*csbi.dwSize.Y);
if (!ReadConsole((HANDLE)my_coninpfh, (LPVOID)buffer, (DWORD) clen - 1, &plen_res,
NULL))
{
diff --git a/mysys/default.c b/mysys/my_default.c
index a90f428eca5..f03a22b598e 100644
--- a/mysys/default.c
+++ b/mysys/my_default.c
@@ -34,8 +34,9 @@
****************************************************************************/
#include "mysys_priv.h"
-#include "m_string.h"
-#include "m_ctype.h"
+#include <my_default.h>
+#include <m_string.h>
+#include <m_ctype.h>
#include <my_dir.h>
#ifdef __WIN__
#include <winbase.h>
@@ -899,7 +900,7 @@ static int search_default_file_with_ext(Process_option_func opt_handler,
for ( ; my_isspace(&my_charset_latin1,end[-1]) ; end--) ;
end[0]=0;
- strmake(curr_gr, ptr, min((size_t) (end-ptr)+1, sizeof(curr_gr)-1));
+ strmake(curr_gr, ptr, MY_MIN((size_t) (end-ptr)+1, sizeof(curr_gr)-1));
/* signal that a new group is found */
opt_handler(handler_ctx, curr_gr, NULL);
diff --git a/mysys/my_error.c b/mysys/my_error.c
index 08c67412fe1..1200385a43d 100644
--- a/mysys/my_error.c
+++ b/mysys/my_error.c
@@ -48,44 +48,73 @@
*/
static struct my_err_head
{
- struct my_err_head *meh_next; /* chain link */
- const char** (*get_errmsgs) (); /* returns error message format */
- int meh_first; /* error number matching array slot 0 */
- int meh_last; /* error number matching last slot */
-} my_errmsgs_globerrs = {NULL, get_global_errmsgs, EE_ERROR_FIRST, EE_ERROR_LAST};
+ struct my_err_head *meh_next; /* chain link */
+ const char** (*get_errmsgs)(); /* returns error message format */
+ uint meh_first; /* error number matching array slot 0 */
+ uint meh_last; /* error number matching last slot */
+} my_errmsgs_globerrs=
+{NULL, get_global_errmsgs, EE_ERROR_FIRST, EE_ERROR_LAST};
static struct my_err_head *my_errmsgs_list= &my_errmsgs_globerrs;
-/*
- Error message to user
+/**
+ @brief Get an error format string from one of the my_error_register()ed sets
+
+ @note
+ NULL values are possible even within a registered range.
- SYNOPSIS
- my_error()
- nr Errno
- MyFlags Flags
- ... variable list
+ @param nr Errno
+ @retval NULL if no message is registered for this error number
+ @retval str C-string
*/
-void my_error(int nr, myf MyFlags, ...)
+const char *my_get_err_msg(uint nr)
{
const char *format;
struct my_err_head *meh_p;
- va_list args;
- char ebuff[ERRMSGSIZE];
- DBUG_ENTER("my_error");
- DBUG_PRINT("my", ("nr: %d MyFlags: %lu errno: %d", nr, MyFlags, errno));
- /* Search for the error messages array, which could contain the message. */
+ /* Search for the range this error is in. */
for (meh_p= my_errmsgs_list; meh_p; meh_p= meh_p->meh_next)
if (nr <= meh_p->meh_last)
break;
- /* get the error message string. Default, if NULL or empty string (""). */
- if (! (format= (meh_p && (nr >= meh_p->meh_first)) ?
- meh_p->get_errmsgs()[nr - meh_p->meh_first] : NULL) || ! *format)
- (void) my_snprintf (ebuff, sizeof(ebuff), "Unknown error %d", nr);
+ /*
+ If we found the range this error number is in, get the format string.
+ If the string is empty, or a NULL pointer, or if we're out of return,
+ we return NULL.
+ */
+ if (!(format= (meh_p && (nr >= meh_p->meh_first)) ?
+ meh_p->get_errmsgs()[nr - meh_p->meh_first] : NULL) ||
+ !*format)
+ return NULL;
+
+ return format;
+}
+
+
+/**
+ Fill in and print a previously registered error message.
+
+ @note
+ Goes through the (sole) function registered in error_handler_hook
+
+ @param nr error number
+ @param MyFlags Flags
+ @param ... variable list matching that error format string
+*/
+
+void my_error(uint nr, myf MyFlags, ...)
+{
+ const char *format;
+ va_list args;
+ char ebuff[ERRMSGSIZE];
+ DBUG_ENTER("my_error");
+ DBUG_PRINT("my", ("nr: %d MyFlags: %lu errno: %d", nr, MyFlags, errno));
+
+ if (!(format = my_get_err_msg(nr)))
+ (void) my_snprintf(ebuff, sizeof(ebuff), "Unknown error %d", nr);
else
{
va_start(args,MyFlags);
@@ -98,15 +127,16 @@ void my_error(int nr, myf MyFlags, ...)
}
-/*
- Error as printf
-
- SYNOPSIS
- my_printf_error()
- error Errno
- format Format string
- MyFlags Flags
- ... variable list
+/**
+ Print an error message.
+
+ @note
+ Goes through the (sole) function registered in error_handler_hook
+
+ @param error error number
+ @param format format string
+ @param MyFlags Flags
+ @param ... variable list matching that error format string
*/
void my_printf_error(uint error, const char *format, myf MyFlags, ...)
@@ -125,15 +155,16 @@ void my_printf_error(uint error, const char *format, myf MyFlags, ...)
DBUG_VOID_RETURN;
}
-/*
- Error with va_list
-
- SYNOPSIS
- my_printv_error()
- error Errno
- format Format string
- MyFlags Flags
- ... variable list
+/**
+ Print an error message.
+
+ @note
+ Goes through the (sole) function registered in error_handler_hook
+
+ @param error error number
+ @param format format string
+ @param MyFlags Flags
+ @param ap variable list matching that error format string
*/
void my_printv_error(uint error, const char *format, myf MyFlags, va_list ap)
@@ -149,14 +180,15 @@ void my_printv_error(uint error, const char *format, myf MyFlags, va_list ap)
}
-/*
- Give message using error_handler_hook
+/**
+ Print an error message.
- SYNOPSIS
- my_message()
- error Errno
- str Error message
- MyFlags Flags
+ @note
+ Goes through the (sole) function registered in error_handler_hook
+
+ @param error error number
+ @param str error message
+ @param MyFlags Flags
*/
void my_message(uint error, const char *str, register myf MyFlags)
@@ -165,16 +197,11 @@ void my_message(uint error, const char *str, register myf MyFlags)
}
-/*
+/**
Register error messages for use with my_error().
- SYNOPSIS
- my_error_register()
- errmsgs array of pointers to error messages
- first error number of first message in the array
- last error number of last message in the array
+ @description
- DESCRIPTION
The pointer array is expected to contain addresses to NUL-terminated
C character strings. The array contains (last - first + 1) pointers.
NULL pointers and empty strings ("") are allowed. These will be mapped to
@@ -182,12 +209,15 @@ void my_message(uint error, const char *str, register myf MyFlags)
This function registers the error numbers 'first' to 'last'.
No overlapping with previously registered error numbers is allowed.
- RETURN
- 0 OK
- != 0 Error
+ @param errmsgs array of pointers to error messages
+ @param first error number of first message in the array
+ @param last error number of last message in the array
+
+ @retval 0 OK
+ @retval != 0 Error
*/
-int my_error_register(const char** (*get_errmsgs) (), int first, int last)
+int my_error_register(const char** (*get_errmsgs) (), uint first, uint last)
{
struct my_err_head *meh_p;
struct my_err_head **search_meh_pp;
@@ -223,28 +253,27 @@ int my_error_register(const char** (*get_errmsgs) (), int first, int last)
}
-/*
+/**
Unregister formerly registered error messages.
- SYNOPSIS
- my_error_unregister()
- first error number of first message
- last error number of last message
+ @description
- DESCRIPTION
This function unregisters the error numbers 'first' to 'last'.
These must have been previously registered by my_error_register().
'first' and 'last' must exactly match the registration.
If a matching registration is present, the header is removed from the
list and the pointer to the error messages pointers array is returned.
+ (The messages themselves are not released here as they may be static.)
Otherwise, NULL is returned.
- RETURN
- non-NULL OK, returns address of error messages pointers array.
- NULL Error, no such number range registered.
+ @param first error number of first message
+ @param last error number of last message
+
+ @retval NULL Error, no such number range registered.
+ @retval non-NULL OK, returns address of error messages pointers array.
*/
-const char **my_error_unregister(int first, int last)
+const char **my_error_unregister(uint first, uint last)
{
struct my_err_head *meh_p;
struct my_err_head **search_meh_pp;
@@ -274,6 +303,17 @@ const char **my_error_unregister(int first, int last)
}
+/**
+ Unregister all formerly registered error messages.
+
+ @description
+
+ This function unregisters all error numbers that have previously
+ been registered by my_error_register().
+ All headers are removed from the list; the messages themselves are
+ not released here as they may be static.
+*/
+
void my_error_unregister_all(void)
{
struct my_err_head *cursor, *saved_next;
diff --git a/mysys/my_file.c b/mysys/my_file.c
index 8d01285a94b..a23ab487d00 100644
--- a/mysys/my_file.c
+++ b/mysys/my_file.c
@@ -76,7 +76,7 @@ static uint set_max_open_files(uint max_file_limit)
static uint set_max_open_files(uint max_file_limit)
{
/* We don't know the limit. Return best guess */
- return min(max_file_limit, OS_FILE_LIMIT);
+ return MY_MIN(max_file_limit, OS_FILE_LIMIT);
}
#endif
@@ -99,7 +99,7 @@ uint my_set_max_open_files(uint files)
DBUG_PRINT("enter",("files: %u my_file_limit: %u", files, my_file_limit));
files+= MY_FILE_MIN;
- files= set_max_open_files(min(files, OS_FILE_LIMIT));
+ files= set_max_open_files(MY_MIN(files, OS_FILE_LIMIT));
if (files <= MY_NFILE)
DBUG_RETURN(files);
@@ -109,9 +109,9 @@ uint my_set_max_open_files(uint files)
/* Copy any initialized files */
memcpy((char*) tmp, (char*) my_file_info,
- sizeof(*tmp) * min(my_file_limit, files));
+ sizeof(*tmp) * MY_MIN(my_file_limit, files));
bzero((char*) (tmp + my_file_limit),
- max((int) (files- my_file_limit), 0)*sizeof(*tmp));
+ MY_MAX((int) (files- my_file_limit), 0)*sizeof(*tmp));
my_free_open_file_info(); /* Free if already allocated */
my_file_info= tmp;
my_file_limit= files;
diff --git a/mysys/my_getopt.c b/mysys/my_getopt.c
index 7905ad90877..16808fe1986 100644
--- a/mysys/my_getopt.c
+++ b/mysys/my_getopt.c
@@ -15,6 +15,7 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#include <my_global.h>
+#include <my_default.h>
#include <m_string.h>
#include <stdlib.h>
#include <my_sys.h>
diff --git a/mysys/my_rnd.c b/mysys/my_rnd.c
index 178bcd9c539..1c8de9dd7a5 100644
--- a/mysys/my_rnd.c
+++ b/mysys/my_rnd.c
@@ -14,6 +14,7 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
#include "mysys_priv.h"
+#include <my_rnd.h>
#include <m_string.h>
/*
@@ -53,3 +54,39 @@ double my_rnd(struct my_rnd_struct *rand_st)
rand_st->seed2=(rand_st->seed1+rand_st->seed2+33) % rand_st->max_value;
return (((double) rand_st->seed1)/rand_st->max_value_dbl);
}
+
+
+/**
+ Generate a random number using the OpenSSL/yaSSL supplied
+ random number generator if available.
+
+ @param rand_st [INOUT] Structure used for number generation
+ only if none of the SSL libraries are
+ available.
+
+ @return Generated random number.
+*/
+
+double my_rnd_ssl(struct my_rnd_struct *rand_st)
+{
+
+#if defined(HAVE_YASSL) || defined(HAVE_OPENSSL)
+ int rc;
+ unsigned int res;
+
+#if defined(HAVE_YASSL)
+ rc= yaSSL::RAND_bytes((unsigned char *) &res, sizeof (unsigned int));
+#else
+ rc= RAND_bytes((unsigned char *) &res, sizeof (unsigned int));
+#endif /* HAVE_YASSL */
+
+ if (rc)
+ return (double)res / (double)UINT_MAX;
+#endif /* defined(HAVE_YASSL) || defined(HAVE_OPENSSL) */
+
+ return my_rnd(rand_st);
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/mysys/my_thr_init.c b/mysys/my_thr_init.c
index 4e5bed84637..5034ff9b35f 100644
--- a/mysys/my_thr_init.c
+++ b/mysys/my_thr_init.c
@@ -376,12 +376,16 @@ void my_thread_end(void)
This must be done before trashing st_my_thread_var,
because the LF_HASH depends on it.
*/
- if (PSI_server)
- PSI_server->delete_current_thread();
+ PSI_THREAD_CALL(delete_current_thread)();
#endif
+ /*
+ We need to disable DBUG early for this thread to ensure that
+ the mutex calls don't enable it again.
+ To do this we have to both do DBUG_POP() and also reset THR_KEY_mysys
+ as the key is used by DBUG.
+ */
DBUG_POP();
-
pthread_setspecific(THR_KEY_mysys,0);
if (tmp && tmp->init)
@@ -418,6 +422,10 @@ struct st_my_thread_var *_my_thread_var(void)
return my_pthread_getspecific(struct st_my_thread_var*,THR_KEY_mysys);
}
+int set_mysys_var(struct st_my_thread_var *mysys_var)
+{
+ return my_pthread_setspecific_ptr(THR_KEY_mysys, mysys_var);
+}
/****************************************************************************
Get name of current thread.
diff --git a/mysys/my_uuid.c b/mysys/my_uuid.c
index ab1b259ae0f..569616d09eb 100644
--- a/mysys/my_uuid.c
+++ b/mysys/my_uuid.c
@@ -40,6 +40,7 @@
*/
#include "mysys_priv.h"
+#include <my_rnd.h>
#include <m_string.h>
#include <myisampack.h> /* mi_int2store, mi_int4store */
@@ -151,7 +152,7 @@ void my_uuid(uchar *to)
/*
-1 so we won't make tv= uuid_time for nanoseq >= (tv - uuid_time)
*/
- delta= min(nanoseq, (ulong)(tv - uuid_time -1));
+ delta= MY_MIN(nanoseq, (ulong)(tv - uuid_time -1));
tv-= delta;
nanoseq-= delta;
}
diff --git a/mysys/psi_noop.c b/mysys/psi_noop.c
index 78629ca16d7..8c9f2773170 100644
--- a/mysys/psi_noop.c
+++ b/mysys/psi_noop.c
@@ -119,7 +119,8 @@ static void destroy_cond_noop(PSI_cond* cond NNN)
}
static PSI_socket*
-init_socket_noop(PSI_socket_key key NNN, const my_socket *fd NNN)
+init_socket_noop(PSI_socket_key key NNN, const my_socket *fd NNN,
+ const struct sockaddr *addr NNN, socklen_t addr_len NNN)
{
return NULL;
}
@@ -188,12 +189,12 @@ static int spawn_thread_noop(PSI_thread_key key NNN,
static PSI_thread*
new_thread_noop(PSI_thread_key key NNN,
- const void *identity NNN, ulong thread_id NNN)
+ const void *identity NNN, ulonglong thread_id NNN)
{
return NULL;
}
-static void set_thread_id_noop(PSI_thread *thread NNN, unsigned long id NNN)
+static void set_thread_id_noop(PSI_thread *thread NNN, ulonglong id NNN)
{
return;
}
@@ -401,16 +402,17 @@ static void end_table_lock_wait_noop(PSI_table_locker* locker NNN)
return;
}
-static PSI_file* start_file_open_wait_noop(PSI_file_locker *locker NNN,
- const char *src_file NNN,
- uint src_line NNN)
+static void start_file_open_wait_noop(PSI_file_locker *locker NNN,
+ const char *src_file NNN,
+ uint src_line NNN)
{
- return NULL;
+ return;
}
-static void end_file_open_wait_noop(PSI_file_locker *locker NNN)
+static PSI_file* end_file_open_wait_noop(PSI_file_locker *locker NNN,
+ void *result NNN)
{
- return;
+ return NULL;
}
static void end_file_open_wait_and_bind_to_descriptor_noop
@@ -433,6 +435,19 @@ static void end_file_wait_noop(PSI_file_locker *locker NNN,
return;
}
+static void start_file_close_wait_noop(PSI_file_locker *locker NNN,
+ const char *src_file NNN,
+ uint src_line NNN)
+{
+ return;
+}
+
+static void end_file_close_wait_noop(PSI_file_locker *locker NNN,
+ int result NNN)
+{
+ return;
+}
+
static void start_stage_noop(PSI_stage_key key NNN,
const char *src_file NNN, int src_line NNN)
{
@@ -446,7 +461,8 @@ static void end_stage_noop(void)
static PSI_statement_locker*
get_thread_statement_locker_noop(PSI_statement_locker_state *state NNN,
- PSI_statement_key key NNN)
+ PSI_statement_key key NNN,
+ const void *charset NNN)
{
return NULL;
}
@@ -621,6 +637,14 @@ digest_add_token_noop(PSI_digest_locker *locker NNN,
return NULL;
}
+static int
+set_thread_connect_attrs_noop(const char *buffer __attribute__((unused)),
+ uint length __attribute__((unused)),
+ const void *from_cs __attribute__((unused)))
+{
+ return 0;
+}
+
static PSI PSI_noop=
{
register_mutex_noop,
@@ -687,6 +711,8 @@ static PSI PSI_noop=
end_file_open_wait_and_bind_to_descriptor_noop,
start_file_wait_noop,
end_file_wait_noop,
+ start_file_close_wait_noop,
+ end_file_close_wait_noop,
start_stage_noop,
end_stage_noop,
get_thread_statement_locker_noop,
@@ -716,7 +742,8 @@ static PSI PSI_noop=
set_socket_info_noop,
set_socket_thread_owner_noop,
digest_start_noop,
- digest_add_token_noop
+ digest_add_token_noop,
+ set_thread_connect_attrs_noop
};
/**
diff --git a/mysys/rijndael.c b/mysys/rijndael.c
deleted file mode 100644
index e893a886726..00000000000
--- a/mysys/rijndael.c
+++ /dev/null
@@ -1,1379 +0,0 @@
-/* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; version 2 of the License.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
-
-
-/*
- Based on version 3.0 (December 2000)
-
- Optimised ANSI C code for the Rijndael cipher (now AES)
-
- author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
- author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
- author Paulo Barreto <paulo.barreto@terra.com.br>
-*/
-
-#include <my_global.h>
-#include "rijndael.h"
-
-/*
- Define the following to use fastest and much larger code (~10K extra code)
- #define FULL_UNROLL
-*/
-
-static const uint32 Te0[256]=
-{
- 0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
- 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
- 0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
- 0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
- 0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
- 0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
- 0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
- 0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
- 0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
- 0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
- 0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
- 0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
- 0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
- 0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
- 0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
- 0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
- 0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
- 0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
- 0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
- 0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
- 0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
- 0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
- 0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
- 0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
- 0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
- 0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
- 0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
- 0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
- 0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
- 0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
- 0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
- 0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
- 0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
- 0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
- 0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
- 0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
- 0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
- 0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
- 0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
- 0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
- 0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
- 0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
- 0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
- 0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
- 0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
- 0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
- 0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
- 0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
- 0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
- 0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
- 0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
- 0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
- 0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
- 0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
- 0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
- 0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
- 0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
- 0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
- 0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
- 0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
- 0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
- 0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
- 0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
- 0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
-};
-
-static const uint32 Te1[256]=
-{
- 0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
- 0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
- 0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
- 0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
- 0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
- 0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
- 0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
- 0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
- 0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
- 0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
- 0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
- 0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
- 0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
- 0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
- 0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
- 0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
- 0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
- 0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
- 0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
- 0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
- 0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
- 0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
- 0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
- 0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
- 0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
- 0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
- 0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
- 0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
- 0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
- 0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
- 0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
- 0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
- 0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
- 0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
- 0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
- 0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
- 0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
- 0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
- 0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
- 0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
- 0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
- 0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
- 0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
- 0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
- 0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
- 0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
- 0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
- 0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
- 0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
- 0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
- 0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
- 0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
- 0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
- 0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
- 0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
- 0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
- 0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
- 0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
- 0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
- 0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
- 0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
- 0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
- 0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
- 0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
-};
-
-static const uint32 Te2[256]=
-{
- 0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
- 0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
- 0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
- 0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
- 0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
- 0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
- 0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
- 0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
- 0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
- 0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
- 0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
- 0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
- 0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
- 0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
- 0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
- 0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
- 0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
- 0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
- 0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
- 0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
- 0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
- 0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
- 0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
- 0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
- 0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
- 0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
- 0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
- 0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
- 0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
- 0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
- 0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
- 0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
- 0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
- 0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
- 0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
- 0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
- 0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
- 0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
- 0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
- 0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
- 0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
- 0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
- 0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
- 0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
- 0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
- 0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
- 0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
- 0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
- 0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
- 0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
- 0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
- 0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
- 0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
- 0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
- 0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
- 0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
- 0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
- 0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
- 0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
- 0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
- 0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
- 0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
- 0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
- 0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
-};
-
-static const uint32 Te3[256]=
-{
- 0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
- 0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
- 0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
- 0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
- 0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
- 0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
- 0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
- 0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
- 0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
- 0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
- 0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
- 0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
- 0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
- 0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
- 0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
- 0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
- 0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
- 0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
- 0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
- 0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
- 0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
- 0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
- 0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
- 0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
- 0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
- 0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
- 0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
- 0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
- 0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
- 0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
- 0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
- 0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
- 0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
- 0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
- 0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
- 0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
- 0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
- 0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
- 0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
- 0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
- 0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
- 0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
- 0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
- 0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
- 0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
- 0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
- 0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
- 0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
- 0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
- 0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
- 0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
- 0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
- 0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
- 0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
- 0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
- 0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
- 0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
- 0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
- 0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
- 0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
- 0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
- 0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
- 0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
- 0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
-};
-
-static const uint32 Te4[256]=
-{
- 0x63636363U, 0x7c7c7c7cU, 0x77777777U, 0x7b7b7b7bU,
- 0xf2f2f2f2U, 0x6b6b6b6bU, 0x6f6f6f6fU, 0xc5c5c5c5U,
- 0x30303030U, 0x01010101U, 0x67676767U, 0x2b2b2b2bU,
- 0xfefefefeU, 0xd7d7d7d7U, 0xababababU, 0x76767676U,
- 0xcacacacaU, 0x82828282U, 0xc9c9c9c9U, 0x7d7d7d7dU,
- 0xfafafafaU, 0x59595959U, 0x47474747U, 0xf0f0f0f0U,
- 0xadadadadU, 0xd4d4d4d4U, 0xa2a2a2a2U, 0xafafafafU,
- 0x9c9c9c9cU, 0xa4a4a4a4U, 0x72727272U, 0xc0c0c0c0U,
- 0xb7b7b7b7U, 0xfdfdfdfdU, 0x93939393U, 0x26262626U,
- 0x36363636U, 0x3f3f3f3fU, 0xf7f7f7f7U, 0xccccccccU,
- 0x34343434U, 0xa5a5a5a5U, 0xe5e5e5e5U, 0xf1f1f1f1U,
- 0x71717171U, 0xd8d8d8d8U, 0x31313131U, 0x15151515U,
- 0x04040404U, 0xc7c7c7c7U, 0x23232323U, 0xc3c3c3c3U,
- 0x18181818U, 0x96969696U, 0x05050505U, 0x9a9a9a9aU,
- 0x07070707U, 0x12121212U, 0x80808080U, 0xe2e2e2e2U,
- 0xebebebebU, 0x27272727U, 0xb2b2b2b2U, 0x75757575U,
- 0x09090909U, 0x83838383U, 0x2c2c2c2cU, 0x1a1a1a1aU,
- 0x1b1b1b1bU, 0x6e6e6e6eU, 0x5a5a5a5aU, 0xa0a0a0a0U,
- 0x52525252U, 0x3b3b3b3bU, 0xd6d6d6d6U, 0xb3b3b3b3U,
- 0x29292929U, 0xe3e3e3e3U, 0x2f2f2f2fU, 0x84848484U,
- 0x53535353U, 0xd1d1d1d1U, 0x00000000U, 0xededededU,
- 0x20202020U, 0xfcfcfcfcU, 0xb1b1b1b1U, 0x5b5b5b5bU,
- 0x6a6a6a6aU, 0xcbcbcbcbU, 0xbebebebeU, 0x39393939U,
- 0x4a4a4a4aU, 0x4c4c4c4cU, 0x58585858U, 0xcfcfcfcfU,
- 0xd0d0d0d0U, 0xefefefefU, 0xaaaaaaaaU, 0xfbfbfbfbU,
- 0x43434343U, 0x4d4d4d4dU, 0x33333333U, 0x85858585U,
- 0x45454545U, 0xf9f9f9f9U, 0x02020202U, 0x7f7f7f7fU,
- 0x50505050U, 0x3c3c3c3cU, 0x9f9f9f9fU, 0xa8a8a8a8U,
- 0x51515151U, 0xa3a3a3a3U, 0x40404040U, 0x8f8f8f8fU,
- 0x92929292U, 0x9d9d9d9dU, 0x38383838U, 0xf5f5f5f5U,
- 0xbcbcbcbcU, 0xb6b6b6b6U, 0xdadadadaU, 0x21212121U,
- 0x10101010U, 0xffffffffU, 0xf3f3f3f3U, 0xd2d2d2d2U,
- 0xcdcdcdcdU, 0x0c0c0c0cU, 0x13131313U, 0xececececU,
- 0x5f5f5f5fU, 0x97979797U, 0x44444444U, 0x17171717U,
- 0xc4c4c4c4U, 0xa7a7a7a7U, 0x7e7e7e7eU, 0x3d3d3d3dU,
- 0x64646464U, 0x5d5d5d5dU, 0x19191919U, 0x73737373U,
- 0x60606060U, 0x81818181U, 0x4f4f4f4fU, 0xdcdcdcdcU,
- 0x22222222U, 0x2a2a2a2aU, 0x90909090U, 0x88888888U,
- 0x46464646U, 0xeeeeeeeeU, 0xb8b8b8b8U, 0x14141414U,
- 0xdedededeU, 0x5e5e5e5eU, 0x0b0b0b0bU, 0xdbdbdbdbU,
- 0xe0e0e0e0U, 0x32323232U, 0x3a3a3a3aU, 0x0a0a0a0aU,
- 0x49494949U, 0x06060606U, 0x24242424U, 0x5c5c5c5cU,
- 0xc2c2c2c2U, 0xd3d3d3d3U, 0xacacacacU, 0x62626262U,
- 0x91919191U, 0x95959595U, 0xe4e4e4e4U, 0x79797979U,
- 0xe7e7e7e7U, 0xc8c8c8c8U, 0x37373737U, 0x6d6d6d6dU,
- 0x8d8d8d8dU, 0xd5d5d5d5U, 0x4e4e4e4eU, 0xa9a9a9a9U,
- 0x6c6c6c6cU, 0x56565656U, 0xf4f4f4f4U, 0xeaeaeaeaU,
- 0x65656565U, 0x7a7a7a7aU, 0xaeaeaeaeU, 0x08080808U,
- 0xbabababaU, 0x78787878U, 0x25252525U, 0x2e2e2e2eU,
- 0x1c1c1c1cU, 0xa6a6a6a6U, 0xb4b4b4b4U, 0xc6c6c6c6U,
- 0xe8e8e8e8U, 0xddddddddU, 0x74747474U, 0x1f1f1f1fU,
- 0x4b4b4b4bU, 0xbdbdbdbdU, 0x8b8b8b8bU, 0x8a8a8a8aU,
- 0x70707070U, 0x3e3e3e3eU, 0xb5b5b5b5U, 0x66666666U,
- 0x48484848U, 0x03030303U, 0xf6f6f6f6U, 0x0e0e0e0eU,
- 0x61616161U, 0x35353535U, 0x57575757U, 0xb9b9b9b9U,
- 0x86868686U, 0xc1c1c1c1U, 0x1d1d1d1dU, 0x9e9e9e9eU,
- 0xe1e1e1e1U, 0xf8f8f8f8U, 0x98989898U, 0x11111111U,
- 0x69696969U, 0xd9d9d9d9U, 0x8e8e8e8eU, 0x94949494U,
- 0x9b9b9b9bU, 0x1e1e1e1eU, 0x87878787U, 0xe9e9e9e9U,
- 0xcecececeU, 0x55555555U, 0x28282828U, 0xdfdfdfdfU,
- 0x8c8c8c8cU, 0xa1a1a1a1U, 0x89898989U, 0x0d0d0d0dU,
- 0xbfbfbfbfU, 0xe6e6e6e6U, 0x42424242U, 0x68686868U,
- 0x41414141U, 0x99999999U, 0x2d2d2d2dU, 0x0f0f0f0fU,
- 0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U,
-};
-
-static const uint32 Td0[256]=
-{
- 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
- 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
- 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
- 0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
- 0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
- 0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
- 0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
- 0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
- 0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
- 0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
- 0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
- 0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
- 0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
- 0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
- 0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
- 0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
- 0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
- 0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
- 0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
- 0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
- 0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
- 0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
- 0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
- 0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
- 0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
- 0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
- 0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
- 0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
- 0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
- 0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
- 0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
- 0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
- 0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
- 0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
- 0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
- 0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
- 0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
- 0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
- 0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
- 0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
- 0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
- 0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
- 0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
- 0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
- 0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
- 0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
- 0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
- 0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
- 0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
- 0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
- 0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
- 0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
- 0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
- 0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
- 0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
- 0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
- 0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
- 0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
- 0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
- 0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
- 0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
- 0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
- 0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
- 0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
-};
-
-static const uint32 Td1[256]=
-{
- 0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
- 0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
- 0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
- 0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
- 0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
- 0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
- 0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
- 0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
- 0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
- 0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
- 0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
- 0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
- 0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
- 0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
- 0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
- 0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
- 0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
- 0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
- 0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
- 0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
- 0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
- 0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
- 0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
- 0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
- 0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
- 0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
- 0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
- 0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
- 0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
- 0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
- 0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
- 0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
- 0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
- 0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
- 0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
- 0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
- 0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
- 0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
- 0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
- 0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
- 0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
- 0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
- 0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
- 0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
- 0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
- 0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
- 0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
- 0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
- 0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
- 0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
- 0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
- 0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
- 0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
- 0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
- 0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
- 0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
- 0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
- 0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
- 0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
- 0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
- 0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
- 0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
- 0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
- 0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
-};
-
-static const uint32 Td2[256]=
-{
- 0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
- 0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
- 0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
- 0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
- 0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
- 0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
- 0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
- 0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
- 0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
- 0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
- 0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
- 0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
- 0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
- 0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
- 0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
- 0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
- 0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
- 0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
- 0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
- 0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
-
- 0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
- 0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
- 0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
- 0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
- 0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
- 0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
- 0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
- 0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
- 0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
- 0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
- 0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
- 0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
- 0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
- 0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
- 0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
- 0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
- 0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
- 0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
- 0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
- 0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
- 0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
- 0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
- 0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
- 0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
- 0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
- 0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
- 0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
- 0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
- 0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
- 0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
- 0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
- 0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
- 0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
- 0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
- 0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
- 0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
- 0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
- 0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
- 0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
- 0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
- 0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
- 0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
- 0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
- 0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
-};
-
-static const uint32 Td3[256]=
-{
- 0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
- 0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
- 0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
- 0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
- 0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
- 0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
- 0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
- 0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
- 0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
- 0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
- 0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
- 0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
- 0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
- 0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
- 0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
- 0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
- 0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
- 0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
- 0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
- 0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
- 0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
- 0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
- 0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
- 0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
- 0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
- 0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
- 0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
- 0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
- 0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
- 0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
- 0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
- 0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
- 0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
- 0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
- 0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
- 0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
- 0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
- 0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
- 0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
- 0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
- 0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
- 0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
- 0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
- 0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
- 0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
- 0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
- 0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
- 0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
- 0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
- 0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
- 0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
- 0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
- 0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
- 0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
- 0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
- 0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
- 0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
- 0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
- 0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
- 0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
- 0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
- 0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
- 0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
- 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
-};
-
-static const uint32 Td4[256]=
-{
- 0x52525252U, 0x09090909U, 0x6a6a6a6aU, 0xd5d5d5d5U,
- 0x30303030U, 0x36363636U, 0xa5a5a5a5U, 0x38383838U,
- 0xbfbfbfbfU, 0x40404040U, 0xa3a3a3a3U, 0x9e9e9e9eU,
- 0x81818181U, 0xf3f3f3f3U, 0xd7d7d7d7U, 0xfbfbfbfbU,
- 0x7c7c7c7cU, 0xe3e3e3e3U, 0x39393939U, 0x82828282U,
- 0x9b9b9b9bU, 0x2f2f2f2fU, 0xffffffffU, 0x87878787U,
- 0x34343434U, 0x8e8e8e8eU, 0x43434343U, 0x44444444U,
- 0xc4c4c4c4U, 0xdedededeU, 0xe9e9e9e9U, 0xcbcbcbcbU,
- 0x54545454U, 0x7b7b7b7bU, 0x94949494U, 0x32323232U,
- 0xa6a6a6a6U, 0xc2c2c2c2U, 0x23232323U, 0x3d3d3d3dU,
- 0xeeeeeeeeU, 0x4c4c4c4cU, 0x95959595U, 0x0b0b0b0bU,
- 0x42424242U, 0xfafafafaU, 0xc3c3c3c3U, 0x4e4e4e4eU,
- 0x08080808U, 0x2e2e2e2eU, 0xa1a1a1a1U, 0x66666666U,
- 0x28282828U, 0xd9d9d9d9U, 0x24242424U, 0xb2b2b2b2U,
- 0x76767676U, 0x5b5b5b5bU, 0xa2a2a2a2U, 0x49494949U,
- 0x6d6d6d6dU, 0x8b8b8b8bU, 0xd1d1d1d1U, 0x25252525U,
- 0x72727272U, 0xf8f8f8f8U, 0xf6f6f6f6U, 0x64646464U,
- 0x86868686U, 0x68686868U, 0x98989898U, 0x16161616U,
- 0xd4d4d4d4U, 0xa4a4a4a4U, 0x5c5c5c5cU, 0xccccccccU,
- 0x5d5d5d5dU, 0x65656565U, 0xb6b6b6b6U, 0x92929292U,
- 0x6c6c6c6cU, 0x70707070U, 0x48484848U, 0x50505050U,
- 0xfdfdfdfdU, 0xededededU, 0xb9b9b9b9U, 0xdadadadaU,
- 0x5e5e5e5eU, 0x15151515U, 0x46464646U, 0x57575757U,
- 0xa7a7a7a7U, 0x8d8d8d8dU, 0x9d9d9d9dU, 0x84848484U,
- 0x90909090U, 0xd8d8d8d8U, 0xababababU, 0x00000000U,
- 0x8c8c8c8cU, 0xbcbcbcbcU, 0xd3d3d3d3U, 0x0a0a0a0aU,
- 0xf7f7f7f7U, 0xe4e4e4e4U, 0x58585858U, 0x05050505U,
- 0xb8b8b8b8U, 0xb3b3b3b3U, 0x45454545U, 0x06060606U,
- 0xd0d0d0d0U, 0x2c2c2c2cU, 0x1e1e1e1eU, 0x8f8f8f8fU,
- 0xcacacacaU, 0x3f3f3f3fU, 0x0f0f0f0fU, 0x02020202U,
- 0xc1c1c1c1U, 0xafafafafU, 0xbdbdbdbdU, 0x03030303U,
- 0x01010101U, 0x13131313U, 0x8a8a8a8aU, 0x6b6b6b6bU,
- 0x3a3a3a3aU, 0x91919191U, 0x11111111U, 0x41414141U,
- 0x4f4f4f4fU, 0x67676767U, 0xdcdcdcdcU, 0xeaeaeaeaU,
- 0x97979797U, 0xf2f2f2f2U, 0xcfcfcfcfU, 0xcecececeU,
- 0xf0f0f0f0U, 0xb4b4b4b4U, 0xe6e6e6e6U, 0x73737373U,
- 0x96969696U, 0xacacacacU, 0x74747474U, 0x22222222U,
- 0xe7e7e7e7U, 0xadadadadU, 0x35353535U, 0x85858585U,
- 0xe2e2e2e2U, 0xf9f9f9f9U, 0x37373737U, 0xe8e8e8e8U,
- 0x1c1c1c1cU, 0x75757575U, 0xdfdfdfdfU, 0x6e6e6e6eU,
- 0x47474747U, 0xf1f1f1f1U, 0x1a1a1a1aU, 0x71717171U,
- 0x1d1d1d1dU, 0x29292929U, 0xc5c5c5c5U, 0x89898989U,
- 0x6f6f6f6fU, 0xb7b7b7b7U, 0x62626262U, 0x0e0e0e0eU,
- 0xaaaaaaaaU, 0x18181818U, 0xbebebebeU, 0x1b1b1b1bU,
- 0xfcfcfcfcU, 0x56565656U, 0x3e3e3e3eU, 0x4b4b4b4bU,
- 0xc6c6c6c6U, 0xd2d2d2d2U, 0x79797979U, 0x20202020U,
- 0x9a9a9a9aU, 0xdbdbdbdbU, 0xc0c0c0c0U, 0xfefefefeU,
- 0x78787878U, 0xcdcdcdcdU, 0x5a5a5a5aU, 0xf4f4f4f4U,
- 0x1f1f1f1fU, 0xddddddddU, 0xa8a8a8a8U, 0x33333333U,
- 0x88888888U, 0x07070707U, 0xc7c7c7c7U, 0x31313131U,
- 0xb1b1b1b1U, 0x12121212U, 0x10101010U, 0x59595959U,
- 0x27272727U, 0x80808080U, 0xececececU, 0x5f5f5f5fU,
- 0x60606060U, 0x51515151U, 0x7f7f7f7fU, 0xa9a9a9a9U,
- 0x19191919U, 0xb5b5b5b5U, 0x4a4a4a4aU, 0x0d0d0d0dU,
- 0x2d2d2d2dU, 0xe5e5e5e5U, 0x7a7a7a7aU, 0x9f9f9f9fU,
- 0x93939393U, 0xc9c9c9c9U, 0x9c9c9c9cU, 0xefefefefU,
- 0xa0a0a0a0U, 0xe0e0e0e0U, 0x3b3b3b3bU, 0x4d4d4d4dU,
- 0xaeaeaeaeU, 0x2a2a2a2aU, 0xf5f5f5f5U, 0xb0b0b0b0U,
- 0xc8c8c8c8U, 0xebebebebU, 0xbbbbbbbbU, 0x3c3c3c3cU,
- 0x83838383U, 0x53535353U, 0x99999999U, 0x61616161U,
- 0x17171717U, 0x2b2b2b2bU, 0x04040404U, 0x7e7e7e7eU,
- 0xbabababaU, 0x77777777U, 0xd6d6d6d6U, 0x26262626U,
- 0xe1e1e1e1U, 0x69696969U, 0x14141414U, 0x63636363U,
- 0x55555555U, 0x21212121U, 0x0c0c0c0cU, 0x7d7d7d7dU,
-};
-
-
-/* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
-static const uint32 rcon[]=
-{
- 0x01000000, 0x02000000, 0x04000000, 0x08000000,
- 0x10000000, 0x20000000, 0x40000000, 0x80000000,
- 0x1B000000, 0x36000000,
-};
-
-#if defined(_MSC_VER) && defined(__i386__)
-
-#define RJ_SWAP(x) (_lrotl(x, 8) & 0x00ff00ff | _lrotr(x, 8) & 0xff00ff00)
-#define GETuint32(p) RJ_SWAP(*((uint32 *)(p)))
-#define PUTuint32(ct, st) { *((uint32 *)(ct)) = RJ_SWAP((st)); }
-
-#else
-
-#define GETuint32(pt) (((uint32)(pt)[0] << 24) ^ ((uint32)(pt)[1] << 16)\
- ^ ((uint32)(pt)[2] << 8) ^ ((uint32)(pt)[3]))
-#define PUTuint32(ct, st) { (ct)[0] = (uint8)((st) >> 24); (ct)[1]\
-= (uint8)((st) >> 16); (ct)[2] = (uint8)((st) >> 8); (ct)[3] = (uint8)(st); }
-
-#endif /* defined(_MSC_VER) && defined(__i386__) */
-
-
-/*
- Expand the cipher key into the encryption key schedule.
-
- RETURN
- The number of rounds for the given cipher key size.
-*/
-
-int rijndaelKeySetupEnc(uint32 rk[/*4*(Nr + 1)*/], const uint8 cipherKey[],
- int keyBits)
-{
- int i = 0;
- uint32 temp;
-
- rk[0] = GETuint32(cipherKey );
- rk[1] = GETuint32(cipherKey + 4);
- rk[2] = GETuint32(cipherKey + 8);
- rk[3] = GETuint32(cipherKey + 12);
- if (keyBits == 128)
- {
- for (;;)
- {
- temp = rk[3];
- rk[4] = (rk[0] ^
- (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
- (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
- (Te4[(temp ) & 0xff] & 0x0000ff00) ^
- (Te4[(temp >> 24) ] & 0x000000ff) ^
- rcon[i]);
- rk[5] = rk[1] ^ rk[4];
- rk[6] = rk[2] ^ rk[5];
- rk[7] = rk[3] ^ rk[6];
- if (++i == 10)
- return 10;
- rk += 4;
- }
- }
- rk[4] = GETuint32(cipherKey + 16);
- rk[5] = GETuint32(cipherKey + 20);
- if (keyBits == 192)
- {
- for (;;)
- {
- temp = rk[ 5];
- rk[ 6] = (rk[ 0] ^
- (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
- (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
- (Te4[(temp ) & 0xff] & 0x0000ff00) ^
- (Te4[(temp >> 24) ] & 0x000000ff) ^
- rcon[i]);
- rk[ 7] = rk[ 1] ^ rk[ 6];
- rk[ 8] = rk[ 2] ^ rk[ 7];
- rk[ 9] = rk[ 3] ^ rk[ 8];
- if (++i == 8)
- {
- return 12;
- }
- rk[10] = rk[ 4] ^ rk[ 9];
- rk[11] = rk[ 5] ^ rk[10];
- rk += 6;
- }
- }
- rk[6] = GETuint32(cipherKey + 24);
- rk[7] = GETuint32(cipherKey + 28);
- if (keyBits == 256)
- {
- for (;;)
- {
- temp = rk[ 7];
- rk[ 8] = (rk[ 0] ^
- (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
- (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
- (Te4[(temp ) & 0xff] & 0x0000ff00) ^
- (Te4[(temp >> 24) ] & 0x000000ff) ^
- rcon[i]);
- rk[ 9] = rk[ 1] ^ rk[ 8];
- rk[10] = rk[ 2] ^ rk[ 9];
- rk[11] = rk[ 3] ^ rk[10];
- if (++i == 7)
- {
- return 14;
- }
- temp = rk[11];
- rk[12] = (rk[ 4] ^
- (Te4[(temp >> 24) ] & 0xff000000) ^
- (Te4[(temp >> 16) & 0xff] & 0x00ff0000) ^
- (Te4[(temp >> 8) & 0xff] & 0x0000ff00) ^
- (Te4[(temp ) & 0xff] & 0x000000ff));
- rk[13] = rk[ 5] ^ rk[12];
- rk[14] = rk[ 6] ^ rk[13];
- rk[15] = rk[ 7] ^ rk[14];
- rk += 8;
- }
- }
- return 0;
-}
-
-
-/*
- Expand the cipher key into the decryption key schedule.
-
- RETURN
- The number of rounds for the given cipher key size.
-*/
-
-int rijndaelKeySetupDec(uint32 rk[/*4*(Nr + 1)*/], const uint8 cipherKey[],
- int keyBits)
-{
- int nr, i, j;
- uint32 temp;
-
- /* expand the cipher key: */
- nr = rijndaelKeySetupEnc(rk, cipherKey, keyBits);
- /* invert the order of the round keys: */
- for (i = 0, j = 4*nr; i < j; i += 4, j -= 4)
- {
- temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp;
- temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
- temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
- temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
- }
- /*
- Apply the inverse MixColumn transform to all round keys but the first
- and the last:
- */
- for (i = 1; i < nr; i++)
- {
- rk += 4;
-
- rk[0]= (
- Td0[Te4[(rk[0] >> 24) ] & 0xff] ^
- Td1[Te4[(rk[0] >> 16) & 0xff] & 0xff] ^
- Td2[Te4[(rk[0] >> 8) & 0xff] & 0xff] ^
- Td3[Te4[(rk[0] ) & 0xff] & 0xff]);
-
- rk[1]= (Td0[Te4[(rk[1] >> 24) ] & 0xff] ^
- Td1[Te4[(rk[1] >> 16) & 0xff] & 0xff] ^
- Td2[Te4[(rk[1] >> 8) & 0xff] & 0xff] ^
- Td3[Te4[(rk[1] ) & 0xff] & 0xff]);
-
- rk[2]= (Td0[Te4[(rk[2] >> 24) ] & 0xff] ^
- Td1[Te4[(rk[2] >> 16) & 0xff] & 0xff] ^
- Td2[Te4[(rk[2] >> 8) & 0xff] & 0xff] ^
- Td3[Te4[(rk[2] ) & 0xff] & 0xff]);
-
- rk[3]= (Td0[Te4[(rk[3] >> 24) ] & 0xff] ^
- Td1[Te4[(rk[3] >> 16) & 0xff] & 0xff] ^
- Td2[Te4[(rk[3] >> 8) & 0xff] & 0xff] ^
- Td3[Te4[(rk[3] ) & 0xff] & 0xff]);
- }
- return nr;
-}
-
-
-void rijndaelEncrypt(const uint32 rk[/*4*(Nr + 1)*/], int Nr,
- const uint8 pt[16], uint8 ct[16])
-{
- uint32 s0, s1, s2, s3, t0, t1, t2, t3;
-#ifndef FULL_UNROLL
- int r;
-#endif /* FULL_UNROLL */
-
- /* map byte array block to cipher state and add initial round key: */
- s0 = GETuint32(pt ) ^ rk[0];
- s1 = GETuint32(pt + 4) ^ rk[1];
- s2 = GETuint32(pt + 8) ^ rk[2];
- s3 = GETuint32(pt + 12) ^ rk[3];
-
-#ifdef FULL_UNROLL
- /* round 1: */
- t0= (Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff]
- ^ Te3[s3 & 0xff] ^ rk[ 4]);
- t1= (Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff]
- ^ Te3[s0 & 0xff] ^ rk[ 5]);
- t2= (Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff]
- ^ Te3[s1 & 0xff] ^ rk[ 6]);
- t3= (Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff]
- ^ Te3[s2 & 0xff] ^ rk[ 7]);
-
- /* round 2: */
- s0= (Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff]
- ^ Te3[t3 & 0xff] ^ rk[ 8]);
- s1= (Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff]
- ^ Te3[t0 & 0xff] ^ rk[ 9]);
- s2= (Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff]
- ^ Te3[t1 & 0xff] ^ rk[10]);
- s3= (Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff]
- ^ Te3[t2 & 0xff] ^ rk[11]);
-
- /* round 3: */
- t0= (Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff]
- ^ Te3[s3 & 0xff] ^ rk[12]);
- t1= (Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff]
- ^ Te3[s0 & 0xff] ^ rk[13]);
- t2= (Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff]
- ^ Te3[s1 & 0xff] ^ rk[14]);
- t3= (Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff]
- ^ Te3[s2 & 0xff] ^ rk[15]);
-
- /* round 4: */
- s0= (Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff]
- ^ Te3[t3 & 0xff] ^ rk[16]);
- s1= (Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff]
- ^ Te3[t0 & 0xff] ^ rk[17]);
- s2= (Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff]
- ^ Te3[t1 & 0xff] ^ rk[18]);
- s3= (Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff]
- ^ Te3[t2 & 0xff] ^ rk[19]);
-
- /* round 5: */
- t0= (Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff]
- ^ Te3[s3 & 0xff] ^ rk[20]);
- t1= (Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff]
- ^ Te3[s0 & 0xff] ^ rk[21]);
- t2= (Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff]
- ^ Te3[s1 & 0xff] ^ rk[22]);
- t3= (Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff]
- ^ Te3[s2 & 0xff] ^ rk[23]);
-
- /* round 6: */
- s0= (Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff]
- ^ Te3[t3 & 0xff] ^ rk[24]);
- s1= (Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff]
- ^ Te3[t0 & 0xff] ^ rk[25]);
- s2= (Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff]
- ^ Te3[t1 & 0xff] ^ rk[26]);
- s3= (Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff]
- ^ Te3[t2 & 0xff] ^ rk[27]);
-
- /* round 7: */
- t0= (Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff]
- ^ Te3[s3 & 0xff] ^ rk[28]);
- t1= (Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff]
- ^ Te3[s0 & 0xff] ^ rk[29]);
- t2= (Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff]
- ^ Te3[s1 & 0xff] ^ rk[30]);
- t3= (Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff]
- ^ Te3[s2 & 0xff] ^ rk[31]);
-
- /* round 8: */
- s0= (Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff]
- ^ Te3[t3 & 0xff] ^ rk[32]);
- s1= (Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff]
- ^ Te3[t0 & 0xff] ^ rk[33]);
- s2= (Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff]
- ^ Te3[t1 & 0xff] ^ rk[34]);
- s3= (Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff]
- ^ Te3[t2 & 0xff] ^ rk[35]);
-
- /* round 9: */
- t0= (Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff]
- ^ Te3[s3 & 0xff] ^ rk[36]);
- t1= (Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff]
- ^ Te3[s0 & 0xff] ^ rk[37]);
- t2= (Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff]
- ^ Te3[s1 & 0xff] ^ rk[38]);
- t3= (Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff]
- ^ Te3[s2 & 0xff] ^ rk[39]);
-
- if (Nr > 10)
- {
- /* round 10: */
- s0= (Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff]
- ^ Te3[t3 & 0xff] ^ rk[40]);
- s1= (Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff]
- ^ Te3[t0 & 0xff] ^ rk[41]);
- s2= (Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff]
- ^ Te3[t1 & 0xff] ^ rk[42]);
- s3= (Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff]
- ^ Te3[t2 & 0xff] ^ rk[43]);
-
- /* round 11: */
- t0= (Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff]
- ^ Te3[s3 & 0xff] ^ rk[44]);
- t1= (Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff]
- ^ Te3[s0 & 0xff] ^ rk[45]);
- t2= (Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff]
- ^ Te3[s1 & 0xff] ^ rk[46]);
- t3= (Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff]
- ^ Te3[s2 & 0xff] ^ rk[47]);
-
- if (Nr > 12)
- {
- /* round 12: */
- s0= (Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff]
- ^ Te3[t3 & 0xff] ^ rk[48]);
- s1= (Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff]
- ^ Te3[t0 & 0xff] ^ rk[49]);
- s2= (Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff]
- ^ Te3[t1 & 0xff] ^ rk[50]);
- s3= (Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff]
- ^ Te3[t2 & 0xff] ^ rk[51]);
-
- /* round 13: */
- t0= (Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff]
- ^ Te3[s3 & 0xff] ^ rk[52]);
- t1= (Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff]
- ^ Te3[s0 & 0xff] ^ rk[53]);
- t2= (Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff]
- ^ Te3[s1 & 0xff] ^ rk[54]);
- t3= (Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff]
- ^ Te3[s2 & 0xff] ^ rk[55]);
- }
- }
- rk += Nr << 2;
-#else /* !FULL_UNROLL */
-
- /* Nr - 1 full rounds: */
-
- r = Nr >> 1;
- for (;;)
- {
- t0= (Te0[(s0 >> 24) ] ^
- Te1[(s1 >> 16) & 0xff] ^
- Te2[(s2 >> 8) & 0xff] ^
- Te3[(s3 ) & 0xff] ^
- rk[4]);
-
- t1= (Te0[(s1 >> 24) ] ^
- Te1[(s2 >> 16) & 0xff] ^
- Te2[(s3 >> 8) & 0xff] ^
- Te3[(s0 ) & 0xff] ^
- rk[5]);
-
- t2= (Te0[(s2 >> 24) ] ^
- Te1[(s3 >> 16) & 0xff] ^
- Te2[(s0 >> 8) & 0xff] ^
- Te3[(s1 ) & 0xff] ^
- rk[6]);
-
- t3= (Te0[(s3 >> 24) ] ^
- Te1[(s0 >> 16) & 0xff] ^
- Te2[(s1 >> 8) & 0xff] ^
- Te3[(s2 ) & 0xff] ^
- rk[7]);
-
- rk+= 8;
- if (--r == 0)
- break;
-
- s0= (Te0[(t0 >> 24) ] ^
- Te1[(t1 >> 16) & 0xff] ^
- Te2[(t2 >> 8) & 0xff] ^
- Te3[(t3 ) & 0xff] ^
- rk[0]);
-
- s1= (Te0[(t1 >> 24) ] ^
- Te1[(t2 >> 16) & 0xff] ^
- Te2[(t3 >> 8) & 0xff] ^
- Te3[(t0 ) & 0xff] ^
- rk[1]);
-
- s2= (Te0[(t2 >> 24) ] ^
- Te1[(t3 >> 16) & 0xff] ^
- Te2[(t0 >> 8) & 0xff] ^
- Te3[(t1 ) & 0xff] ^
- rk[2]);
-
- s3= (Te0[(t3 >> 24) ] ^
- Te1[(t0 >> 16) & 0xff] ^
- Te2[(t1 >> 8) & 0xff] ^
- Te3[(t2 ) & 0xff] ^
- rk[3]);
- }
-#endif /* FULL_UNROLL */
-
- /* Apply last round and map cipher state to byte array block: */
- s0= ((Te4[(t0 >> 24) ] & 0xff000000) ^
- (Te4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
- (Te4[(t2 >> 8) & 0xff] & 0x0000ff00) ^
- (Te4[(t3 ) & 0xff] & 0x000000ff) ^
- rk[0]);
- PUTuint32(ct , s0);
-
- s1= ((Te4[(t1 >> 24) ] & 0xff000000) ^
- (Te4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
- (Te4[(t3 >> 8) & 0xff] & 0x0000ff00) ^
- (Te4[(t0 ) & 0xff] & 0x000000ff) ^
- rk[1]);
- PUTuint32(ct + 4, s1);
-
- s2= ((Te4[(t2 >> 24) ] & 0xff000000) ^
- (Te4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
- (Te4[(t0 >> 8) & 0xff] & 0x0000ff00) ^
- (Te4[(t1 ) & 0xff] & 0x000000ff) ^
- rk[2]);
- PUTuint32(ct + 8, s2);
-
- s3= ((Te4[(t3 >> 24) ] & 0xff000000) ^
- (Te4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
- (Te4[(t1 >> 8) & 0xff] & 0x0000ff00) ^
- (Te4[(t2 ) & 0xff] & 0x000000ff) ^
- rk[3]);
- PUTuint32(ct + 12, s3);
-}
-
-
-void rijndaelDecrypt(const uint32 rk[/*4*(Nr + 1)*/], int Nr,
- const uint8 ct[16], uint8 pt[16])
-{
- uint32 s0, s1, s2, s3, t0, t1, t2, t3;
-#ifndef FULL_UNROLL
- int r;
-#endif /* FULL_UNROLL */
-
- /* Map byte array block to cipher state and add initial round key: */
-
- s0 = GETuint32(ct ) ^ rk[0];
- s1 = GETuint32(ct + 4) ^ rk[1];
- s2 = GETuint32(ct + 8) ^ rk[2];
- s3 = GETuint32(ct + 12) ^ rk[3];
-
-#ifdef FULL_UNROLL
- /* round 1: */
- t0= (Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff]
- ^ Td3[s1 & 0xff] ^ rk[ 4]);
- t1= (Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff]
- ^ Td3[s2 & 0xff] ^ rk[ 5]);
- t2= (Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff]
- ^ Td3[s3 & 0xff] ^ rk[ 6]);
- t3= (Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff]
- ^ Td3[s0 & 0xff] ^ rk[ 7]);
-
- /* round 2: */
- s0= (Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff]
- ^ Td3[t1 & 0xff] ^ rk[ 8]);
- s1= (Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff]
- ^ Td3[t2 & 0xff] ^ rk[ 9]);
- s2= (Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff]
- ^ Td3[t3 & 0xff] ^ rk[10]);
- s3= (Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff]
- ^ Td3[t0 & 0xff] ^ rk[11]);
-
- /* round 3: */
- t0= (Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff]
- ^ Td3[s1 & 0xff] ^ rk[12]);
- t1= (Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff]
- ^ Td3[s2 & 0xff] ^ rk[13]);
- t2= (Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff]
- ^ Td3[s3 & 0xff] ^ rk[14]);
- t3= (Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff]
- ^ Td3[s0 & 0xff] ^ rk[15]);
-
- /* round 4: */
- s0= (Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff]
- ^ Td3[t1 & 0xff] ^ rk[16]);
- s1= (Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff]
- ^ Td3[t2 & 0xff] ^ rk[17]);
- s2= (Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff]
- ^ Td3[t3 & 0xff] ^ rk[18]);
- s3= (Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff]
- ^ Td3[t0 & 0xff] ^ rk[19]);
-
- /* round 5: */
- t0= (Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff]
- ^ Td3[s1 & 0xff] ^ rk[20]);
- t1= (Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff]
- ^ Td3[s2 & 0xff] ^ rk[21]);
- t2= (Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff]
- ^ Td3[s3 & 0xff] ^ rk[22]);
- t3= (Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff]
- ^ Td3[s0 & 0xff] ^ rk[23]);
-
- /* round 6: */
- s0= (Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff]
- ^ Td3[t1 & 0xff] ^ rk[24]);
- s1= (Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff]
- ^ Td3[t2 & 0xff] ^ rk[25]);
- s2= (Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff]
- ^ Td3[t3 & 0xff] ^ rk[26]);
- s3= (Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff]
- ^ Td3[t0 & 0xff] ^ rk[27]);
-
- /* round 7: */
- t0= (Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff]
- ^ Td3[s1 & 0xff] ^ rk[28]);
- t1= (Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff]
- ^ Td3[s2 & 0xff] ^ rk[29]);
- t2= (Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff]
- ^ Td3[s3 & 0xff] ^ rk[30]);
- t3= (Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff]
- ^ Td3[s0 & 0xff] ^ rk[31]);
-
- /* round 8: */
- s0= (Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff]
- ^ Td3[t1 & 0xff] ^ rk[32]);
- s1= (Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff]
- ^ Td3[t2 & 0xff] ^ rk[33]);
- s2= (Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff]
- ^ Td3[t3 & 0xff] ^ rk[34]);
- s3= (Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff]
- ^ Td3[t0 & 0xff] ^ rk[35]);
-
- /* round 9: */
- t0= (Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff]
- ^ Td3[s1 & 0xff] ^ rk[36]);
- t1= (Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff]
- ^ Td3[s2 & 0xff] ^ rk[37]);
- t2= (Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff]
- ^ Td3[s3 & 0xff] ^ rk[38]);
- t3= (Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff]
- ^ Td3[s0 & 0xff] ^ rk[39]);
-
- if (Nr > 10)
- {
- /* round 10: */
- s0= (Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff]
- ^ Td3[t1 & 0xff] ^ rk[40]);
- s1= (Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff]
- ^ Td3[t2 & 0xff] ^ rk[41]);
- s2= (Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff]
- ^ Td3[t3 & 0xff] ^ rk[42]);
- s3= (Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff]
- ^ Td3[t0 & 0xff] ^ rk[43]);
-
- /* round 11: */
- t0= (Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff]
- ^ Td3[s1 & 0xff] ^ rk[44]);
- t1= (Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff]
- ^ Td3[s2 & 0xff] ^ rk[45]);
- t2= (Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff]
- ^ Td3[s3 & 0xff] ^ rk[46]);
- t3= (Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff]
- ^ Td3[s0 & 0xff] ^ rk[47]);
-
- if (Nr > 12)
- {
- /* round 12: */
- s0= (Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff]
- ^ Td3[t1 & 0xff] ^ rk[48]);
- s1= (Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff]
- ^ Td3[t2 & 0xff] ^ rk[49]);
- s2= (Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff]
- ^ Td3[t3 & 0xff] ^ rk[50]);
- s3= (Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff]
- ^ Td3[t0 & 0xff] ^ rk[51]);
-
- /* round 13: */
- t0= (Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff]
- ^ Td3[s1 & 0xff] ^ rk[52]);
- t1= (Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff]
- ^ Td3[s2 & 0xff] ^ rk[53]);
- t2= (Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff]
- ^ Td3[s3 & 0xff] ^ rk[54]);
- t3= (Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff]
- ^ Td3[s0 & 0xff] ^ rk[55]);
- }
- }
- rk += Nr << 2;
-#else /* !FULL_UNROLL */
-
- /* Nr - 1 full rounds: */
- r= (Nr >> 1);
- for (;;)
- {
- t0= (Td0[(s0 >> 24) ] ^
- Td1[(s3 >> 16) & 0xff] ^
- Td2[(s2 >> 8) & 0xff] ^
- Td3[(s1 ) & 0xff] ^
- rk[4]);
-
- t1= (Td0[(s1 >> 24) ] ^
- Td1[(s0 >> 16) & 0xff] ^
- Td2[(s3 >> 8) & 0xff] ^
- Td3[(s2 ) & 0xff] ^
- rk[5]);
-
- t2= (Td0[(s2 >> 24) ] ^
- Td1[(s1 >> 16) & 0xff] ^
- Td2[(s0 >> 8) & 0xff] ^
- Td3[(s3 ) & 0xff] ^
- rk[6]);
-
- t3= (Td0[(s3 >> 24) ] ^
- Td1[(s2 >> 16) & 0xff] ^
- Td2[(s1 >> 8) & 0xff] ^
- Td3[(s0 ) & 0xff] ^
- rk[7]);
-
- rk+= 8;
- if (--r == 0)
- break;
-
- s0= (Td0[(t0 >> 24) ] ^
- Td1[(t3 >> 16) & 0xff] ^
- Td2[(t2 >> 8) & 0xff] ^
- Td3[(t1 ) & 0xff] ^
- rk[0]);
-
- s1= (Td0[(t1 >> 24) ] ^
- Td1[(t0 >> 16) & 0xff] ^
- Td2[(t3 >> 8) & 0xff] ^
- Td3[(t2 ) & 0xff] ^
- rk[1]);
-
- s2= (Td0[(t2 >> 24) ] ^
- Td1[(t1 >> 16) & 0xff] ^
- Td2[(t0 >> 8) & 0xff] ^
- Td3[(t3 ) & 0xff] ^
- rk[2]);
-
- s3= (Td0[(t3 >> 24) ] ^
- Td1[(t2 >> 16) & 0xff] ^
- Td2[(t1 >> 8) & 0xff] ^
- Td3[(t0 ) & 0xff] ^
- rk[3]);
- }
-
-#endif /* FULL_UNROLL */
-
- /* Apply last round and map cipher state to byte array block: */
-
- s0= ((Td4[(t0 >> 24) ] & 0xff000000) ^
- (Td4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
- (Td4[(t2 >> 8) & 0xff] & 0x0000ff00) ^
- (Td4[(t1 ) & 0xff] & 0x000000ff) ^
- rk[0]);
- PUTuint32(pt , s0);
-
- s1= ((Td4[(t1 >> 24) ] & 0xff000000) ^
- (Td4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
- (Td4[(t3 >> 8) & 0xff] & 0x0000ff00) ^
- (Td4[(t2 ) & 0xff] & 0x000000ff) ^
- rk[1]);
- PUTuint32(pt + 4, s1);
-
- s2= ((Td4[(t2 >> 24) ] & 0xff000000) ^
- (Td4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
- (Td4[(t0 >> 8) & 0xff] & 0x0000ff00) ^
- (Td4[(t3 ) & 0xff] & 0x000000ff) ^
- rk[2]);
- PUTuint32(pt + 8, s2);
-
- s3= ((Td4[(t3 >> 24) ] & 0xff000000) ^
- (Td4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
- (Td4[(t1 >> 8) & 0xff] & 0x0000ff00) ^
- (Td4[(t0 ) & 0xff] & 0x000000ff) ^
- rk[3]);
- PUTuint32(pt + 12, s3);
-}
diff --git a/mysys/sha1.c b/mysys/sha1.c
deleted file mode 100644
index e5b33a9ad13..00000000000
--- a/mysys/sha1.c
+++ /dev/null
@@ -1,422 +0,0 @@
-/* Copyright (c) 2002, 2004, 2006 MySQL AB
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; version 2 of the License.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-*/
-
-/*
- Original Source from: http://www.faqs.org/rfcs/rfc3174.html
-
- Copyright (C) The Internet Society (2001). All Rights Reserved.
-
- This document and translations of it may be copied and furnished to
- others, and derivative works that comment on or otherwise explain it
- or assist in its implementation may be prepared, copied, published
- and distributed, in whole or in part, without restriction of any
- kind, provided that the above copyright notice and this paragraph are
- included on all such copies and derivative works. However, this
- document itself may not be modified in any way, such as by removing
- the copyright notice or references to the Internet Society or other
- Internet organizations, except as needed for the purpose of
- developing Internet standards in which case the procedures for
- copyrights defined in the Internet Standards process must be
- followed, or as required to translate it into languages other than
- English.
-
- The limited permissions granted above are perpetual and will not be
- revoked by the Internet Society or its successors or assigns.
-
- This document and the information contained herein is provided on an
- "AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING
- TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING
- BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION
- HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF
- MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
-
- Acknowledgement
- Funding for the RFC Editor function is currently provided by the
- Internet Society.
-
- DESCRIPTION
- This file implements the Secure Hashing Algorithm 1 as
- defined in FIPS PUB 180-1 published April 17, 1995.
-
- The SHA-1, produces a 160-bit message digest for a given data
- stream. It should take about 2**n steps to find a message with the
- same digest as a given message and 2**(n/2) to find any two
- messages with the same digest, when n is the digest size in bits.
- Therefore, this algorithm can serve as a means of providing a
- "fingerprint" for a message.
-
- PORTABILITY ISSUES
- SHA-1 is defined in terms of 32-bit "words". This code uses
- <stdint.h> (included via "sha1.h" to define 32 and 8 bit unsigned
- integer types. If your C compiler does not support 32 bit unsigned
- integers, this code is not appropriate.
-
- CAVEATS
- SHA-1 is designed to work with messages less than 2^64 bits long.
- Although SHA-1 allows a message digest to be generated for messages
- of any number of bits less than 2^64, this implementation only
- works with messages with a length that is a multiple of the size of
- an 8-bit character.
-
- CHANGES
- 2002 by Peter Zaitsev to
- - fit to new prototypes according to MySQL standard
- - Some optimizations
- - All checking is now done in debug only mode
- - More comments
-*/
-
-#include "my_global.h"
-#include "m_string.h"
-#include "sha1.h"
-
-/*
- Define the SHA1 circular left shift macro
-*/
-
-#define SHA1CircularShift(bits,word) \
- (((word) << (bits)) | ((word) >> (32-(bits))))
-
-/* Local Function Prototyptes */
-static void SHA1PadMessage(SHA1_CONTEXT*);
-static void SHA1ProcessMessageBlock(SHA1_CONTEXT*);
-
-
-/*
- Initialize SHA1Context
-
- SYNOPSIS
- mysql_sha1_reset()
- context [in/out] The context to reset.
-
- DESCRIPTION
- This function will initialize the SHA1Context in preparation
- for computing a new SHA1 message digest.
-
- RETURN
- SHA_SUCCESS ok
- != SHA_SUCCESS sha Error Code.
-*/
-
-
-const uint32 sha_const_key[5]=
-{
- 0x67452301,
- 0xEFCDAB89,
- 0x98BADCFE,
- 0x10325476,
- 0xC3D2E1F0
-};
-
-
-int mysql_sha1_reset(SHA1_CONTEXT *context)
-{
-#ifndef DBUG_OFF
- if (!context)
- return SHA_NULL;
-#endif
-
- context->Length = 0;
- context->Message_Block_Index = 0;
-
- context->Intermediate_Hash[0] = sha_const_key[0];
- context->Intermediate_Hash[1] = sha_const_key[1];
- context->Intermediate_Hash[2] = sha_const_key[2];
- context->Intermediate_Hash[3] = sha_const_key[3];
- context->Intermediate_Hash[4] = sha_const_key[4];
-
- context->Computed = 0;
- context->Corrupted = 0;
-
- return SHA_SUCCESS;
-}
-
-
-/*
- Return the 160-bit message digest into the array provided by the caller
-
- SYNOPSIS
- mysql_sha1_result()
- context [in/out] The context to use to calculate the SHA-1 hash.
- Message_Digest: [out] Where the digest is returned.
-
- DESCRIPTION
- NOTE: The first octet of hash is stored in the 0th element,
- the last octet of hash in the 19th element.
-
- RETURN
- SHA_SUCCESS ok
- != SHA_SUCCESS sha Error Code.
-*/
-
-int mysql_sha1_result(SHA1_CONTEXT *context,
- uint8 Message_Digest[SHA1_HASH_SIZE])
-{
- int i;
-
-#ifndef DBUG_OFF
- if (!context || !Message_Digest)
- return SHA_NULL;
-
- if (context->Corrupted)
- return context->Corrupted;
-#endif
-
- if (!context->Computed)
- {
- SHA1PadMessage(context);
- /* message may be sensitive, clear it out */
- bzero((char*) context->Message_Block,64);
- context->Length = 0; /* and clear length */
- context->Computed = 1;
- }
-
- for (i = 0; i < SHA1_HASH_SIZE; i++)
- Message_Digest[i] = (int8)((context->Intermediate_Hash[i>>2] >> 8
- * ( 3 - ( i & 0x03 ) )));
- return SHA_SUCCESS;
-}
-
-
-/*
- Accepts an array of octets as the next portion of the message.
-
- SYNOPSIS
- mysql_sha1_input()
- context [in/out] The SHA context to update
- message_array An array of characters representing the next portion
- of the message.
- length The length of the message in message_array
-
- RETURN
- SHA_SUCCESS ok
- != SHA_SUCCESS sha Error Code.
-*/
-
-int mysql_sha1_input(SHA1_CONTEXT *context, const uint8 *message_array,
- unsigned length)
-{
- if (!length)
- return SHA_SUCCESS;
-
-#ifndef DBUG_OFF
- /* We assume client konows what it is doing in non-debug mode */
- if (!context || !message_array)
- return SHA_NULL;
- if (context->Computed)
- return (context->Corrupted= SHA_STATE_ERROR);
- if (context->Corrupted)
- return context->Corrupted;
-#endif
-
- while (length--)
- {
- context->Message_Block[context->Message_Block_Index++]=
- (*message_array & 0xFF);
- context->Length += 8; /* Length is in bits */
-
-#ifndef DBUG_OFF
- /*
- Then we're not debugging we assume we never will get message longer
- 2^64 bits.
- */
- if (context->Length == 0)
- return (context->Corrupted= 1); /* Message is too long */
-#endif
-
- if (context->Message_Block_Index == 64)
- {
- SHA1ProcessMessageBlock(context);
- }
- message_array++;
- }
- return SHA_SUCCESS;
-}
-
-
-/*
- Process the next 512 bits of the message stored in the Message_Block array.
-
- SYNOPSIS
- SHA1ProcessMessageBlock()
-
- DESCRIPTION
- Many of the variable names in this code, especially the single
- character names, were used because those were the names used in
- the publication.
-*/
-
-/* Constants defined in SHA-1 */
-static const uint32 K[]=
-{
- 0x5A827999,
- 0x6ED9EBA1,
- 0x8F1BBCDC,
- 0xCA62C1D6
-};
-
-
-static void SHA1ProcessMessageBlock(SHA1_CONTEXT *context)
-{
- int t; /* Loop counter */
- uint32 temp; /* Temporary word value */
- uint32 W[80]; /* Word sequence */
- uint32 A, B, C, D, E; /* Word buffers */
- int idx;
-
- /*
- Initialize the first 16 words in the array W
- */
-
- for (t = 0; t < 16; t++)
- {
- idx=t*4;
- W[t] = context->Message_Block[idx] << 24;
- W[t] |= context->Message_Block[idx + 1] << 16;
- W[t] |= context->Message_Block[idx + 2] << 8;
- W[t] |= context->Message_Block[idx + 3];
- }
-
-
- for (t = 16; t < 80; t++)
- {
- W[t] = SHA1CircularShift(1,W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16]);
- }
-
- A = context->Intermediate_Hash[0];
- B = context->Intermediate_Hash[1];
- C = context->Intermediate_Hash[2];
- D = context->Intermediate_Hash[3];
- E = context->Intermediate_Hash[4];
-
- for (t = 0; t < 20; t++)
- {
- temp= SHA1CircularShift(5,A) + ((B & C) | ((~B) & D)) + E + W[t] + K[0];
- E = D;
- D = C;
- C = SHA1CircularShift(30,B);
- B = A;
- A = temp;
- }
-
- for (t = 20; t < 40; t++)
- {
- temp = SHA1CircularShift(5,A) + (B ^ C ^ D) + E + W[t] + K[1];
- E = D;
- D = C;
- C = SHA1CircularShift(30,B);
- B = A;
- A = temp;
- }
-
- for (t = 40; t < 60; t++)
- {
- temp= (SHA1CircularShift(5,A) + ((B & C) | (B & D) | (C & D)) + E + W[t] +
- K[2]);
- E = D;
- D = C;
- C = SHA1CircularShift(30,B);
- B = A;
- A = temp;
- }
-
- for (t = 60; t < 80; t++)
- {
- temp = SHA1CircularShift(5,A) + (B ^ C ^ D) + E + W[t] + K[3];
- E = D;
- D = C;
- C = SHA1CircularShift(30,B);
- B = A;
- A = temp;
- }
-
- context->Intermediate_Hash[0] += A;
- context->Intermediate_Hash[1] += B;
- context->Intermediate_Hash[2] += C;
- context->Intermediate_Hash[3] += D;
- context->Intermediate_Hash[4] += E;
-
- context->Message_Block_Index = 0;
-}
-
-
-/*
- Pad message
-
- SYNOPSIS
- SHA1PadMessage()
- context: [in/out] The context to pad
-
- DESCRIPTION
- According to the standard, the message must be padded to an even
- 512 bits. The first padding bit must be a '1'. The last 64 bits
- represent the length of the original message. All bits in between
- should be 0. This function will pad the message according to
- those rules by filling the Message_Block array accordingly. It
- will also call the ProcessMessageBlock function provided
- appropriately. When it returns, it can be assumed that the message
- digest has been computed.
-
-*/
-
-static void SHA1PadMessage(SHA1_CONTEXT *context)
-{
- /*
- Check to see if the current message block is too small to hold
- the initial padding bits and length. If so, we will pad the
- block, process it, and then continue padding into a second
- block.
- */
-
- int i=context->Message_Block_Index;
-
- if (i > 55)
- {
- context->Message_Block[i++] = 0x80;
- bzero((char*) &context->Message_Block[i],
- sizeof(context->Message_Block[0])*(64-i));
- context->Message_Block_Index=64;
-
- /* This function sets context->Message_Block_Index to zero */
- SHA1ProcessMessageBlock(context);
-
- bzero((char*) &context->Message_Block[0],
- sizeof(context->Message_Block[0])*56);
- context->Message_Block_Index=56;
- }
- else
- {
- context->Message_Block[i++] = 0x80;
- bzero((char*) &context->Message_Block[i],
- sizeof(context->Message_Block[0])*(56-i));
- context->Message_Block_Index=56;
- }
-
- /*
- Store the message length as the last 8 octets
- */
-
- context->Message_Block[56] = (int8) (context->Length >> 56);
- context->Message_Block[57] = (int8) (context->Length >> 48);
- context->Message_Block[58] = (int8) (context->Length >> 40);
- context->Message_Block[59] = (int8) (context->Length >> 32);
- context->Message_Block[60] = (int8) (context->Length >> 24);
- context->Message_Block[61] = (int8) (context->Length >> 16);
- context->Message_Block[62] = (int8) (context->Length >> 8);
- context->Message_Block[63] = (int8) (context->Length);
-
- SHA1ProcessMessageBlock(context);
-}
diff --git a/mysys/stacktrace.c b/mysys/stacktrace.c
index 402520990b6..613911e4495 100644
--- a/mysys/stacktrace.c
+++ b/mysys/stacktrace.c
@@ -95,7 +95,7 @@ static int safe_print_str(const char *addr, int max_len)
/* Read up to the maximum number of bytes. */
while (total)
{
- count= min(sizeof(buf), total);
+ count= MY_MIN(sizeof(buf), total);
if ((nbytes= pread(fd, buf, count, offset)) < 0)
{
@@ -348,7 +348,7 @@ void my_print_stacktrace(uchar* stack_bottom, ulong thread_stack)
if (!stack_bottom || (uchar*) stack_bottom > (uchar*) &fp)
{
- ulong tmp= min(0x10000,thread_stack);
+ ulong tmp= MY_MIN(0x10000,thread_stack);
/* Assume that the stack starts at the previous even 65K */
stack_bottom= (uchar*) (((ulong) &fp + tmp) & ~(ulong) 0xFFFF);
my_safe_printf_stderr("Cannot determine thread, fp=%p, "
diff --git a/mysys/string.c b/mysys/string.c
index 1263e7824f9..42fe83ed4e1 100644
--- a/mysys/string.c
+++ b/mysys/string.c
@@ -223,77 +223,3 @@ void dynstr_reassociate(DYNAMIC_STRING *str, char **ptr, size_t *length,
*alloc_length= str->max_length;
str->str=0;
}
-
-
-/*
- copy a string from one character set to another
-
- SYNOPSIS
- copy_and_convert()
- to Store result here
- to_cs Character set of result string
- from Copy from here
- from_length Length of from string
- from_cs From character set
-
- NOTES
- 'to' must be big enough as form_length * to_cs->mbmaxlen
-
- RETURN
- length of bytes copied to 'to'
-*/
-
-uint32
-copy_and_convert_extended(char *to, uint32 to_length, CHARSET_INFO *to_cs,
- const char *from, uint32 from_length,
- CHARSET_INFO *from_cs,
- uint *errors)
-{
- int cnvres;
- my_wc_t wc;
- const uchar *from_end= (const uchar*) from+from_length;
- char *to_start= to;
- uchar *to_end= (uchar*) to+to_length;
- my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
- my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
- uint error_count= 0;
-
- while (1)
- {
- if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from,
- from_end)) > 0)
- from+= cnvres;
- else if (cnvres == MY_CS_ILSEQ)
- {
- error_count++;
- from++;
- wc= '?';
- }
- else if (cnvres > MY_CS_TOOSMALL)
- {
- /*
- A correct multibyte sequence detected
- But it doesn't have Unicode mapping.
- */
- error_count++;
- from+= (-cnvres);
- wc= '?';
- }
- else
- break; // Not enough characters
-
-outp:
- if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
- to+= cnvres;
- else if (cnvres == MY_CS_ILUNI && wc != '?')
- {
- error_count++;
- wc= '?';
- goto outp;
- }
- else
- break;
- }
- *errors= error_count;
- return (uint32) (to - to_start);
-}
diff --git a/mysys/testhash.c b/mysys/testhash.c
index ffdaaece770..3359b5dce29 100644
--- a/mysys/testhash.c
+++ b/mysys/testhash.c
@@ -79,7 +79,7 @@ static int do_test()
for (i=0 ; i < recant ; i++)
{
- n1=rnd(1000); n2=rnd(100); n3=rnd(min(recant*5,MAX_RECORDS));
+ n1=rnd(1000); n2=rnd(100); n3=rnd(MY_MIN(recant*5,MAX_RECORDS));
record= (char*) my_malloc(reclength,MYF(MY_FAE));
sprintf(record,"%6d:%4d:%8d:Pos: %4d ",n1,n2,n3,write_count);
if (my_hash_insert(&hash,record))
@@ -133,7 +133,7 @@ static int do_test()
printf("- Update\n");
for (i=0 ; i < write_count/10 ; i++)
{
- n1=rnd(1000); n2=rnd(100); n3=rnd(min(recant*2,MAX_RECORDS));
+ n1=rnd(1000); n2=rnd(100); n3=rnd(MY_MIN(recant*2,MAX_RECORDS));
for (j=rnd(1000) ; j>0 && key1[j] == 0 ; j--) ;
if (j)
{
diff --git a/mysys_ssl/CMakeLists.txt b/mysys_ssl/CMakeLists.txt
new file mode 100644
index 00000000000..b91988d1c8b
--- /dev/null
+++ b/mysys_ssl/CMakeLists.txt
@@ -0,0 +1,48 @@
+# Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include
+ ${CMAKE_SOURCE_DIR}/mysys_ssl
+ ${SSL_INCLUDE_DIRS})
+
+IF(SSL_DEFINES)
+ADD_DEFINITIONS(${SSL_DEFINES})
+ENDIF()
+
+# We do RESTRICT_SYMBOL_EXPORTS(yassl) elsewhere.
+# In order to get correct symbol visibility, these files
+# must be compiled with "-fvisibility=hidden"
+IF(WITH_SSL STREQUAL "bundled" AND HAVE_VISIBILITY_HIDDEN)
+ SET_SOURCE_FILES_PROPERTIES(
+ crypt_genhash_impl.cc
+ my_aes.cc
+ my_md5.cc
+ my_sha1.cc
+ my_sha2.cc
+ PROPERTIES COMPILE_FLAGS "-fvisibility=hidden")
+ENDIF()
+
+SET(MYSYS_SSL_SOURCES
+ crypt_genhash_impl.cc
+ my_aes.cc
+ my_sha1.cc
+ my_sha2.cc
+ my_md5.cc
+ my_rnd.cc
+ )
+
+ADD_CONVENIENCE_LIBRARY(mysys_ssl ${MYSYS_SSL_SOURCES})
+TARGET_LINK_LIBRARIES(mysys_ssl dbug strings ${SSL_LIBRARIES})
+DTRACE_INSTRUMENT(mysys_ssl)
diff --git a/mysys_ssl/CTestTestfile.cmake b/mysys_ssl/CTestTestfile.cmake
new file mode 100644
index 00000000000..fc98399082f
--- /dev/null
+++ b/mysys_ssl/CTestTestfile.cmake
@@ -0,0 +1,6 @@
+# CMake generated Testfile for
+# Source directory: /my/maria-10.0-merge/mysys_ssl
+# Build directory: /my/maria-10.0-merge/mysys_ssl
+#
+# This file includes the relevent testing commands required for
+# testing this directory and lists subdirectories to be tested as well.
diff --git a/mysys_ssl/cmake_install.cmake b/mysys_ssl/cmake_install.cmake
new file mode 100644
index 00000000000..9617527ed80
--- /dev/null
+++ b/mysys_ssl/cmake_install.cmake
@@ -0,0 +1,34 @@
+# Install script for directory: /my/maria-10.0-merge/mysys_ssl
+
+# Set the install prefix
+IF(NOT DEFINED CMAKE_INSTALL_PREFIX)
+ SET(CMAKE_INSTALL_PREFIX "/usr/local/mysql")
+ENDIF(NOT DEFINED CMAKE_INSTALL_PREFIX)
+STRING(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}")
+
+# Set the install configuration name.
+IF(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME)
+ IF(BUILD_TYPE)
+ STRING(REGEX REPLACE "^[^A-Za-z0-9_]+" ""
+ CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}")
+ ELSE(BUILD_TYPE)
+ SET(CMAKE_INSTALL_CONFIG_NAME "Debug")
+ ENDIF(BUILD_TYPE)
+ MESSAGE(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"")
+ENDIF(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME)
+
+# Set the component getting installed.
+IF(NOT CMAKE_INSTALL_COMPONENT)
+ IF(COMPONENT)
+ MESSAGE(STATUS "Install component: \"${COMPONENT}\"")
+ SET(CMAKE_INSTALL_COMPONENT "${COMPONENT}")
+ ELSE(COMPONENT)
+ SET(CMAKE_INSTALL_COMPONENT)
+ ENDIF(COMPONENT)
+ENDIF(NOT CMAKE_INSTALL_COMPONENT)
+
+# Install shared libraries without execute permission?
+IF(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE)
+ SET(CMAKE_INSTALL_SO_NO_EXE "0")
+ENDIF(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE)
+
diff --git a/mysys_ssl/crypt_genhash_impl.cc b/mysys_ssl/crypt_genhash_impl.cc
new file mode 100644
index 00000000000..ab7fdec46b9
--- /dev/null
+++ b/mysys_ssl/crypt_genhash_impl.cc
@@ -0,0 +1,454 @@
+/* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02111-1307 USA */
+
+/* We always should include my_global first */
+
+#include <my_global.h>
+
+#ifdef HAVE_OPENSSL
+
+#ifdef HAVE_YASSL
+#include <sha.hpp>
+#include <openssl/ssl.h>
+#else
+#include <openssl/sha.h>
+#include <openssl/rand.h>
+#endif
+#include "crypt_genhash_impl.h"
+#include <string.h>
+
+#ifndef HAVE_YASSL
+#define DIGEST_CTX SHA256_CTX
+#define DIGESTInit SHA256_Init
+#define DIGESTUpdate SHA256_Update
+#define DIGESTFinal SHA256_Final
+#define DIGEST_LEN SHA256_DIGEST_LENGTH
+#else
+#define DIGEST_CTX TaoCrypt::SHA256
+#define DIGEST_LEN 32
+void DIGESTInit(DIGEST_CTX *ctx)
+{
+ ctx->Init();
+}
+
+void DIGESTUpdate(DIGEST_CTX *ctx, const void *plaintext, int len)
+{
+ ctx->Update((const TaoCrypt::byte *)plaintext, len);
+}
+
+void DIGESTFinal(void *txt, DIGEST_CTX *ctx)
+{
+ ctx->Final((TaoCrypt::byte *)txt);
+}
+
+#endif // HAVE_YASSL
+
+static const char crypt_alg_magic[] = "$5";
+
+#ifndef MAX
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+#endif
+#ifndef MIN
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+#endif
+
+
+/**
+ Size-bounded string copying and concatenation
+ This is a replacement for STRLCPY(3)
+*/
+
+size_t
+strlcat(char *dst, const char *src, size_t siz)
+{
+ char *d= dst;
+ const char *s= src;
+ size_t n= siz;
+ size_t dlen;
+ /* Find the end of dst and adjust bytes left but don't go past end */
+ while (n-- != 0 && *d != '\0')
+ d++;
+ dlen= d - dst;
+ n= siz - dlen;
+ if (n == 0)
+ return(dlen + siz);
+ while (*s != '\0')
+ {
+ if (n != 1)
+ {
+ *d++= *s;
+ n--;
+ }
+ s++;
+ }
+ *d= '\0';
+ return(dlen + (s - src)); /* count does not include NUL */
+}
+
+static const int crypt_alg_magic_len = sizeof (crypt_alg_magic) - 1;
+
+static unsigned char b64t[] = /* 0 ... 63 => ascii - 64 */
+ "./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+
+#define b64_from_24bit(B2, B1, B0, N) \
+{ \
+ uint32 w = ((B2) << 16) | ((B1) << 8) | (B0); \
+ int n = (N); \
+ while (--n >= 0 && ctbufflen > 0) { \
+ *p++ = b64t[w & 0x3f]; \
+ w >>= 6; \
+ ctbufflen--; \
+} \
+}
+
+#define ROUNDS "rounds="
+#define ROUNDSLEN (sizeof (ROUNDS) - 1)
+
+/**
+ Get the integer value after rounds= where ever it occurs in the string.
+ if the last char after the int is a , or $ that is fine anything else is an
+ error.
+*/
+static uint32 getrounds(const char *s)
+{
+ const char *r;
+ const char *p;
+ char *e;
+ long val;
+
+ if (s == NULL)
+ return (0);
+
+ if ((r = strstr(s, ROUNDS)) == NULL)
+ {
+ return (0);
+ }
+
+ if (strncmp(r, ROUNDS, ROUNDSLEN) != 0)
+ {
+ return (0);
+ }
+
+ p= r + ROUNDSLEN;
+ errno= 0;
+ val= strtol(p, &e, 10);
+ /*
+ An error occurred or there is non-numeric stuff at the end
+ which isn't one of the crypt(3c) special chars ',' or '$'
+ */
+ if (errno != 0 || val < 0 || !(*e == '\0' || *e == ',' || *e == '$'))
+ {
+ return (0);
+ }
+
+ return ((uint32) val);
+}
+
+/**
+ Finds the interval which envelopes the user salt in a crypt password
+ The crypt format is assumed to be $a$bbbb$cccccc\0 and the salt is found
+ by counting the delimiters and marking begin and end.
+
+ @param salt_being[in] Pointer to start of crypt passwd
+ @param salt_being[out] Pointer to first byte of the salt
+ @param salt_end[in] Pointer to the last byte in passwd
+ @param salt_end[out] Pointer to the byte immediatly following the salt ($)
+
+ @return The size of the salt identified
+*/
+
+int extract_user_salt(char **salt_begin,
+ char **salt_end)
+{
+ char *it= *salt_begin;
+ int delimiter_count= 0;
+ while(it != *salt_end)
+ {
+ if (*it == '$')
+ {
+ ++delimiter_count;
+ if (delimiter_count == 2)
+ {
+ *salt_begin= it + 1;
+ }
+ if (delimiter_count == 3)
+ break;
+ }
+ ++it;
+ }
+ *salt_end= it;
+ return *salt_end - *salt_begin;
+}
+
+const char *sha256_find_digest(char *pass)
+{
+ int sz= strlen(pass);
+ return pass + sz - SHA256_HASH_LENGTH;
+}
+
+/*
+ * Portions of the below code come from crypt_bsdmd5.so (bsdmd5.c) :
+ * ----------------------------------------------------------------------------
+ * "THE BEER-WARE LICENSE" (Revision 42):
+ * <phk@login.dknet.dk> wrote this file. As long as you retain this notice you
+ * can do whatever you want with this stuff. If we meet some day, and you think
+ * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
+ * ----------------------------------------------------------------------------
+ *
+ * $FreeBSD: crypt.c,v 1.5 1996/10/14 08:34:02 phk Exp $
+ *
+ */
+
+/*
+ * The below code implements the specification from:
+ *
+ * From http://people.redhat.com/drepper/SHA-crypt.txt
+ *
+ * Portions of the code taken from inspired by or verified against the
+ * source in the above document which is licensed as:
+ *
+ * "Released into the Public Domain by Ulrich Drepper <drepper@redhat.com>."
+ */
+
+/*
+ Due to a Solaris namespace bug DS is a reserved word. To work around this
+ DS is undefined.
+*/
+#undef DS
+
+/* ARGSUSED4 */
+extern "C"
+char *
+my_crypt_genhash(char *ctbuffer,
+ size_t ctbufflen,
+ const char *plaintext,
+ int plaintext_len,
+ const char *switchsalt,
+ const char **params)
+{
+ int salt_len, i;
+ char *salt;
+ unsigned char A[DIGEST_LEN];
+ unsigned char B[DIGEST_LEN];
+ unsigned char DP[DIGEST_LEN];
+ unsigned char DS[DIGEST_LEN];
+ DIGEST_CTX ctxA, ctxB, ctxC, ctxDP, ctxDS;
+ int rounds = ROUNDS_DEFAULT;
+ int srounds = 0;
+ bool custom_rounds= false;
+ char *p;
+ char *P, *Pp;
+ char *S, *Sp;
+
+ /* Refine the salt */
+ salt = (char *)switchsalt;
+
+ /* skip our magic string */
+ if (strncmp((char *)salt, crypt_alg_magic, crypt_alg_magic_len) == 0)
+ {
+ salt += crypt_alg_magic_len + 1;
+ }
+
+ srounds = getrounds(salt);
+ if (srounds != 0) {
+ rounds = MAX(ROUNDS_MIN, MIN(srounds, ROUNDS_MAX));
+ custom_rounds= true;
+ p = strchr(salt, '$');
+ if (p != NULL)
+ salt = p + 1;
+ }
+
+ salt_len = MIN(strcspn(salt, "$"), CRYPT_SALT_LENGTH);
+ //plaintext_len = strlen(plaintext);
+
+ /* 1. */
+ DIGESTInit(&ctxA);
+
+ /* 2. The password first, since that is what is most unknown */
+ DIGESTUpdate(&ctxA, plaintext, plaintext_len);
+
+ /* 3. Then the raw salt */
+ DIGESTUpdate(&ctxA, salt, salt_len);
+
+ /* 4. - 8. */
+ DIGESTInit(&ctxB);
+ DIGESTUpdate(&ctxB, plaintext, plaintext_len);
+ DIGESTUpdate(&ctxB, salt, salt_len);
+ DIGESTUpdate(&ctxB, plaintext, plaintext_len);
+ DIGESTFinal(B, &ctxB);
+
+ /* 9. - 10. */
+ for (i= plaintext_len; i > MIXCHARS; i -= MIXCHARS)
+ DIGESTUpdate(&ctxA, B, MIXCHARS);
+ DIGESTUpdate(&ctxA, B, i);
+
+ /* 11. */
+ for (i= plaintext_len; i > 0; i >>= 1) {
+ if ((i & 1) != 0)
+ {
+ DIGESTUpdate(&ctxA, B, MIXCHARS);
+ }
+ else
+ {
+ DIGESTUpdate(&ctxA, plaintext, plaintext_len);
+ }
+ }
+
+ /* 12. */
+ DIGESTFinal(A, &ctxA);
+
+ /* 13. - 15. */
+ DIGESTInit(&ctxDP);
+ for (i= 0; i < plaintext_len; i++)
+ DIGESTUpdate(&ctxDP, plaintext, plaintext_len);
+ DIGESTFinal(DP, &ctxDP);
+
+ /* 16. */
+ Pp= P= (char *)alloca(plaintext_len);
+ for (i= plaintext_len; i >= MIXCHARS; i -= MIXCHARS)
+ {
+ Pp= (char *)(memcpy(Pp, DP, MIXCHARS)) + MIXCHARS;
+ }
+ (void) memcpy(Pp, DP, i);
+
+ /* 17. - 19. */
+ DIGESTInit(&ctxDS);
+ for (i= 0; i < 16 + (uint8)A[0]; i++)
+ DIGESTUpdate(&ctxDS, salt, salt_len);
+ DIGESTFinal(DS, &ctxDS);
+
+ /* 20. */
+ Sp= S= (char *)alloca(salt_len);
+ for (i= salt_len; i >= MIXCHARS; i -= MIXCHARS)
+ {
+ Sp= (char *)(memcpy(Sp, DS, MIXCHARS)) + MIXCHARS;
+ }
+ (void) memcpy(Sp, DS, i);
+
+ /* 21. */
+ for (i= 0; i < rounds; i++)
+ {
+ DIGESTInit(&ctxC);
+
+ if ((i & 1) != 0)
+ {
+ DIGESTUpdate(&ctxC, P, plaintext_len);
+ }
+ else
+ {
+ if (i == 0)
+ DIGESTUpdate(&ctxC, A, MIXCHARS);
+ else
+ DIGESTUpdate(&ctxC, DP, MIXCHARS);
+ }
+
+ if (i % 3 != 0) {
+ DIGESTUpdate(&ctxC, S, salt_len);
+ }
+
+ if (i % 7 != 0) {
+ DIGESTUpdate(&ctxC, P, plaintext_len);
+ }
+
+ if ((i & 1) != 0)
+ {
+ if (i == 0)
+ DIGESTUpdate(&ctxC, A, MIXCHARS);
+ else
+ DIGESTUpdate(&ctxC, DP, MIXCHARS);
+ }
+ else
+ {
+ DIGESTUpdate(&ctxC, P, plaintext_len);
+ }
+ DIGESTFinal(DP, &ctxC);
+ }
+
+ /* 22. Now make the output string */
+ if (custom_rounds)
+ {
+ (void) snprintf(ctbuffer, ctbufflen,
+ "%s$rounds=%zu$", crypt_alg_magic, (size_t)rounds);
+ }
+ else
+ {
+ (void) snprintf(ctbuffer, ctbufflen,
+ "%s$", crypt_alg_magic);
+ }
+ (void) strncat(ctbuffer, (const char *)salt, salt_len);
+ (void) strlcat(ctbuffer, "$", ctbufflen);
+
+ p= ctbuffer + strlen(ctbuffer);
+ ctbufflen -= strlen(ctbuffer);
+
+ b64_from_24bit(DP[ 0], DP[10], DP[20], 4);
+ b64_from_24bit(DP[21], DP[ 1], DP[11], 4);
+ b64_from_24bit(DP[12], DP[22], DP[ 2], 4);
+ b64_from_24bit(DP[ 3], DP[13], DP[23], 4);
+ b64_from_24bit(DP[24], DP[ 4], DP[14], 4);
+ b64_from_24bit(DP[15], DP[25], DP[ 5], 4);
+ b64_from_24bit(DP[ 6], DP[16], DP[26], 4);
+ b64_from_24bit(DP[27], DP[ 7], DP[17], 4);
+ b64_from_24bit(DP[18], DP[28], DP[ 8], 4);
+ b64_from_24bit(DP[ 9], DP[19], DP[29], 4);
+ b64_from_24bit(0, DP[31], DP[30], 3);
+ *p= '\0';
+
+ (void) memset(A, 0, sizeof (A));
+ (void) memset(B, 0, sizeof (B));
+ (void) memset(DP, 0, sizeof (DP));
+ (void) memset(DS, 0, sizeof (DS));
+
+ return (ctbuffer);
+}
+
+
+/**
+ Generate a random string using ASCII characters but avoid seperator character.
+ Stdlib rand and srand are used to produce pseudo random numbers between
+ with about 7 bit worth of entropty between 1-127.
+*/
+extern "C"
+void generate_user_salt(char *buffer, int buffer_len)
+{
+ char *end= buffer + buffer_len - 1;
+#ifdef HAVE_YASSL
+ yaSSL::RAND_bytes((unsigned char *) buffer, buffer_len);
+#else
+ RAND_bytes((unsigned char *) buffer, buffer_len);
+#endif
+
+ /* Sequence must be a legal UTF8 string */
+ for (; buffer < end; buffer++)
+ {
+ *buffer &= 0x7f;
+ if (*buffer == '\0' || *buffer == '$')
+ *buffer= *buffer + 1;
+ }
+ /* Make sure the buffer is terminated properly */
+ *end= '\0';
+}
+
+void xor_string(char *to, int to_len, char *pattern, int pattern_len)
+{
+ int loop= 0;
+ while(loop <= to_len)
+ {
+ *(to + loop) ^= *(pattern + loop % pattern_len);
+ ++loop;
+ }
+}
+
+#endif // HAVE_OPENSSL
diff --git a/mysys_ssl/my_aes.cc b/mysys_ssl/my_aes.cc
new file mode 100644
index 00000000000..9327bc32a3b
--- /dev/null
+++ b/mysys_ssl/my_aes.cc
@@ -0,0 +1,278 @@
+/* Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+
+#include <my_global.h>
+#include <m_string.h>
+#include <my_aes.h>
+
+#if defined(HAVE_YASSL)
+#include "aes.hpp"
+#include "openssl/ssl.h"
+#elif defined(HAVE_OPENSSL)
+#include <openssl/aes.h>
+#include <openssl/evp.h>
+
+/*
+  RAII wrapper around OpenSSL's EVP_CIPHER_CTX: zero-initialized on
+  construction, cleaned up in the destructor, so the early returns in
+  my_aes_encrypt()/my_aes_decrypt() below cannot leak cipher resources.
+*/
+struct MyCipherCtx
+{
+ MyCipherCtx() { memset(&ctx, 0, sizeof(ctx)); }
+ ~MyCipherCtx() { EVP_CIPHER_CTX_cleanup(&ctx); }
+
+ EVP_CIPHER_CTX ctx;
+};
+
+enum encrypt_dir { MY_AES_ENCRYPT, MY_AES_DECRYPT };
+
+#define MY_AES_BLOCK_SIZE 16 /* Block size in bytes */
+
+/* If bad data discovered during decoding */
+#define AES_BAD_DATA -1
+
+/**
+  This is internal function just keeps joint code of Key generation
+
+  The user-supplied key is XOR-folded cyclically into a zeroed buffer of
+  AES_KEY_LENGTH / 8 bytes: keys longer than the real key size wrap
+  around, shorter keys leave the remaining bytes zero.
+
+  SYNOPSIS
+    my_aes_create_key()
+    @param key [in] Key to use for real key creation
+    @param key_length [in] Length of the key
+    @param rkey [out] Real key (used by OpenSSL/YaSSL)
+
+  @return
+    0 Ok
+    -1 Error; Note: The current implementation never returns this
+*/
+
+static int my_aes_create_key(const char *key, int key_length, uint8 *rkey)
+{
+ uint8 *rkey_end= rkey + AES_KEY_LENGTH / 8; /* Real key boundary */
+ uint8 *ptr; /* Start of the real key*/
+ const char *sptr; /* Start of the working key */
+ const char *key_end= key + key_length; /* Working key boundary*/
+
+ memset(rkey, 0, AES_KEY_LENGTH / 8); /* Set initial key */
+
+ for (ptr= rkey, sptr= key; sptr < key_end; ptr ++, sptr ++)
+ {
+ if (ptr == rkey_end)
+ /* Just loop over tmp_key until we used all key */
+ ptr= rkey;
+ *ptr ^= (uint8) *sptr;
+ }
+#ifdef AES_USE_KEY_BITS
+ /*
+ This block is intended to allow more weak encryption if application
+ build with libmysqld needs to correspond to export regulations
+ It should be never used in normal distribution as does not give
+ any speed improvement.
+ To get worse security define AES_USE_KEY_BITS to number of bits
+ you want key to be. It should be divisible by 8
+
+ WARNING: Changing this value results in changing of enryption for
+ all key lengths so altering this value will result in impossibility
+ to decrypt data encrypted with previous value
+ */
+#define AES_USE_KEY_BYTES (AES_USE_KEY_BITS/8)
+ /*
+ To get weaker key we use first AES_USE_KEY_BYTES bytes of created key
+ and cyclically copy them until we created all required key length
+ */
+ for (ptr= rkey+AES_USE_KEY_BYTES, sptr=rkey ; ptr < rkey_end;
+ ptr ++, sptr ++)
+ {
+ if (sptr == rkey + AES_USE_KEY_BYTES)
+ sptr= rkey;
+ *ptr= *sptr;
+ }
+#endif
+ return 0;
+}
+
+
+/**
+  Crypt buffer with AES encryption algorithm.
+
+  The YaSSL branch pads with PKCS#7-style bytes (each padding byte holds
+  the padding length) and always appends at least one padding byte, so
+  the output is strictly larger than the input; dest must provide at
+  least my_aes_get_size(source_length) bytes.  The OpenSSL branch relies
+  on EVP's default padding for the same layout.
+
+  SYNOPSIS
+    my_aes_encrypt()
+    @param source [in] Pointer to data for encryption
+    @param source_length [in] Size of encryption data
+    @param dest [out] Buffer to place encrypted data (must be large enough)
+    @param key [in] Key to be used for encryption
+    @param key_length [in] Length of the key. Will handle keys of any length
+
+  @return
+    >= 0 Size of encrypted data
+    < 0 Error
+*/
+
+int my_aes_encrypt(const char* source, int source_length, char* dest,
+ const char* key, int key_length)
+{
+#if defined(HAVE_YASSL)
+ TaoCrypt::AES_ECB_Encryption enc;
+ /* 128 bit block used for padding */
+ uint8 block[MY_AES_BLOCK_SIZE];
+ int num_blocks; /* number of complete blocks */
+ int i;
+#elif defined(HAVE_OPENSSL)
+ MyCipherCtx ctx;
+ int u_len, f_len;
+#endif
+
+ /* The real key to be used for encryption */
+ uint8 rkey[AES_KEY_LENGTH / 8];
+ int rc; /* result codes */
+
+ if ((rc= my_aes_create_key(key, key_length, rkey)))
+ return rc;
+
+#if defined(HAVE_YASSL)
+ /* NOTE(review): key length passed here is MY_AES_BLOCK_SIZE (16), i.e.
+ AES-128, matching EVP_aes_128_ecb() in the OpenSSL branch -- confirm
+ this stays consistent if AES_KEY_LENGTH is ever not 128 */
+ enc.SetKey((const TaoCrypt::byte *) rkey, MY_AES_BLOCK_SIZE);
+
+ num_blocks = source_length / MY_AES_BLOCK_SIZE;
+
+ for (i = num_blocks; i > 0; i--) /* Encode complete blocks */
+ {
+ enc.Process((TaoCrypt::byte *) dest, (const TaoCrypt::byte *) source,
+ MY_AES_BLOCK_SIZE);
+ source += MY_AES_BLOCK_SIZE;
+ dest += MY_AES_BLOCK_SIZE;
+ }
+
+ /* Encode the rest. We always have incomplete block */
+ char pad_len = MY_AES_BLOCK_SIZE - (source_length -
+ MY_AES_BLOCK_SIZE * num_blocks);
+ /* 16 - pad_len == number of leftover source bytes in the final block */
+ memcpy(block, source, 16 - pad_len);
+ memset(block + MY_AES_BLOCK_SIZE - pad_len, pad_len, pad_len);
+
+ enc.Process((TaoCrypt::byte *) dest, (const TaoCrypt::byte *) block,
+ MY_AES_BLOCK_SIZE);
+
+ return MY_AES_BLOCK_SIZE * (num_blocks + 1);
+#elif defined(HAVE_OPENSSL)
+ if (! EVP_EncryptInit(&ctx.ctx, EVP_aes_128_ecb(),
+ (const unsigned char *) rkey, NULL))
+ return AES_BAD_DATA; /* Error */
+ if (! EVP_EncryptUpdate(&ctx.ctx, (unsigned char *) dest, &u_len,
+ (unsigned const char *) source, source_length))
+ return AES_BAD_DATA; /* Error */
+ if (! EVP_EncryptFinal(&ctx.ctx, (unsigned char *) dest + u_len, &f_len))
+ return AES_BAD_DATA; /* Error */
+
+ return u_len + f_len;
+#endif
+}
+
+
+/**
+  DeCrypt buffer with AES encryption algorithm.
+
+  SYNOPSIS
+    my_aes_decrypt()
+    @param source [in] Pointer to data for decryption
+    @param source_length [in] Size of encrypted data; must be a non-zero
+                              multiple of MY_AES_BLOCK_SIZE
+    @param dest [out] Buffer to place decrypted data (must
+                      be large enough)
+    @param key [in] Key to be used for decryption
+    @param key_length [in] Length of the key. Will handle keys of any length
+
+  @return
+    >= 0 Size of decrypted data
+    < 0 Error (AES_BAD_DATA)
+*/
+
+int my_aes_decrypt(const char *source, int source_length, char *dest,
+ const char *key, int key_length)
+{
+#if defined(HAVE_YASSL)
+ TaoCrypt::AES_ECB_Decryption dec;
+ /* 128 bit block used for padding */
+ uint8 block[MY_AES_BLOCK_SIZE];
+ int num_blocks; /* Number of complete blocks */
+ int i;
+#elif defined(HAVE_OPENSSL)
+ MyCipherCtx ctx;
+ int u_len, f_len;
+#endif
+
+ /* The real key to be used for decryption */
+ uint8 rkey[AES_KEY_LENGTH / 8];
+ int rc; /* Result codes */
+
+ if ((rc= my_aes_create_key(key, key_length, rkey)))
+ return rc;
+
+#if defined(HAVE_YASSL)
+ dec.SetKey((const TaoCrypt::byte *) rkey, MY_AES_BLOCK_SIZE);
+
+ num_blocks = source_length / MY_AES_BLOCK_SIZE;
+
+ if ((source_length != num_blocks * MY_AES_BLOCK_SIZE) || num_blocks == 0 )
+ /* Input size has to be even and at least one block */
+ return AES_BAD_DATA;
+
+ /* Decode all but last blocks */
+ for (i = num_blocks - 1; i > 0; i--)
+ {
+ dec.Process((TaoCrypt::byte *) dest, (const TaoCrypt::byte *) source,
+ MY_AES_BLOCK_SIZE);
+ source += MY_AES_BLOCK_SIZE;
+ dest += MY_AES_BLOCK_SIZE;
+ }
+
+ /* Last block is decrypted into a scratch buffer because only part of
+ it (block size minus padding) is copied to dest */
+ dec.Process((TaoCrypt::byte *) block, (const TaoCrypt::byte *) source,
+ MY_AES_BLOCK_SIZE);
+
+ /* Use last char in the block as size */
+ uint pad_len = (uint) (uchar) block[MY_AES_BLOCK_SIZE - 1];
+
+ /* NOTE(review): pad_len == 0 is never produced by my_aes_encrypt but is
+ not rejected here -- confirm whether it should also be AES_BAD_DATA */
+ if (pad_len > MY_AES_BLOCK_SIZE)
+ return AES_BAD_DATA;
+ /* We could also check whole padding but we do not really need this */
+
+ memcpy(dest, block, MY_AES_BLOCK_SIZE - pad_len);
+ return MY_AES_BLOCK_SIZE * num_blocks - pad_len;
+#elif defined(HAVE_OPENSSL)
+ if (! EVP_DecryptInit(&ctx.ctx, EVP_aes_128_ecb(),
+ (const unsigned char *) rkey, NULL))
+ return AES_BAD_DATA; /* Error */
+ if (! EVP_DecryptUpdate(&ctx.ctx, (unsigned char *) dest, &u_len,
+ (unsigned const char *) source, source_length))
+ return AES_BAD_DATA; /* Error */
+ if (! EVP_DecryptFinal(&ctx.ctx, (unsigned char *) dest + u_len, &f_len))
+ return AES_BAD_DATA; /* Error */
+ return u_len + f_len;
+#endif
+}
+
+
+/**
+  Get size of buffer which will be large enough for encrypted data
+
+  Encryption always appends between 1 and MY_AES_BLOCK_SIZE padding
+  bytes, so the worst case is one whole extra block.
+
+  SYNOPSIS
+    my_aes_get_size()
+    @param source_length [in] Length of data to be encrypted
+
+  @return
+    Size of buffer required to store encrypted data
+*/
+
+int my_aes_get_size(int source_length)
+{
+ int full_blocks= source_length / MY_AES_BLOCK_SIZE;
+ return (full_blocks + 1) * MY_AES_BLOCK_SIZE;
+}
+
diff --git a/mysys_ssl/my_md5.cc b/mysys_ssl/my_md5.cc
new file mode 100644
index 00000000000..4c14366a4e3
--- /dev/null
+++ b/mysys_ssl/my_md5.cc
@@ -0,0 +1,68 @@
+/* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
+
+
+/**
+ @file
+
+ @brief
+ Wrapper functions for OpenSSL and YaSSL. Also provides a Compatibility layer
+ to make available YaSSL's MD5 implementation.
+*/
+
+#include <my_global.h>
+#include <my_md5.h>
+
+#if defined(HAVE_YASSL)
+#include "my_config.h"
+#include "md5.hpp"
+
+/* One-shot MD5 of buf via TaoCrypt; digest must hold 16 bytes */
+static void my_md5_hash(char *digest, const char *buf, int len)
+{
+ TaoCrypt::MD5 hasher;
+ hasher.Update((TaoCrypt::byte *) buf, len);
+ hasher.Final((TaoCrypt::byte *) digest);
+}
+
+#elif defined(HAVE_OPENSSL)
+#include <openssl/md5.h>
+
+/* One-shot MD5 of buf via OpenSSL; digest must hold 16 bytes */
+static void my_md5_hash(unsigned char* digest, unsigned const char *buf, int len)
+{
+ MD5_CTX ctx;
+ MD5_Init (&ctx);
+ MD5_Update (&ctx, buf, len);
+ MD5_Final (digest, &ctx);
+}
+
+#endif /* HAVE_YASSL */
+
+/**
+  Wrapper function to compute MD5 message digest.
+
+  Dispatches at compile time to the YaSSL or OpenSSL implementation
+  above; the two differ only in their parameter signedness.
+
+  @param digest [out] Computed MD5 digest, 16 bytes
+  @param buf    [in]  Message to be computed
+  @param len    [in]  Length of the message
+
+  @return void
+*/
+void compute_md5_hash(char *digest, const char *buf, int len)
+{
+#if defined(HAVE_YASSL)
+ my_md5_hash(digest, buf, len);
+#elif defined(HAVE_OPENSSL)
+ my_md5_hash((unsigned char*)digest, (unsigned const char*)buf, len);
+#endif /* HAVE_YASSL */
+}
diff --git a/mysys_ssl/my_rnd.cc b/mysys_ssl/my_rnd.cc
new file mode 100644
index 00000000000..aa8fb63cd4d
--- /dev/null
+++ b/mysys_ssl/my_rnd.cc
@@ -0,0 +1,103 @@
+/*
+ Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
+
+#include <my_global.h>
+#include <my_rnd.h>
+#include <m_string.h>
+
+#if defined(HAVE_YASSL)
+#if defined(YASSL_PREFIX)
+#define RAND_bytes yaRAND_bytes
+#endif /* YASSL_PREFIX */
+
+#include <openssl/ssl.h>
+
+#elif defined(HAVE_OPENSSL)
+#include <openssl/rand.h>
+#endif /* HAVE_YASSL */
+
+
+/*
+ A wrapper to use OpenSSL/yaSSL PRNGs.
+*/
+
+extern "C" {
+
+/*
+  Initialize random generator
+
+  NOTES
+    MySQL's password checks depends on this, so don't do any changes
+    that changes the random numbers that are generated!
+
+  @param rand_st [out] State to initialize
+  @param seed1   [in]  First seed, stored modulo max_value
+  @param seed2   [in]  Second seed, stored modulo max_value
+*/
+
+void my_rnd_init(struct my_rnd_struct *rand_st, ulong seed1, ulong seed2)
+{
+#ifdef HAVE_valgrind
+ bzero((char*) rand_st,sizeof(*rand_st)); /* Avoid uninitialized-memory warnings */
+#endif
+ rand_st->max_value= 0x3FFFFFFFL;
+ rand_st->max_value_dbl=(double) rand_st->max_value;
+ rand_st->seed1=seed1%rand_st->max_value ;
+ rand_st->seed2=seed2%rand_st->max_value;
+}
+
+/**
+  Generate random number.
+
+  Additive/multiplicative congruential step over both seeds.  The exact
+  arithmetic is part of the password-scramble protocol (see note on
+  my_rnd_init) and must not be changed.
+
+  @param rand_st [INOUT] Structure used for number generation.
+
+  @retval Generated pseudo random number, in [0, 1).
+*/
+
+double my_rnd(struct my_rnd_struct *rand_st)
+{
+ rand_st->seed1= (rand_st->seed1*3+rand_st->seed2) % rand_st->max_value;
+ rand_st->seed2= (rand_st->seed1+rand_st->seed2+33) % rand_st->max_value;
+ return (((double) rand_st->seed1) / rand_st->max_value_dbl);
+}
+
+/**
+  Generate a random number using the OpenSSL/yaSSL supplied
+  random number generator if available.
+
+  Falls back to my_rnd() when no SSL library is compiled in, or when
+  RAND_bytes reports failure.
+
+  @param rand_st [INOUT] Structure used for number generation
+                         only if none of the SSL libraries are
+                         available or the PRNG call fails.
+
+  @retval Generated random number, in [0, 1].
+*/
+
+double my_rnd_ssl(struct my_rnd_struct *rand_st)
+{
+
+#if defined(HAVE_YASSL) || defined(HAVE_OPENSSL)
+ int rc;
+ unsigned int res;
+
+#if defined(HAVE_YASSL)
+ rc= yaSSL::RAND_bytes((unsigned char *) &res, sizeof (unsigned int));
+#else
+ rc= RAND_bytes((unsigned char *) &res, sizeof (unsigned int));
+#endif /* HAVE_YASSL */
+ if (rc)
+ return (double)res / (double)UINT_MAX;
+
+#endif /* defined(HAVE_YASSL) || defined(HAVE_OPENSSL) */
+ return my_rnd(rand_st);
+}
+
+}
diff --git a/mysys_ssl/my_sha1.cc b/mysys_ssl/my_sha1.cc
new file mode 100644
index 00000000000..1c4bf7c9747
--- /dev/null
+++ b/mysys_ssl/my_sha1.cc
@@ -0,0 +1,141 @@
+/* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
+
+
+/**
+ @file
+
+ @brief
+ Wrapper functions for OpenSSL, YaSSL implementations. Also provides a
+ Compatibility layer to make available YaSSL's SHA1 implementation.
+*/
+
+#include <my_global.h>
+#include <sha1.h>
+
+#if defined(HAVE_YASSL)
+#include "sha.hpp"
+
+/**
+  Compute SHA1 message digest using YaSSL.
+
+  @param digest [out] Computed SHA1 digest; must hold SHA1_HASH_SIZE
+                      (20) bytes
+  @param buf    [in]  Message to be computed
+  @param len    [in]  Length of the message
+
+  @return void
+*/
+void mysql_sha1_yassl(uint8 *digest, const char *buf, int len)
+{
+ TaoCrypt::SHA hasher;
+ hasher.Update((const TaoCrypt::byte *) buf, len);
+ hasher.Final ((TaoCrypt::byte *) digest);
+}
+
+/**
+  Compute SHA1 message digest for two messages in order to
+  emulate sha1(msg1, msg2) using YaSSL, i.e. the digest of the
+  concatenation buf1 || buf2.
+
+  @param digest [out] Computed SHA1 digest; must hold SHA1_HASH_SIZE
+                      (20) bytes
+  @param buf1   [in]  First message
+  @param len1   [in]  Length of first message
+  @param buf2   [in]  Second message
+  @param len2   [in]  Length of second message
+
+  @return void
+*/
+void mysql_sha1_multi_yassl(uint8 *digest, const char *buf1, int len1,
+ const char *buf2, int len2)
+{
+ TaoCrypt::SHA hasher;
+ hasher.Update((const TaoCrypt::byte *) buf1, len1);
+ hasher.Update((const TaoCrypt::byte *) buf2, len2);
+ hasher.Final((TaoCrypt::byte *) digest);
+}
+
+#elif defined(HAVE_OPENSSL)
+#include <openssl/sha.h>
+
+/* Start a new SHA1 computation; returns SHA1_Init's status (1 on success) */
+int mysql_sha1_reset(SHA_CTX *context)
+{
+ return SHA1_Init(context);
+}
+
+
+/* Feed length bytes of message_array into an ongoing SHA1 computation */
+int mysql_sha1_input(SHA_CTX *context, const uint8 *message_array,
+ unsigned length)
+{
+ return SHA1_Update(context, message_array, length);
+}
+
+
+/* Finalize the SHA1 computation, writing SHA1_HASH_SIZE bytes to
+ Message_Digest; the context is not reusable afterwards */
+int mysql_sha1_result(SHA_CTX *context,
+ uint8 Message_Digest[SHA1_HASH_SIZE])
+{
+ return SHA1_Final(Message_Digest, context);
+}
+
+#endif /* HAVE_YASSL */
+
+/**
+  Wrapper function to compute SHA1 message digest.
+
+  Dispatches at compile time to the YaSSL or OpenSSL implementation.
+
+  @param digest [out] Computed SHA1 digest, SHA1_HASH_SIZE (20) bytes
+  @param buf    [in]  Message to be computed
+  @param len    [in]  Length of the message
+
+  @return void
+*/
+void compute_sha1_hash(uint8 *digest, const char *buf, int len)
+{
+#if defined(HAVE_YASSL)
+ mysql_sha1_yassl(digest, buf, len);
+#elif defined(HAVE_OPENSSL)
+ SHA_CTX sha1_context;
+
+ mysql_sha1_reset(&sha1_context);
+ mysql_sha1_input(&sha1_context, (const uint8 *) buf, len);
+ mysql_sha1_result(&sha1_context, digest);
+#endif /* HAVE_YASSL */
+}
+
+
+/**
+  Wrapper function to compute SHA1 message digest for
+  two messages in order to emulate sha1(msg1, msg2), i.e. the digest
+  of the concatenation buf1 || buf2.
+
+  @param digest [out] Computed SHA1 digest, SHA1_HASH_SIZE (20) bytes
+  @param buf1   [in]  First message
+  @param len1   [in]  Length of first message
+  @param buf2   [in]  Second message
+  @param len2   [in]  Length of second message
+
+  @return void
+*/
+void compute_sha1_hash_multi(uint8 *digest, const char *buf1, int len1,
+ const char *buf2, int len2)
+{
+#if defined(HAVE_YASSL)
+ mysql_sha1_multi_yassl(digest, buf1, len1, buf2, len2);
+#elif defined(HAVE_OPENSSL)
+ SHA_CTX sha1_context;
+
+ mysql_sha1_reset(&sha1_context);
+ mysql_sha1_input(&sha1_context, (const uint8 *) buf1, len1);
+ mysql_sha1_input(&sha1_context, (const uint8 *) buf2, len2);
+ mysql_sha1_result(&sha1_context, digest);
+#endif /* HAVE_YASSL */
+}
+
diff --git a/mysys_ssl/my_sha2.cc b/mysys_ssl/my_sha2.cc
new file mode 100644
index 00000000000..00200337f08
--- /dev/null
+++ b/mysys_ssl/my_sha2.cc
@@ -0,0 +1,68 @@
+/* Copyright (c) 2007, 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
+
+
+/**
+ @file
+ A compatibility layer to our built-in SSL implementation, to mimic the
+ oft-used external library, OpenSSL.
+*/
+
+#include <my_global.h>
+#include <sha2.h>
+
+#ifdef HAVE_YASSL
+
+/*
+ If TaoCrypt::SHA512 or ::SHA384 are not defined (but ::SHA256 is), it's
+ probably that neither of config.h's SIZEOF_LONG or SIZEOF_LONG_LONG are
+ 64 bits long. At present, both OpenSSL and YaSSL require 64-bit integers
+ for SHA-512. (The SIZEOF_* definitions come from autoconf's config.h .)
+*/
+
+/*
+  Generates an OpenSSL-style one-shot SHA-2 function (e.g. SHA256())
+  backed by the corresponding TaoCrypt hasher.  output_ptr must point to
+  a buffer large enough for the digest and is also the return value.
+  (No comments inside the macro: every line ends in a continuation.)
+*/
+# define GEN_YASSL_SHA2_BRIDGE(size) \
+unsigned char* SHA##size(const unsigned char *input_ptr, size_t input_length, \
+ char unsigned *output_ptr) { \
+ TaoCrypt::SHA##size hasher; \
+ \
+ hasher.Update(input_ptr, input_length); \
+ hasher.Final(output_ptr); \
+ return(output_ptr); \
+}
+
+
+/**
+ @fn SHA512
+ @fn SHA384
+ @fn SHA256
+ @fn SHA224
+
+ Instantiate an hash object, fill in the cleartext value, compute the digest,
+ and extract the result from the object.
+
+ (Generate the functions. See similar .h code for the prototypes.)
+*/
+# ifndef OPENSSL_NO_SHA512
+GEN_YASSL_SHA2_BRIDGE(512);
+GEN_YASSL_SHA2_BRIDGE(384);
+# else
+# warning Some SHA2 functionality is missing. See OPENSSL_NO_SHA512.
+# endif
+GEN_YASSL_SHA2_BRIDGE(256);
+GEN_YASSL_SHA2_BRIDGE(224);
+
+# undef GEN_YASSL_SHA2_BRIDGE
+
+#endif /* HAVE_YASSL */
diff --git a/sql-common/client.c b/sql-common/client.c
index e03f5236fef..e9e6b857dd8 100644
--- a/sql-common/client.c
+++ b/sql-common/client.c
@@ -36,7 +36,7 @@
*/
#include <my_global.h>
-
+#include <my_default.h>
#include "mysql.h"
/* Remove client convenience wrappers */
diff --git a/sql-common/client_authentication.cc b/sql-common/client_authentication.cc
new file mode 100644
index 00000000000..195f37bcc59
--- /dev/null
+++ b/sql-common/client_authentication.cc
@@ -0,0 +1,253 @@
+/* Copyright (c) 2012, Oracle and/or its affiliates.
+ Copyright (c) 2013, Monty Program Ab
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02111-1307 USA */
+
+#include <my_global.h>
+
+#if defined(HAVE_OPENSSL)
+#include "crypt_genhash_impl.h"
+#include "mysql/client_authentication.h"
+#include "m_ctype.h"
+#include "sql_common.h"
+#include "errmsg.h"
+#include "m_string.h"
+#include <string.h>
+
+#if !defined(HAVE_YASSL)
+#include <openssl/rsa.h>
+#include <openssl/pem.h>
+#include <openssl/err.h>
+#if defined(_WIN32) && !defined(_OPENSSL_Applink) && defined(HAVE_OPENSSL_APPLINK_C)
+#include <openssl/applink.c>
+#endif
+#endif
+#include "mysql/service_my_plugin_log.h"
+
+#define MAX_CIPHER_LENGTH 1024
+
+#if !defined(HAVE_YASSL)
+mysql_mutex_t g_public_key_mutex;
+#endif
+
+/* Client plugin init hook: create the mutex protecting the cached RSA
+ public key. The char*/size_t/va_list arguments required by the plugin
+ API are unused here. Always returns 0 (success). */
+int sha256_password_init(char *a, size_t b, int c, va_list d)
+{
+#if !defined(HAVE_YASSL)
+ mysql_mutex_init(0,&g_public_key_mutex, MY_MUTEX_INIT_SLOW);
+#endif
+ return 0;
+}
+
+/* Client plugin deinit hook: destroy the public-key cache mutex created
+ in sha256_password_init. Always returns 0 (success). */
+int sha256_password_deinit(void)
+{
+#if !defined(HAVE_YASSL)
+ mysql_mutex_destroy(&g_public_key_mutex);
+#endif
+ return 0;
+}
+
+
+#if !defined(HAVE_YASSL)
+/**
+  Reads and parse RSA public key data from a file.
+
+  The parsed key is cached in a function-local static protected by
+  g_public_key_mutex, so the file is read at most once per process.
+
+  @param mysql connection handle with file path data
+
+  @return Pointer to the RSA public key storage buffer, or NULL when no
+          key path is configured or the file cannot be read or parsed.
+*/
+
+RSA *rsa_init(MYSQL *mysql)
+{
+ static RSA *g_public_key= NULL;
+ RSA *key= NULL;
+
+ mysql_mutex_lock(&g_public_key_mutex);
+ key= g_public_key;
+ mysql_mutex_unlock(&g_public_key_mutex);
+
+ if (key != NULL)
+ return key;
+
+ FILE *pub_key_file= NULL;
+
+ /*
+ Bug fix: the last condition used to be
+ 'server_public_key_path != '\0'', which compares the POINTER against
+ NULL (duplicating the previous clause) instead of rejecting an empty
+ path string. Dereference the first character instead.
+ */
+ if (mysql->options.extension != NULL &&
+ mysql->options.extension->server_public_key_path != NULL &&
+ mysql->options.extension->server_public_key_path[0] != '\0')
+ {
+ pub_key_file= fopen(mysql->options.extension->server_public_key_path,
+ "r");
+ }
+ /* No public key is used; return 0 without errors to indicate this. */
+ else
+ return 0;
+
+ if (pub_key_file == NULL)
+ {
+ /*
+ If a key path was submitted but no key located then we print an error
+ message. Else we just report that there is no public key.
+ */
+ fprintf(stderr,"Can't locate server public key '%s'\n",
+ mysql->options.extension->server_public_key_path);
+
+ return 0;
+ }
+
+ mysql_mutex_lock(&g_public_key_mutex);
+ key= g_public_key= PEM_read_RSA_PUBKEY(pub_key_file, 0, 0, 0);
+ mysql_mutex_unlock(&g_public_key_mutex);
+ fclose(pub_key_file);
+ if (g_public_key == NULL)
+ {
+ fprintf(stderr, "Public key is not in PEM format: '%s'\n",
+ mysql->options.extension->server_public_key_path);
+ return 0;
+ }
+
+ return key;
+}
+#endif // !defined(HAVE_YASSL)
+
+/**
+  Authenticate the client using the RSA or TLS and a SHA256 salted password.
+
+  Protocol as implemented below: read the 20-byte scramble from the
+  server; then either send the password in clear over an already
+  encrypted channel, or (OpenSSL builds only) XOR it with the scramble
+  and send it RSA-OAEP encrypted with the server public key, fetching
+  that key from the server if no local key file is configured.
+
+  @param vio Provides plugin access to communication channel
+  @param mysql Client connection handler
+
+  @return Error status
+  @retval CR_ERROR An error occurred.
+  @retval CR_OK Authentication succeeded.
+*/
+
+extern "C"
+int sha256_password_auth_client(MYSQL_PLUGIN_VIO *vio, MYSQL *mysql)
+{
+ bool uses_password= mysql->passwd[0] != 0;
+#if !defined(HAVE_YASSL)
+ unsigned char encrypted_password[MAX_CIPHER_LENGTH];
+ static char request_public_key= '\1';
+ RSA *public_key= NULL;
+ bool got_public_key_from_server= false;
+#endif
+ bool connection_is_secure= false;
+ unsigned char scramble_pkt[20];
+ unsigned char *pkt;
+
+
+ DBUG_ENTER("sha256_password_auth_client");
+
+ /*
+ Get the scramble from the server because we need it when sending encrypted
+ password.
+ */
+ if (vio->read_packet(vio, &pkt) != SCRAMBLE_LENGTH)
+ {
+ DBUG_PRINT("info",("Scramble is not of correct length."));
+ DBUG_RETURN(CR_ERROR);
+ }
+ /*
+ Copy the scramble to the stack or it will be lost on the next use of the
+ net buffer.
+ */
+ memcpy(scramble_pkt, pkt, SCRAMBLE_LENGTH);
+
+ /* A negotiated SSL cipher means the channel is already encrypted */
+ if (mysql_get_ssl_cipher(mysql) != NULL)
+ connection_is_secure= true;
+
+ /* If connection isn't secure attempt to get the RSA public key file */
+ if (!connection_is_secure)
+ {
+ #if !defined(HAVE_YASSL)
+ public_key= rsa_init(mysql);
+#endif
+ }
+
+ if (!uses_password)
+ {
+ /* We're not using a password */
+ static const unsigned char zero_byte= '\0';
+ if (vio->write_packet(vio, (const unsigned char *) &zero_byte, 1))
+ DBUG_RETURN(CR_ERROR);
+ }
+ else
+ {
+ /* Password is a 0-terminated byte array ('\0' character included) */
+ unsigned int passwd_len= strlen(mysql->passwd) + 1;
+ if (!connection_is_secure)
+ {
+#if !defined(HAVE_YASSL)
+ /*
+ If no public key; request one from the server.
+ */
+ if (public_key == NULL)
+ {
+ if (vio->write_packet(vio, (const unsigned char *) &request_public_key,
+ 1))
+ DBUG_RETURN(CR_ERROR);
+
+ int pkt_len= 0;
+ unsigned char *pkt;
+ if ((pkt_len= vio->read_packet(vio, &pkt)) == -1)
+ DBUG_RETURN(CR_ERROR);
+ BIO* bio= BIO_new_mem_buf(pkt, pkt_len);
+ public_key= PEM_read_bio_RSA_PUBKEY(bio, NULL, NULL, NULL);
+ BIO_free(bio);
+ if (public_key == 0)
+ DBUG_RETURN(CR_ERROR);
+ got_public_key_from_server= true;
+ }
+
+ /* Obfuscate the plain text password with the session scramble.
+ NOTE(review): xor_string also transforms the terminating NUL
+ (its loop is '<='); the server must invert the same transform */
+ xor_string(mysql->passwd, strlen(mysql->passwd), (char *) scramble_pkt,
+ SCRAMBLE_LENGTH);
+ /* Encrypt the password and send it to the server */
+ int cipher_length= RSA_size(public_key);
+ /*
+ When using RSA_PKCS1_OAEP_PADDING the password length must be less
+ than RSA_size(rsa) - 41.
+ */
+ if (passwd_len + 41 >= (unsigned) cipher_length)
+ {
+ /* password message is too long */
+ DBUG_RETURN(CR_ERROR);
+ }
+ RSA_public_encrypt(passwd_len, (unsigned char *) mysql->passwd,
+ encrypted_password,
+ public_key, RSA_PKCS1_OAEP_PADDING);
+ if (got_public_key_from_server)
+ RSA_free(public_key);
+
+ if (vio->write_packet(vio, (uchar*) encrypted_password, cipher_length))
+ DBUG_RETURN(CR_ERROR);
+#else
+ set_mysql_extended_error(mysql, CR_AUTH_PLUGIN_ERR, unknown_sqlstate,
+ ER(CR_AUTH_PLUGIN_ERR), "sha256_password",
+ "Authentication requires SSL encryption");
+ DBUG_RETURN(CR_ERROR); // If no openssl support
+#endif
+ }
+ else
+ {
+ /* The vio is encrypted already; just send the plain text passwd */
+ if (vio->write_packet(vio, (uchar*) mysql->passwd, passwd_len))
+ DBUG_RETURN(CR_ERROR);
+ }
+
+ /* Wipe the (scramble-XORed) password copy from the handle */
+ memset(mysql->passwd, 0, passwd_len);
+ }
+
+ DBUG_RETURN(CR_OK);
+}
+
+#endif
diff --git a/sql-common/my_time.c b/sql-common/my_time.c
index fbcf52dbf19..640d52dab16 100644
--- a/sql-common/my_time.c
+++ b/sql-common/my_time.c
@@ -126,7 +126,7 @@ static int get_number(uint *val, uint *number_of_fields, const char **str,
static int get_digits(uint *val, uint *number_of_fields, const char **str,
const char *end, uint length)
{
- return get_number(val, number_of_fields, str, min(end, *str + length));
+ return get_number(val, number_of_fields, str, MY_MIN(end, *str + length));
}
static int get_punct(const char **str, const char *end)
diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt
index 8c2b6c81755..070afbd9c38 100644
--- a/sql/CMakeLists.txt
+++ b/sql/CMakeLists.txt
@@ -106,7 +106,7 @@ ADD_LIBRARY(sql STATIC ${SQL_SOURCE})
ADD_DEPENDENCIES(sql GenServerSource)
DTRACE_INSTRUMENT(sql)
TARGET_LINK_LIBRARIES(sql ${MYSQLD_STATIC_PLUGIN_LIBS}
- mysys dbug strings vio regex
+ mysys mysys_ssl dbug strings vio regex
${LIBWRAP} ${LIBCRYPT} ${LIBDL}
${SSL_LIBRARIES})
@@ -141,7 +141,7 @@ IF(NOT WITHOUT_DYNAMIC_PLUGINS)
# incremental appears to crash from time to time,if used with /DEF option
SET_TARGET_PROPERTIES(mysqld PROPERTIES LINK_FLAGS "${mysqld_link_flags} /DEF:mysqld.def /INCREMENTAL:NO")
- FOREACH (CORELIB sql mysys dbug strings)
+ FOREACH (CORELIB sql mysys mysys_ssl dbug strings)
GET_TARGET_PROPERTY(LOC ${CORELIB} LOCATION)
FILE(TO_NATIVE_PATH ${LOC} LOC)
SET (LIB_LOCATIONS ${LIB_LOCATIONS} ${LOC})
@@ -171,7 +171,7 @@ ENDIF()
# On Solaris, some extra effort is required in order to get dtrace probes
# from static libraries
DTRACE_INSTRUMENT_STATIC_LIBS(mysqld
- "sql;mysys;${MYSQLD_STATIC_PLUGIN_LIBS}")
+ "sql;mysys;mysys_ssl;${MYSQLD_STATIC_PLUGIN_LIBS}")
SET(WITH_MYSQLD_LDFLAGS "" CACHE STRING "Additional linker flags for mysqld")
@@ -222,7 +222,7 @@ ADD_CUSTOM_COMMAND(
MYSQL_ADD_EXECUTABLE(mysql_tzinfo_to_sql tztime.cc COMPONENT Server)
SET_TARGET_PROPERTIES(mysql_tzinfo_to_sql PROPERTIES COMPILE_FLAGS "-DTZINFO2SQL")
-TARGET_LINK_LIBRARIES(mysql_tzinfo_to_sql mysys)
+TARGET_LINK_LIBRARIES(mysql_tzinfo_to_sql mysys mysys_ssl)
ADD_CUSTOM_TARGET(
GenServerSource
diff --git a/sql/debug_sync.cc b/sql/debug_sync.cc
index 25f028e5451..5e439839bca 100644
--- a/sql/debug_sync.cc
+++ b/sql/debug_sync.cc
@@ -38,7 +38,7 @@
*/
struct st_debug_sync_action
{
- ulong activation_count; /* max(hit_limit, execute) */
+ ulong activation_count; /* MY_MAX(hit_limit, execute) */
ulong hit_limit; /* hits before kill query */
ulong execute; /* executes before self-clear */
ulong timeout; /* wait_for timeout */
@@ -734,6 +734,11 @@ static st_debug_sync_action *debug_sync_get_action(THD *thd,
static bool debug_sync_set_action(THD *thd, st_debug_sync_action *action)
{
+ if(!thd)
+ {
+ return;
+ }
+
st_debug_sync_control *ds_control= thd->debug_sync_control;
bool is_dsp_now= FALSE;
DBUG_ENTER("debug_sync_set_action");
@@ -741,7 +746,7 @@ static bool debug_sync_set_action(THD *thd, st_debug_sync_action *action)
DBUG_ASSERT(action);
DBUG_ASSERT(ds_control);
- action->activation_count= max(action->hit_limit, action->execute);
+ action->activation_count= MY_MAX(action->hit_limit, action->execute);
if (!action->activation_count)
{
debug_sync_remove_action(ds_control, action);
@@ -1521,9 +1526,10 @@ static void debug_sync_execute(THD *thd, st_debug_sync_action *action)
static void debug_sync(THD *thd, const char *sync_point_name, size_t name_len)
{
if (!thd)
- thd= current_thd;
- if (!thd)
- return;
+ {
+ if (!(thd= current_thd))
+ return;
+ }
st_debug_sync_control *ds_control= thd->debug_sync_control;
st_debug_sync_action *action;
diff --git a/sql/derror.cc b/sql/derror.cc
index 665427f45bc..abe642bea79 100644
--- a/sql/derror.cc
+++ b/sql/derror.cc
@@ -146,8 +146,8 @@ bool read_texts(const char *file_name, const char *language,
const char ***point, uint error_messages)
{
register uint i;
- uint count,funktpos,textcount;
- size_t length;
+ uint count,funktpos;
+ size_t offset, length;
File file;
char name[FN_REFLEN];
char lang_path[FN_REFLEN];
@@ -186,9 +186,8 @@ bool read_texts(const char *file_name, const char *language,
goto err;
funktpos=2;
if (head[0] != (uchar) 254 || head[1] != (uchar) 254 ||
- head[2] != 2 || head[3] != 2)
+ head[2] != 2 || head[3] != 3)
goto err; /* purecov: inspected */
- textcount=head[4];
error_message_charset_info= system_charset_info;
length=uint4korr(head+6); count=uint2korr(head+10);
@@ -203,7 +202,7 @@ Error message file '%s' had only %d error messages, but it should contain at lea
}
if (!(*point= (const char**)
- my_malloc((size_t) (max(length,count*2)+count*sizeof(char*)),MYF(0))))
+ my_malloc((size_t) (MY_MAX(length,count*2)+count*sizeof(char*)),MYF(0))))
{
funktpos=3; /* purecov: inspected */
goto err; /* purecov: inspected */
@@ -212,18 +211,15 @@ Error message file '%s' had only %d error messages, but it should contain at lea
if (mysql_file_read(file, buff, (size_t) count*2, MYF(MY_NABP)))
goto err;
- for (i=0, pos= buff ; i< count ; i++)
+ for (i=0, offset=0, pos= buff ; i< count ; i++)
{
- (*point)[i]= (char*) buff+uint2korr(pos);
+ (*point)[i]= (char*) buff+offset;
+ offset+= uint2korr(pos);
pos+=2;
}
if (mysql_file_read(file, buff, length, MYF(MY_NABP)))
goto err;
- for (i=1 ; i < textcount ; i++)
- {
- point[i]= *point +uint2korr(head+10+i+i);
- }
(void) mysql_file_close(file, MYF(0));
i= check_error_mesg(file_name, *point);
diff --git a/sql/field.cc b/sql/field.cc
index 1769e4e55cb..1ae5c95ad56 100644
--- a/sql/field.cc
+++ b/sql/field.cc
@@ -68,7 +68,7 @@ const char field_separator=',';
#define LONGLONG_TO_STRING_CONVERSION_BUFFER_SIZE 128
#define DECIMAL_TO_STRING_CONVERSION_BUFFER_SIZE 128
#define BLOB_PACK_LENGTH_TO_MAX_LENGH(arg) \
-((ulong) ((LL(1) << min(arg, 4) * 8) - LL(1)))
+((ulong) ((LL(1) << MY_MIN(arg, 4) * 8) - LL(1)))
#define ASSERT_COLUMN_MARKED_FOR_READ DBUG_ASSERT(!table || (!table->read_set || bitmap_is_set(table->read_set, field_index)))
#define ASSERT_COLUMN_MARKED_FOR_WRITE_OR_COMPUTED DBUG_ASSERT(is_stat_field || !table || (!table->write_set || bitmap_is_set(table->write_set, field_index) || bitmap_is_set(table->vcol_set, field_index)))
@@ -1070,7 +1070,7 @@ static void push_numerical_conversion_warning(THD* thd, const char* str,
const char* field_name="UNKNOWN",
ulong row_num=0)
{
- char buf[max(max(DOUBLE_TO_STRING_CONVERSION_BUFFER_SIZE,
+ char buf[MY_MAX(MY_MAX(DOUBLE_TO_STRING_CONVERSION_BUFFER_SIZE,
LONGLONG_TO_STRING_CONVERSION_BUFFER_SIZE),
DECIMAL_TO_STRING_CONVERSION_BUFFER_SIZE)];
@@ -2147,7 +2147,7 @@ int Field_decimal::store(const char *from_arg, uint len, CHARSET_INFO *cs)
tmp_uint=tmp_dec+(uint)(int_digits_end-int_digits_from);
else if (expo_sign_char == '-')
{
- tmp_uint=min(exponent,(uint)(int_digits_end-int_digits_from));
+ tmp_uint=MY_MIN(exponent,(uint)(int_digits_end-int_digits_from));
frac_digits_added_zeros=exponent-tmp_uint;
int_digits_end -= tmp_uint;
frac_digits_head_end=int_digits_end+tmp_uint;
@@ -2155,7 +2155,7 @@ int Field_decimal::store(const char *from_arg, uint len, CHARSET_INFO *cs)
}
else // (expo_sign_char=='+')
{
- tmp_uint=min(exponent,(uint)(frac_digits_end-frac_digits_from));
+ tmp_uint=MY_MIN(exponent,(uint)(frac_digits_end-frac_digits_from));
int_digits_added_zeros=exponent-tmp_uint;
int_digits_tail_from=frac_digits_from;
frac_digits_from=frac_digits_from+tmp_uint;
@@ -2574,7 +2574,7 @@ Field *Field_new_decimal::create_from_item (Item *item)
{
signed int overflow;
- dec= min(dec, DECIMAL_MAX_SCALE);
+ dec= MY_MIN(dec, DECIMAL_MAX_SCALE);
/*
If the value still overflows the field with the corrected dec,
@@ -2590,7 +2590,7 @@ Field *Field_new_decimal::create_from_item (Item *item)
overflow= required_length - len;
if (overflow > 0)
- dec= max(0, dec - overflow); // too long, discard fract
+ dec= MY_MAX(0, dec - overflow); // too long, discard fract
else
/* Corrected value fits. */
len= required_length;
@@ -3139,7 +3139,7 @@ String *Field_tiny::val_str(String *val_buffer,
ASSERT_COLUMN_MARKED_FOR_READ;
CHARSET_INFO *cs= &my_charset_numeric;
uint length;
- uint mlength=max(field_length+1,5*cs->mbmaxlen);
+ uint mlength=MY_MAX(field_length+1,5*cs->mbmaxlen);
val_buffer->alloc(mlength);
char *to=(char*) val_buffer->ptr();
@@ -3321,7 +3321,7 @@ String *Field_short::val_str(String *val_buffer,
ASSERT_COLUMN_MARKED_FOR_READ;
CHARSET_INFO *cs= &my_charset_numeric;
uint length;
- uint mlength=max(field_length+1,7*cs->mbmaxlen);
+ uint mlength=MY_MAX(field_length+1,7*cs->mbmaxlen);
val_buffer->alloc(mlength);
char *to=(char*) val_buffer->ptr();
short j;
@@ -3511,7 +3511,7 @@ String *Field_medium::val_str(String *val_buffer,
ASSERT_COLUMN_MARKED_FOR_READ;
CHARSET_INFO *cs= &my_charset_numeric;
uint length;
- uint mlength=max(field_length+1,10*cs->mbmaxlen);
+ uint mlength=MY_MAX(field_length+1,10*cs->mbmaxlen);
val_buffer->alloc(mlength);
char *to=(char*) val_buffer->ptr();
long j= unsigned_flag ? (long) uint3korr(ptr) : sint3korr(ptr);
@@ -3700,7 +3700,7 @@ String *Field_long::val_str(String *val_buffer,
ASSERT_COLUMN_MARKED_FOR_READ;
CHARSET_INFO *cs= &my_charset_numeric;
uint length;
- uint mlength=max(field_length+1,12*cs->mbmaxlen);
+ uint mlength=MY_MAX(field_length+1,12*cs->mbmaxlen);
val_buffer->alloc(mlength);
char *to=(char*) val_buffer->ptr();
int32 j;
@@ -3850,7 +3850,7 @@ String *Field_longlong::val_str(String *val_buffer,
{
CHARSET_INFO *cs= &my_charset_numeric;
uint length;
- uint mlength=max(field_length+1,22*cs->mbmaxlen);
+ uint mlength=MY_MAX(field_length+1,22*cs->mbmaxlen);
val_buffer->alloc(mlength);
char *to=(char*) val_buffer->ptr();
longlong j;
@@ -6379,7 +6379,7 @@ void Field_string::sql_type(String &res) const
uchar *Field_string::pack(uchar *to, const uchar *from, uint max_length)
{
- uint length= min(field_length,max_length);
+ uint length= MY_MIN(field_length,max_length);
uint local_char_length= max_length/field_charset->mbmaxlen;
DBUG_PRINT("debug", ("Packing field '%s' - length: %u ", field_name, length));
@@ -7126,7 +7126,7 @@ int Field_blob::store(const char *from,uint length,CHARSET_INFO *cs)
from= tmpstr.ptr();
}
- new_length= min(max_data_length(), field_charset->mbmaxlen * length);
+ new_length= MY_MIN(max_data_length(), field_charset->mbmaxlen * length);
if (value.alloc(new_length))
goto oom_error;
@@ -7286,7 +7286,7 @@ int Field_blob::cmp_binary(const uchar *a_ptr, const uchar *b_ptr,
b_length=get_length(b_ptr);
if (b_length > max_length)
b_length=max_length;
- diff=memcmp(a,b,min(a_length,b_length));
+ diff=memcmp(a,b,MY_MIN(a_length,b_length));
return diff ? diff : (int) (a_length - b_length);
}
@@ -7464,7 +7464,7 @@ uchar *Field_blob::pack(uchar *to, const uchar *from, uint max_length)
length given is smaller than the actual length of the blob, we
just store the initial bytes of the blob.
*/
- store_length(to, packlength, min(length, max_length));
+ store_length(to, packlength, MY_MIN(length, max_length));
/*
Store the actual blob data, which will occupy 'length' bytes.
@@ -8342,7 +8342,7 @@ String *Field_bit::val_str(String *val_buffer,
{
ASSERT_COLUMN_MARKED_FOR_READ;
char buff[sizeof(longlong)];
- uint length= min(pack_length(), sizeof(longlong));
+ uint length= MY_MIN(pack_length(), sizeof(longlong));
ulonglong bits= val_int();
mi_int8store(buff,bits);
@@ -8430,7 +8430,7 @@ uint Field_bit::get_key_image(uchar *buff, uint length, imagetype type_arg)
*buff++= bits;
length--;
}
- uint data_length = min(length, bytes_in_rec);
+ uint data_length = MY_MIN(length, bytes_in_rec);
memcpy(buff, ptr, data_length);
return data_length + 1;
}
@@ -8554,7 +8554,7 @@ Field_bit::pack(uchar *to, const uchar *from, uint max_length)
uchar bits= get_rec_bits(bit_ptr + (from - ptr), bit_ofs, bit_len);
*to++= bits;
}
- length= min(bytes_in_rec, max_length - (bit_len > 0));
+ length= MY_MIN(bytes_in_rec, max_length - (bit_len > 0));
memcpy(to, from, length);
return to + length;
}
diff --git a/sql/field.h b/sql/field.h
index e832928b114..162812adfba 100644
--- a/sql/field.h
+++ b/sql/field.h
@@ -464,32 +464,53 @@ public:
*/
virtual void sql_type(String &str) const =0;
virtual uint size_of() const =0; // For new field
- inline bool is_null(my_ptrdiff_t row_offset= 0)
- { return null_ptr ? (null_ptr[row_offset] & null_bit ? 1 : 0) : table->null_row; }
- inline bool is_real_null(my_ptrdiff_t row_offset= 0)
+ inline bool is_null(my_ptrdiff_t row_offset= 0) const
+ {
+ /*
+ The table may have been marked as containing only NULL values
+ for all fields if it is a NULL-complemented row of an OUTER JOIN
+ or if the query is an implicitly grouped query (has aggregate
+ functions but no GROUP BY clause) with no qualifying rows. If
+ this is the case (in which TABLE::null_row is true), the field
+ is considered to be NULL.
+ Note that if a table->null_row is set then also all null_bits are
+ set for the row.
+
+ Otherwise, if the field is NULLable, it has a valid null_ptr
+ pointer, and its NULLity is recorded in the "null_bit" bit of
+ null_ptr[row_offset].
+ */
+ return (table->null_row ? TRUE :
+ null_ptr ? test(null_ptr[row_offset] & null_bit) : 0);
+ }
+ inline bool is_real_null(my_ptrdiff_t row_offset= 0) const
{ return null_ptr ? (null_ptr[row_offset] & null_bit ? 1 : 0) : 0; }
- inline bool is_null_in_record(const uchar *record)
+ inline bool is_null_in_record(const uchar *record) const
{
if (!null_ptr)
return 0;
return test(record[(uint) (null_ptr -table->record[0])] &
null_bit);
}
- inline bool is_null_in_record_with_offset(my_ptrdiff_t col_offset)
- {
- if (!null_ptr)
- return 0;
- return test(null_ptr[col_offset] & null_bit);
- }
inline void set_null(my_ptrdiff_t row_offset= 0)
{ if (null_ptr) null_ptr[row_offset]|= null_bit; }
inline void set_notnull(my_ptrdiff_t row_offset= 0)
{ if (null_ptr) null_ptr[row_offset]&= (uchar) ~null_bit; }
- inline bool maybe_null(void) { return null_ptr != 0 || table->maybe_null; }
- /**
- Signals that this field is NULL-able.
- */
- inline bool real_maybe_null(void) { return null_ptr != 0; }
+ inline bool maybe_null(void) const
+ { return null_ptr != 0 || table->maybe_null; }
+
+ /* @return true if this field is NULL-able, false otherwise. */
+ inline bool real_maybe_null(void) const { return null_ptr != 0; }
+ uint null_offset(const uchar *record) const
+ { return (uint) (null_ptr - record); }
+
+ uint null_offset() const
+ { return null_offset(table->record[0]); }
+ void set_null_ptr(uchar *p_null_ptr, uint p_null_bit)
+ {
+ null_ptr= p_null_ptr;
+ null_bit= p_null_bit;
+ }
inline THD *get_thd() { return table ? table->in_use : current_thd; }
diff --git a/sql/filesort.cc b/sql/filesort.cc
index 49aaa0af574..9195255c363 100644
--- a/sql/filesort.cc
+++ b/sql/filesort.cc
@@ -245,12 +245,12 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
{
DBUG_PRINT("info", ("filesort PQ is not applicable"));
- ulong min_sort_memory= max(MIN_SORT_MEMORY, param.sort_length*MERGEBUFF2);
+ ulong min_sort_memory= MY_MAX(MIN_SORT_MEMORY, param.sort_length*MERGEBUFF2);
set_if_bigger(min_sort_memory, sizeof(BUFFPEK*)*MERGEBUFF2);
while (memory_available >= min_sort_memory)
{
ulong keys= memory_available / (param.rec_length + sizeof(char*));
- param.max_keys_per_buffer= (uint) min(num_rows, keys);
+ param.max_keys_per_buffer= (uint) MY_MIN(num_rows, keys);
if (table_sort.get_sort_keys())
{
// If we have already allocated a buffer, it better have same size!
@@ -1368,7 +1368,7 @@ uint read_to_buffer(IO_CACHE *fromfile, BUFFPEK *buffpek,
register uint count;
uint length;
- if ((count=(uint) min((ha_rows) buffpek->max_keys,buffpek->count)))
+ if ((count=(uint) MY_MIN((ha_rows) buffpek->max_keys,buffpek->count)))
{
if (mysql_file_pread(fromfile->file, (uchar*) buffpek->base,
(length= rec_length*count),
@@ -1693,7 +1693,7 @@ int merge_buffers(Sort_param *param, IO_CACHE *from_file,
!= -1 && error != 0);
end:
- lastbuff->count= min(org_max_rows-max_rows, param->max_rows);
+ lastbuff->count= MY_MIN(org_max_rows-max_rows, param->max_rows);
lastbuff->file_pos= to_start_filepos;
err:
delete_queue(&queue);
diff --git a/sql/ha_ndbcluster.cc b/sql/ha_ndbcluster.cc
index 6fc30fa4fa0..3c0bdc724c1 100644
--- a/sql/ha_ndbcluster.cc
+++ b/sql/ha_ndbcluster.cc
@@ -930,7 +930,7 @@ int ha_ndbcluster::set_ndb_value(NdbOperation *ndb_op, Field *field,
DBUG_PRINT("value", ("set blob ptr: 0x%lx len: %u",
(long) blob_ptr, blob_len));
- DBUG_DUMP("value", blob_ptr, min(blob_len, 26));
+ DBUG_DUMP("value", blob_ptr, MY_MIN(blob_len, 26));
if (set_blob_value)
*set_blob_value= TRUE;
diff --git a/sql/handler.cc b/sql/handler.cc
index 5297a8e8cfc..2e0ccc5e1e5 100644
--- a/sql/handler.cc
+++ b/sql/handler.cc
@@ -3076,13 +3076,25 @@ void handler::ha_release_auto_increment()
}
-void handler::print_keydup_error(uint key_nr, const char *msg, myf errflag)
+/**
+ Construct and emit duplicate key error message using information
+ from table's record buffer.
+
+ @param table TABLE object which record buffer should be used as
+ source for column values.
+ @param key Key description.
+ @param msg Error message template to which key value should be
+ added.
+ @param errflag Flags for my_error() call.
+*/
+
+void print_keydup_error(TABLE *table, KEY *key, const char *msg, myf errflag)
{
/* Write the duplicated key in the error message */
- char key[MAX_KEY_LENGTH];
- String str(key,sizeof(key),system_charset_info);
+ char key_buff[MAX_KEY_LENGTH];
+ String str(key_buff,sizeof(key_buff),system_charset_info);
- if (key_nr == MAX_KEY)
+ if (key == NULL)
{
/* Key is unknown */
str.copy("", 0, system_charset_info);
@@ -3091,18 +3103,29 @@ void handler::print_keydup_error(uint key_nr, const char *msg, myf errflag)
else
{
/* Table is opened and defined at this point */
- key_unpack(&str,table,(uint) key_nr);
+ key_unpack(&str,table, key);
uint max_length=MYSQL_ERRMSG_SIZE-(uint) strlen(msg);
if (str.length() >= max_length)
{
str.length(max_length-4);
str.append(STRING_WITH_LEN("..."));
}
- my_printf_error(ER_DUP_ENTRY, msg,
- errflag, str.c_ptr_safe(), table->key_info[key_nr].name);
+ my_printf_error(ER_DUP_ENTRY, msg, errflag, str.c_ptr_safe(), key->name);
}
}
+/**
+ Construct and emit duplicate key error message using information
+ from table's record buffer.
+
+ @sa print_keydup_error(table, key, msg, errflag).
+*/
+
+void print_keydup_error(TABLE *table, KEY *key, myf errflag)
+{
+ print_keydup_error(table, key, ER(ER_DUP_ENTRY_WITH_KEY_NAME), errflag);
+}
+
/**
Print error that we got from handler function.
diff --git a/sql/handler.h b/sql/handler.h
index 8ee1044f10c..e74a2c6c42c 100644
--- a/sql/handler.h
+++ b/sql/handler.h
@@ -40,6 +40,8 @@
#error MAX_KEY is too large. Values up to 128 are supported.
#endif
+class Alter_info;
+
// the following is for checking tables
#define HA_ADMIN_ALREADY_DONE 1
@@ -57,6 +59,22 @@
#define HA_ADMIN_NEEDS_ALTER -11
#define HA_ADMIN_NEEDS_CHECK -12
+/**
+ Return values for check_if_supported_inplace_alter().
+
+ @see check_if_supported_inplace_alter() for description of
+ the individual values.
+*/
+enum enum_alter_inplace_result {
+ HA_ALTER_ERROR,
+ HA_ALTER_INPLACE_NOT_SUPPORTED,
+ HA_ALTER_INPLACE_EXCLUSIVE_LOCK,
+ HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE,
+ HA_ALTER_INPLACE_SHARED_LOCK,
+ HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE,
+ HA_ALTER_INPLACE_NO_LOCK
+};
+
/* Bits in table_flags() to show what database can do */
#define HA_NO_TRANSACTIONS (1 << 0) /* Doesn't support transactions */
@@ -84,7 +102,7 @@
*/
#define HA_REQUIRES_KEY_COLUMNS_FOR_DELETE (1 << 6)
#define HA_NULL_IN_KEY (1 << 7) /* One can have keys with NULL */
-#define HA_DUPLICATE_POS (1 << 8) /* ha_position() gives dup row */
+#define HA_DUPLICATE_POS (1 << 8) /* position() gives dup row */
#define HA_NO_BLOBS (1 << 9) /* Doesn't support blobs */
#define HA_CAN_INDEX_BLOBS (1 << 10)
#define HA_AUTO_PART_KEY (1 << 11) /* auto-increment in multi-part key */
@@ -97,8 +115,8 @@
#define HA_CAN_INSERT_DELAYED (1 << 14)
/*
If we get the primary key columns for free when we do an index read
- It also implies that we have to retrive the primary key when using
- position() and rnd_pos().
+ (usually, it also implies that HA_PRIMARY_KEY_REQUIRED_FOR_POSITION
+ flag is set).
*/
#define HA_PRIMARY_KEY_IN_READ_INDEX (1 << 15)
/*
@@ -430,6 +448,45 @@ enum enum_binlog_command {
/* The following two are used by Maria engine: */
#define HA_CREATE_USED_TRANSACTIONAL (1L << 20)
#define HA_CREATE_USED_PAGE_CHECKSUM (1L << 21)
+/** This is set whenever STATS_PERSISTENT=0|1|default has been
+specified in CREATE/ALTER TABLE. See also HA_OPTION_STATS_PERSISTENT in
+include/my_base.h. It is possible to distinguish whether
+STATS_PERSISTENT=default has been specified or no STATS_PERSISTENT= is
+given at all. */
+#define HA_CREATE_USED_STATS_PERSISTENT (1L << 22)
+/**
+ This is set whenever STATS_AUTO_RECALC=0|1|default has been
+ specified in CREATE/ALTER TABLE. See enum_stats_auto_recalc.
+ It is possible to distinguish whether STATS_AUTO_RECALC=default
+ has been specified or no STATS_AUTO_RECALC= is given at all.
+*/
+#define HA_CREATE_USED_STATS_AUTO_RECALC (1L << 23)
+/**
+ This is set whenever STATS_SAMPLE_PAGES=N|default has been
+ specified in CREATE/ALTER TABLE. It is possible to distinguish whether
+ STATS_SAMPLE_PAGES=default has been specified or no STATS_SAMPLE_PAGES= is
+ given at all.
+*/
+#define HA_CREATE_USED_STATS_SAMPLE_PAGES (1L << 24)
+
+
+/*
+ This is master database for most of system tables. However there
+ can be other databases which can hold system tables. Respective
+ storage engines define their own system database names.
+*/
+extern const char *mysqld_system_database;
+
+/*
+ Structure to hold list of system_database.system_table.
+ This is used at both mysqld and storage engine layer.
+*/
+struct st_system_tablename
+{
+ const char *db;
+ const char *tablename;
+};
+
typedef ulonglong my_xid; // this line is the same as in log_event.h
#define MYSQL_XID_PREFIX "MySQLXid"
@@ -1105,15 +1162,74 @@ struct handlerton
const char *name);
uint32 license; /* Flag for Engine License */
+ void *data; /* Location for engines to keep personal structures */
+
/*
Optional clauses in the CREATE/ALTER TABLE
*/
ha_create_table_option *table_options; // table level options
ha_create_table_option *field_options; // these are specified per field
ha_create_table_option *index_options; // these are specified per index
-
};
+/**
+ The handler supports read before write removal optimization
+
+ Read before write removal may be used for storage engines which support
+ write without previous read of the row to be updated. Handler returning
+ this flag must implement start_read_removal() and end_read_removal().
+ The handler may return "fake" rows constructed from the key of the row
+ asked for. This is used to optimize UPDATE and DELETE by reducing the
+  number of roundtrips between handler and storage engine.
+
+ Example:
+ UPDATE a=1 WHERE pk IN (<keys>)
+
+ mysql_update()
+ {
+ if (<conditions for starting read removal>)
+ start_read_removal()
+ -> handler returns true if read removal supported for this table/query
+
+ while(read_record("pk=<key>"))
+ -> handler returns fake row with column "pk" set to <key>
+
+ ha_update_row()
+ -> handler sends write "a=1" for row with "pk=<key>"
+
+ end_read_removal()
+ -> handler returns the number of rows actually written
+ }
+
+ @note This optimization in combination with batching may be used to
+ remove even more roundtrips.
+*/
+#define HA_READ_BEFORE_WRITE_REMOVAL (LL(1) << 38)
+
+/*
+ Engine supports extended fulltext API
+ */
+#define HA_CAN_FULLTEXT_EXT (LL(1) << 39)
+
+/*
+ Storage engine doesn't synchronize result set with expected table contents.
+ Used by replication slave to check if it is possible to retrieve rows from
+ the table when deciding whether to do a full table scan, index scan or hash
+ scan while applying a row event.
+ */
+#define HA_READ_OUT_OF_SYNC (LL(1) << 40)
+
+/*
+ Storage engine supports table export using the
+ FLUSH TABLE <table_list> FOR EXPORT statement.
+ */
+#define HA_CAN_EXPORT (LL(1) << 41)
+
+/*
+  The handler doesn't want accesses to this table to
+ be const-table optimized
+*/
+#define HA_BLOCK_CONST_TABLE (LL(1) << 42)
inline LEX_STRING *hton_name(const handlerton *hton)
{
@@ -1315,6 +1431,10 @@ struct st_partition_iter;
enum ha_choice { HA_CHOICE_UNDEF, HA_CHOICE_NO, HA_CHOICE_YES };
+enum enum_stats_auto_recalc { HA_STATS_AUTO_RECALC_DEFAULT= 0,
+ HA_STATS_AUTO_RECALC_ON,
+ HA_STATS_AUTO_RECALC_OFF };
+
typedef struct st_ha_create_information
{
CHARSET_INFO *table_charset, *default_table_charset;
@@ -1329,6 +1449,9 @@ typedef struct st_ha_create_information
ulong avg_row_length;
ulong used_fields;
ulong key_block_size;
+ uint stats_sample_pages; /* number of pages to sample during
+ stats estimation, if used, otherwise 0. */
+ enum_stats_auto_recalc stats_auto_recalc;
SQL_I_List<TABLE_LIST> merge_list;
handlerton *db_type;
/**
@@ -1358,12 +1481,306 @@ typedef struct st_ha_create_information
} HA_CREATE_INFO;
+/**
+ In-place alter handler context.
+
+ This is a superclass intended to be subclassed by individual handlers
+ in order to store handler unique context between in-place alter API calls.
+
+ The handler is responsible for creating the object. This can be done
+ as early as during check_if_supported_inplace_alter().
+
+ The SQL layer is responsible for destroying the object.
+ The class extends Sql_alloc so the memory will be mem root allocated.
+
+ @see Alter_inplace_info
+*/
+
+class inplace_alter_handler_ctx : public Sql_alloc
+{
+public:
+ inplace_alter_handler_ctx() {}
+
+ virtual ~inplace_alter_handler_ctx() {}
+};
+
+
+/**
+ Class describing changes to be done by ALTER TABLE.
+ Instance of this class is passed to storage engine in order
+ to determine if this ALTER TABLE can be done using in-place
+ algorithm. It is also used for executing the ALTER TABLE
+ using in-place algorithm.
+*/
+
+class Alter_inplace_info
+{
+public:
+ /**
+ Bits to show in detail what operations the storage engine is
+ to execute.
+
+ All these operations are supported as in-place operations by the
+ SQL layer. This means that operations that by their nature must
+ be performed by copying the table to a temporary table, will not
+ have their own flags here (e.g. ALTER TABLE FORCE, ALTER TABLE
+ ENGINE).
+
+ We generally try to specify handler flags only if there are real
+ changes. But in cases when it is cumbersome to determine if some
+ attribute has really changed we might choose to set flag
+ pessimistically, for example, relying on parser output only.
+ */
+ typedef ulong HA_ALTER_FLAGS;
+
+ // Add non-unique, non-primary index
+ static const HA_ALTER_FLAGS ADD_INDEX = 1L << 0;
+
+ // Drop non-unique, non-primary index
+ static const HA_ALTER_FLAGS DROP_INDEX = 1L << 1;
+
+ // Add unique, non-primary index
+ static const HA_ALTER_FLAGS ADD_UNIQUE_INDEX = 1L << 2;
+
+ // Drop unique, non-primary index
+ static const HA_ALTER_FLAGS DROP_UNIQUE_INDEX = 1L << 3;
+
+ // Add primary index
+ static const HA_ALTER_FLAGS ADD_PK_INDEX = 1L << 4;
+
+ // Drop primary index
+ static const HA_ALTER_FLAGS DROP_PK_INDEX = 1L << 5;
+
+ // Add column
+ static const HA_ALTER_FLAGS ADD_COLUMN = 1L << 6;
+
+ // Drop column
+ static const HA_ALTER_FLAGS DROP_COLUMN = 1L << 7;
+
+ // Rename column
+ static const HA_ALTER_FLAGS ALTER_COLUMN_NAME = 1L << 8;
+
+ // Change column datatype
+ static const HA_ALTER_FLAGS ALTER_COLUMN_TYPE = 1L << 9;
+
+ /**
+ Change column datatype in such way that new type has compatible
+ packed representation with old type, so it is theoretically
+ possible to perform change by only updating data dictionary
+ without changing table rows.
+ */
+ static const HA_ALTER_FLAGS ALTER_COLUMN_EQUAL_PACK_LENGTH = 1L << 10;
+
+ // Reorder column
+ static const HA_ALTER_FLAGS ALTER_COLUMN_ORDER = 1L << 11;
+
+ // Change column from NOT NULL to NULL
+ static const HA_ALTER_FLAGS ALTER_COLUMN_NULLABLE = 1L << 12;
+
+ // Change column from NULL to NOT NULL
+ static const HA_ALTER_FLAGS ALTER_COLUMN_NOT_NULLABLE = 1L << 13;
+
+ // Set or remove default column value
+ static const HA_ALTER_FLAGS ALTER_COLUMN_DEFAULT = 1L << 14;
+
+ // Add foreign key
+ static const HA_ALTER_FLAGS ADD_FOREIGN_KEY = 1L << 15;
+
+ // Drop foreign key
+ static const HA_ALTER_FLAGS DROP_FOREIGN_KEY = 1L << 16;
+
+ // table_options changed, see HA_CREATE_INFO::used_fields for details.
+ static const HA_ALTER_FLAGS CHANGE_CREATE_OPTION = 1L << 17;
+
+ // Table is renamed
+ static const HA_ALTER_FLAGS ALTER_RENAME = 1L << 18;
+
+ // Change the storage type of column
+ static const HA_ALTER_FLAGS ALTER_COLUMN_STORAGE_TYPE = 1L << 19;
+
+ // Change the column format of column
+ static const HA_ALTER_FLAGS ALTER_COLUMN_COLUMN_FORMAT = 1L << 20;
+
+ // Add partition
+ static const HA_ALTER_FLAGS ADD_PARTITION = 1L << 21;
+
+ // Drop partition
+ static const HA_ALTER_FLAGS DROP_PARTITION = 1L << 22;
+
+ // Changing partition options
+ static const HA_ALTER_FLAGS ALTER_PARTITION = 1L << 23;
+
+ // Coalesce partition
+ static const HA_ALTER_FLAGS COALESCE_PARTITION = 1L << 24;
+
+ // Reorganize partition ... into
+ static const HA_ALTER_FLAGS REORGANIZE_PARTITION = 1L << 25;
+
+ // Reorganize partition
+ static const HA_ALTER_FLAGS ALTER_TABLE_REORG = 1L << 26;
+
+ // Remove partitioning
+ static const HA_ALTER_FLAGS ALTER_REMOVE_PARTITIONING = 1L << 27;
+
+ // Partition operation with ALL keyword
+ static const HA_ALTER_FLAGS ALTER_ALL_PARTITION = 1L << 28;
+
+ /**
+ Create options (like MAX_ROWS) for the new version of table.
+
+ @note The referenced instance of HA_CREATE_INFO object was already
+ used to create new .FRM file for table being altered. So it
+ has been processed by mysql_prepare_create_table() already.
+ For example, this means that it has HA_OPTION_PACK_RECORD
+ flag in HA_CREATE_INFO::table_options member correctly set.
+ */
+ HA_CREATE_INFO *create_info;
+
+ /**
+ Alter options, fields and keys for the new version of table.
+
+ @note The referenced instance of Alter_info object was already
+ used to create new .FRM file for table being altered. So it
+ has been processed by mysql_prepare_create_table() already.
+ In particular, this means that in Create_field objects for
+ fields which were present in some form in the old version
+ of table, Create_field::field member points to corresponding
+ Field instance for old version of table.
+ */
+ Alter_info *alter_info;
+
+ /**
+ Array of KEYs for new version of table - including KEYs to be added.
+
+ @note Currently this array is produced as result of
+ mysql_prepare_create_table() call.
+ This means that it follows different convention for
+ KEY_PART_INFO::fieldnr values than objects in TABLE::key_info
+ array.
+
+ @todo This is mainly due to the fact that we need to keep compatibility
+ with removed handler::add_index() call. We plan to switch to
+ TABLE::key_info numbering later.
+
+ KEYs are sorted - see sort_keys().
+ */
+ KEY *key_info_buffer;
+
+ /** Size of key_info_buffer array. */
+ uint key_count;
+
+ /** Size of index_drop_buffer array. */
+ uint index_drop_count;
+
+ /**
+ Array of pointers to KEYs to be dropped belonging to the TABLE instance
+ for the old version of the table.
+ */
+ KEY **index_drop_buffer;
+
+ /** Size of index_add_buffer array. */
+ uint index_add_count;
+
+ /**
+ Array of indexes into key_info_buffer for KEYs to be added,
+ sorted in increasing order.
+ */
+ uint *index_add_buffer;
+
+ /**
+ Context information to allow handlers to keep context between in-place
+ alter API calls.
+
+ @see inplace_alter_handler_ctx for information about object lifecycle.
+ */
+ inplace_alter_handler_ctx *handler_ctx;
+
+ /**
+ Flags describing in detail which operations the storage engine is to execute.
+ */
+ HA_ALTER_FLAGS handler_flags;
+
+ /**
+ Partition_info taking into account the partition changes to be performed.
+ Contains all partitions which are present in the old version of the table
+ with partitions to be dropped or changed marked as such + all partitions
+ to be added in the new version of table marked as such.
+ */
+ partition_info *modified_part_info;
+
+ /** true for ALTER IGNORE TABLE ... */
+ const bool ignore;
+
+ /** true for online operation (LOCK=NONE) */
+ bool online;
+
+ /**
+ Can be set by handler to describe why a given operation cannot be done
+ in-place (HA_ALTER_INPLACE_NOT_SUPPORTED) or why it cannot be done
+ online (HA_ALTER_INPLACE_NO_LOCK or
+ HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE)
+ If set, it will be used with ER_ALTER_OPERATION_NOT_SUPPORTED_REASON if
+ results from handler::check_if_supported_inplace_alter() doesn't match
+ requirements set by user. If not set, the more generic
+ ER_ALTER_OPERATION_NOT_SUPPORTED will be used.
+
+ Please set to a properly localized string, for example using
+ my_get_err_msg(), so that the error message as a whole is localized.
+ */
+ const char *unsupported_reason;
+
+ Alter_inplace_info(HA_CREATE_INFO *create_info_arg,
+ Alter_info *alter_info_arg,
+ KEY *key_info_arg, uint key_count_arg,
+ partition_info *modified_part_info_arg,
+ bool ignore_arg)
+ : create_info(create_info_arg),
+ alter_info(alter_info_arg),
+ key_info_buffer(key_info_arg),
+ key_count(key_count_arg),
+ index_drop_count(0),
+ index_drop_buffer(NULL),
+ index_add_count(0),
+ index_add_buffer(NULL),
+ handler_ctx(NULL),
+ handler_flags(0),
+ modified_part_info(modified_part_info_arg),
+ ignore(ignore_arg),
+ online(false),
+ unsupported_reason(NULL)
+ {}
+
+ ~Alter_inplace_info()
+ {
+ delete handler_ctx;
+ }
+
+ /**
+ Used after check_if_supported_inplace_alter() to report
+ error if the result does not match the LOCK/ALGORITHM
+ requirements set by the user.
+
+ @param not_supported Part of statement that was not supported.
+ @param try_instead Suggestion as to what the user should
+ replace not_supported with.
+ */
+ void report_unsupported_error(const char *not_supported,
+ const char *try_instead);
+};
+
+
typedef struct st_key_create_information
{
enum ha_key_alg algorithm;
ulong block_size;
LEX_STRING parser_name;
LEX_STRING comment;
+ /**
+ A flag to determine if we will check for duplicate indexes.
+ This typically means that the key information was specified
+ directly by the user (set by the parser).
+ */
+ bool check_for_duplicate_indexes;
} KEY_CREATE_INFO;
@@ -2060,7 +2477,6 @@ public:
void adjust_next_insert_id_after_explicit_value(ulonglong nr);
int update_auto_increment();
- void print_keydup_error(uint key_nr, const char *msg, myf errflag);
virtual void print_error(int error, myf errflag);
virtual bool get_error_message(int error, String *buf);
uint get_dup_key(int error);
@@ -2557,15 +2973,15 @@ public:
{ return (HA_ERR_WRONG_COMMAND); }
uint max_record_length() const
- { return min(HA_MAX_REC_LENGTH, max_supported_record_length()); }
+ { return MY_MIN(HA_MAX_REC_LENGTH, max_supported_record_length()); }
uint max_keys() const
- { return min(MAX_KEY, max_supported_keys()); }
+ { return MY_MIN(MAX_KEY, max_supported_keys()); }
uint max_key_parts() const
- { return min(MAX_REF_PARTS, max_supported_key_parts()); }
+ { return MY_MIN(MAX_REF_PARTS, max_supported_key_parts()); }
uint max_key_length() const
- { return min(MAX_KEY_LENGTH, max_supported_key_length()); }
+ { return MY_MIN(MAX_KEY_LENGTH, max_supported_key_length()); }
uint max_key_part_length() const
- { return min(MAX_KEY_LENGTH, max_supported_key_part_length()); }
+ { return MY_MIN(MAX_KEY_LENGTH, max_supported_key_part_length()); }
virtual uint max_supported_record_length() const { return HA_MAX_REC_LENGTH; }
virtual uint max_supported_keys() const { return 0; }
@@ -3192,4 +3608,7 @@ inline const char *table_case_name(HA_CREATE_INFO *info, const char *name)
{
return ((lower_case_table_names == 2 && info->alias) ? info->alias : name);
}
+
+void print_keydup_error(TABLE *table, KEY *key, const char *msg, myf errflag);
+void print_keydup_error(TABLE *table, KEY *key, myf errflag);
#endif
diff --git a/sql/item.cc b/sql/item.cc
index 665521c641e..80b3269dc63 100644
--- a/sql/item.cc
+++ b/sql/item.cc
@@ -105,7 +105,7 @@ void
Hybrid_type_traits_decimal::fix_length_and_dec(Item *item, Item *arg) const
{
item->decimals= arg->decimals;
- item->max_length= min(arg->max_length + DECIMAL_LONGLONG_DIGITS,
+ item->max_length= MY_MIN(arg->max_length + DECIMAL_LONGLONG_DIGITS,
DECIMAL_MAX_STR_LENGTH);
}
@@ -531,9 +531,9 @@ uint Item::decimal_precision() const
uint prec=
my_decimal_length_to_precision(max_char_length(), decimals,
unsigned_flag);
- return min(prec, DECIMAL_MAX_PRECISION);
+ return MY_MIN(prec, DECIMAL_MAX_PRECISION);
}
- return min(max_char_length(), DECIMAL_MAX_PRECISION);
+ return MY_MIN(max_char_length(), DECIMAL_MAX_PRECISION);
}
@@ -977,7 +977,7 @@ void Item::set_name(const char *str, uint length, CHARSET_INFO *cs)
{
char buff[SAFE_NAME_LEN];
strmake(buff, str_start,
- min(sizeof(buff)-1, length + (int) (str-str_start)));
+ MY_MIN(sizeof(buff)-1, length + (int) (str-str_start)));
if (length == 0)
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
@@ -998,7 +998,7 @@ void Item::set_name(const char *str, uint length, CHARSET_INFO *cs)
name_length= res_length;
}
else
- name= sql_strmake(str, (name_length= min(length,MAX_ALIAS_NAME)));
+ name= sql_strmake(str, (name_length= MY_MIN(length,MAX_ALIAS_NAME)));
}
@@ -6157,7 +6157,7 @@ longlong Item_hex_string::val_int()
// following assert is redundant, because fixed=1 assigned in constructor
DBUG_ASSERT(fixed == 1);
char *end=(char*) str_value.ptr()+str_value.length(),
- *ptr=end-min(str_value.length(),sizeof(longlong));
+ *ptr=end-MY_MIN(str_value.length(),sizeof(longlong));
ulonglong value=0;
for (; ptr != end ; ptr++)
@@ -6212,7 +6212,7 @@ warn:
void Item_hex_string::print(String *str, enum_query_type query_type)
{
char *end= (char*) str_value.ptr() + str_value.length(),
- *ptr= end - min(str_value.length(), sizeof(longlong));
+ *ptr= end - MY_MIN(str_value.length(), sizeof(longlong));
str->append("0x");
for (; ptr != end ; ptr++)
{
@@ -9295,14 +9295,14 @@ bool Item_type_holder::join_types(THD *thd, Item *item)
/* fix variable decimals which always is NOT_FIXED_DEC */
if (Field::result_merge_type(fld_type) == INT_RESULT)
item_decimals= 0;
- decimals= max(decimals, item_decimals);
+ decimals= MY_MAX(decimals, item_decimals);
}
if (Field::result_merge_type(fld_type) == DECIMAL_RESULT)
{
- decimals= min(max(decimals, item->decimals), DECIMAL_MAX_SCALE);
+ decimals= MY_MIN(MY_MAX(decimals, item->decimals), DECIMAL_MAX_SCALE);
int item_int_part= item->decimal_int_part();
- int item_prec = max(prev_decimal_int_part, item_int_part) + decimals;
- int precision= min(item_prec, DECIMAL_MAX_PRECISION);
+ int item_prec = MY_MAX(prev_decimal_int_part, item_int_part) + decimals;
+ int precision= MY_MIN(item_prec, DECIMAL_MAX_PRECISION);
unsigned_flag&= item->unsigned_flag;
max_length= my_decimal_precision_to_length_no_truncation(precision,
decimals,
@@ -9333,7 +9333,7 @@ bool Item_type_holder::join_types(THD *thd, Item *item)
*/
if (collation.collation != &my_charset_bin)
{
- max_length= max(old_max_chars * collation.collation->mbmaxlen,
+ max_length= MY_MAX(old_max_chars * collation.collation->mbmaxlen,
display_length(item) /
item->collation.collation->mbmaxlen *
collation.collation->mbmaxlen);
@@ -9355,7 +9355,7 @@ bool Item_type_holder::join_types(THD *thd, Item *item)
{
int delta1= max_length_orig - decimals_orig;
int delta2= item->max_length - item->decimals;
- max_length= max(delta1, delta2) + decimals;
+ max_length= MY_MAX(delta1, delta2) + decimals;
if (fld_type == MYSQL_TYPE_FLOAT && max_length > FLT_DIG + 2)
{
max_length= MAX_FLOAT_STR_LENGTH;
@@ -9373,7 +9373,7 @@ bool Item_type_holder::join_types(THD *thd, Item *item)
break;
}
default:
- max_length= max(max_length, display_length(item));
+ max_length= MY_MAX(max_length, display_length(item));
};
maybe_null|= item->maybe_null;
get_full_info(item);
diff --git a/sql/item_buff.cc b/sql/item_buff.cc
index ce396736d6f..a08ae8d8403 100644
--- a/sql/item_buff.cc
+++ b/sql/item_buff.cc
@@ -71,7 +71,7 @@ Cached_item::~Cached_item() {}
Cached_item_str::Cached_item_str(THD *thd, Item *arg)
:item(arg),
- value_max_length(min(arg->max_length, thd->variables.max_sort_length)),
+ value_max_length(MY_MIN(arg->max_length, thd->variables.max_sort_length)),
value(value_max_length)
{}
@@ -81,7 +81,7 @@ bool Cached_item_str::cmp(void)
bool tmp;
if ((res=item->val_str(&tmp_value)))
- res->length(min(res->length(), value_max_length));
+ res->length(MY_MIN(res->length(), value_max_length));
if (null_value != item->null_value)
{
if ((null_value= item->null_value))
diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc
index 3b09da68927..d49af9bc2a0 100644
--- a/sql/item_cmpfunc.cc
+++ b/sql/item_cmpfunc.cc
@@ -679,7 +679,7 @@ int Arg_comparator::set_compare_func(Item_result_field *item, Item_result type)
{
if ((*a)->decimals < NOT_FIXED_DEC && (*b)->decimals < NOT_FIXED_DEC)
{
- precision= 5 / log_10[max((*a)->decimals, (*b)->decimals) + 1];
+ precision= 5 / log_10[MY_MAX((*a)->decimals, (*b)->decimals) + 1];
if (func == &Arg_comparator::compare_real)
func= &Arg_comparator::compare_real_fixed;
else if (func == &Arg_comparator::compare_e_real)
@@ -1019,7 +1019,7 @@ int Arg_comparator::compare_binary_string()
owner->null_value= 0;
uint res1_length= res1->length();
uint res2_length= res2->length();
- int cmp= memcmp(res1->ptr(), res2->ptr(), min(res1_length,res2_length));
+ int cmp= memcmp(res1->ptr(), res2->ptr(), MY_MIN(res1_length,res2_length));
return cmp ? cmp : (int) (res1_length - res2_length);
}
}
@@ -2377,7 +2377,7 @@ Item_func_ifnull::fix_length_and_dec()
uint32 char_length;
agg_result_type(&hybrid_type, args, 2);
maybe_null=args[1]->maybe_null;
- decimals= max(args[0]->decimals, args[1]->decimals);
+ decimals= MY_MAX(args[0]->decimals, args[1]->decimals);
unsigned_flag= args[0]->unsigned_flag && args[1]->unsigned_flag;
if (hybrid_type == DECIMAL_RESULT || hybrid_type == INT_RESULT)
@@ -2388,10 +2388,10 @@ Item_func_ifnull::fix_length_and_dec()
int len1= args[1]->max_char_length() - args[1]->decimals
- (args[1]->unsigned_flag ? 0 : 1);
- char_length= max(len0, len1) + decimals + (unsigned_flag ? 0 : 1);
+ char_length= MY_MAX(len0, len1) + decimals + (unsigned_flag ? 0 : 1);
}
else
- char_length= max(args[0]->max_char_length(), args[1]->max_char_length());
+ char_length= MY_MAX(args[0]->max_char_length(), args[1]->max_char_length());
switch (hybrid_type) {
case STRING_RESULT:
@@ -2418,9 +2418,9 @@ uint Item_func_ifnull::decimal_precision() const
{
int arg0_int_part= args[0]->decimal_int_part();
int arg1_int_part= args[1]->decimal_int_part();
- int max_int_part= max(arg0_int_part, arg1_int_part);
+ int max_int_part= MY_MAX(arg0_int_part, arg1_int_part);
int precision= max_int_part + decimals;
- return min(precision, DECIMAL_MAX_PRECISION);
+ return MY_MIN(precision, DECIMAL_MAX_PRECISION);
}
@@ -2597,7 +2597,7 @@ Item_func_if::fix_length_and_dec()
agg_result_type(&cached_result_type, args + 1, 2);
maybe_null= args[1]->maybe_null || args[2]->maybe_null;
- decimals= max(args[1]->decimals, args[2]->decimals);
+ decimals= MY_MAX(args[1]->decimals, args[2]->decimals);
unsigned_flag=args[1]->unsigned_flag && args[2]->unsigned_flag;
if (cached_result_type == STRING_RESULT)
@@ -2621,10 +2621,10 @@ Item_func_if::fix_length_and_dec()
int len2= args[2]->max_length - args[2]->decimals
- (args[2]->unsigned_flag ? 0 : 1);
- char_length= max(len1, len2) + decimals + (unsigned_flag ? 0 : 1);
+ char_length= MY_MAX(len1, len2) + decimals + (unsigned_flag ? 0 : 1);
}
else
- char_length= max(args[1]->max_char_length(), args[2]->max_char_length());
+ char_length= MY_MAX(args[1]->max_char_length(), args[2]->max_char_length());
fix_char_length(char_length);
}
@@ -2633,8 +2633,8 @@ uint Item_func_if::decimal_precision() const
{
int arg1_prec= args[1]->decimal_int_part();
int arg2_prec= args[2]->decimal_int_part();
- int precision=max(arg1_prec,arg2_prec) + decimals;
- return min(precision, DECIMAL_MAX_PRECISION);
+ int precision=MY_MAX(arg1_prec,arg2_prec) + decimals;
+ return MY_MIN(precision, DECIMAL_MAX_PRECISION);
}
@@ -2935,7 +2935,7 @@ bool Item_func_case::fix_fields(THD *thd, Item **ref)
void Item_func_case::agg_str_lengths(Item* arg)
{
- fix_char_length(max(max_char_length(), arg->max_char_length()));
+ fix_char_length(MY_MAX(max_char_length(), arg->max_char_length()));
set_if_bigger(decimals, arg->decimals);
unsigned_flag= unsigned_flag && arg->unsigned_flag;
}
@@ -3135,7 +3135,7 @@ uint Item_func_case::decimal_precision() const
if (else_expr_num != -1)
set_if_bigger(max_int_part, args[else_expr_num]->decimal_int_part());
- return min(max_int_part + decimals, DECIMAL_MAX_PRECISION);
+ return MY_MIN(max_int_part + decimals, DECIMAL_MAX_PRECISION);
}
@@ -5095,7 +5095,7 @@ void Item_func_like::turboBM_compute_suffixes(int *suff)
else
{
if (i < g)
- g = i; // g = min(i, g)
+ g = i; // g = MY_MIN(i, g)
f = i;
while (g >= 0 && pattern[g] == pattern[g + plm1 - f])
g--;
@@ -5114,7 +5114,7 @@ void Item_func_like::turboBM_compute_suffixes(int *suff)
else
{
if (i < g)
- g = i; // g = min(i, g)
+ g = i; // g = MY_MIN(i, g)
f = i;
while (g >= 0 &&
likeconv(cs, pattern[g]) == likeconv(cs, pattern[g + plm1 - f]))
@@ -5235,14 +5235,14 @@ bool Item_func_like::turboBM_matches(const char* text, int text_len) const
register const int v = plm1 - i;
turboShift = u - v;
bcShift = bmBc[(uint) (uchar) text[i + j]] - plm1 + i;
- shift = max(turboShift, bcShift);
- shift = max(shift, bmGs[i]);
+ shift = MY_MAX(turboShift, bcShift);
+ shift = MY_MAX(shift, bmGs[i]);
if (shift == bmGs[i])
- u = min(pattern_len - shift, v);
+ u = MY_MIN(pattern_len - shift, v);
else
{
if (turboShift < bcShift)
- shift = max(shift, u + 1);
+ shift = MY_MAX(shift, u + 1);
u = 0;
}
j+= shift;
@@ -5266,14 +5266,14 @@ bool Item_func_like::turboBM_matches(const char* text, int text_len) const
register const int v = plm1 - i;
turboShift = u - v;
bcShift = bmBc[(uint) likeconv(cs, text[i + j])] - plm1 + i;
- shift = max(turboShift, bcShift);
- shift = max(shift, bmGs[i]);
+ shift = MY_MAX(turboShift, bcShift);
+ shift = MY_MAX(shift, bmGs[i]);
if (shift == bmGs[i])
- u = min(pattern_len - shift, v);
+ u = MY_MIN(pattern_len - shift, v);
else
{
if (turboShift < bcShift)
- shift = max(shift, u + 1);
+ shift = MY_MAX(shift, u + 1);
u = 0;
}
j+= shift;
diff --git a/sql/item_create.cc b/sql/item_create.cc
index fc31b074055..1475a44f32a 100644
--- a/sql/item_create.cc
+++ b/sql/item_create.cc
@@ -55,7 +55,7 @@ static void wrong_precision_error(uint errcode, Item *a,
char buff[1024];
String buf(buff, sizeof(buff), system_charset_info);
- my_error(errcode, MYF(0), (uint) min(number, UINT_MAX32),
+ my_error(errcode, MYF(0), (uint) MY_MIN(number, UINT_MAX32),
item_name(a, &buf), maximum);
}
diff --git a/sql/item_func.cc b/sql/item_func.cc
index 390ece724cb..1692f2b3d89 100644
--- a/sql/item_func.cc
+++ b/sql/item_func.cc
@@ -635,7 +635,7 @@ void Item_func::count_decimal_length()
set_if_bigger(max_int_part, args[i]->decimal_int_part());
set_if_smaller(unsigned_flag, args[i]->unsigned_flag);
}
- int precision= min(max_int_part + decimals, DECIMAL_MAX_PRECISION);
+ int precision= MY_MIN(max_int_part + decimals, DECIMAL_MAX_PRECISION);
fix_char_length(my_decimal_precision_to_length_no_truncation(precision,
decimals,
unsigned_flag));
@@ -1371,10 +1371,10 @@ my_decimal *Item_func_plus::decimal_op(my_decimal *decimal_value)
*/
void Item_func_additive_op::result_precision()
{
- decimals= max(args[0]->decimals, args[1]->decimals);
+ decimals= MY_MAX(args[0]->decimals, args[1]->decimals);
int arg1_int= args[0]->decimal_precision() - args[0]->decimals;
int arg2_int= args[1]->decimal_precision() - args[1]->decimals;
- int precision= max(arg1_int, arg2_int) + 1 + decimals;
+ int precision= MY_MAX(arg1_int, arg2_int) + 1 + decimals;
/* Integer operations keep unsigned_flag if one of arguments is unsigned */
if (result_type() == INT_RESULT)
@@ -1612,9 +1612,9 @@ void Item_func_mul::result_precision()
unsigned_flag= args[0]->unsigned_flag | args[1]->unsigned_flag;
else
unsigned_flag= args[0]->unsigned_flag & args[1]->unsigned_flag;
- decimals= min(args[0]->decimals + args[1]->decimals, DECIMAL_MAX_SCALE);
+ decimals= MY_MIN(args[0]->decimals + args[1]->decimals, DECIMAL_MAX_SCALE);
uint est_prec = args[0]->decimal_precision() + args[1]->decimal_precision();
- uint precision= min(est_prec, DECIMAL_MAX_PRECISION);
+ uint precision= MY_MIN(est_prec, DECIMAL_MAX_PRECISION);
max_length= my_decimal_precision_to_length_no_truncation(precision, decimals,
unsigned_flag);
}
@@ -1666,7 +1666,7 @@ my_decimal *Item_func_div::decimal_op(my_decimal *decimal_value)
void Item_func_div::result_precision()
{
- uint precision=min(args[0]->decimal_precision() +
+ uint precision=MY_MIN(args[0]->decimal_precision() +
args[1]->decimals + prec_increment,
DECIMAL_MAX_PRECISION);
@@ -1675,7 +1675,7 @@ void Item_func_div::result_precision()
unsigned_flag= args[0]->unsigned_flag | args[1]->unsigned_flag;
else
unsigned_flag= args[0]->unsigned_flag & args[1]->unsigned_flag;
- decimals= min(args[0]->decimals + prec_increment, DECIMAL_MAX_SCALE);
+ decimals= MY_MIN(args[0]->decimals + prec_increment, DECIMAL_MAX_SCALE);
max_length= my_decimal_precision_to_length_no_truncation(precision, decimals,
unsigned_flag);
}
@@ -1689,7 +1689,7 @@ void Item_func_div::fix_length_and_dec()
switch (hybrid_type) {
case REAL_RESULT:
{
- decimals=max(args[0]->decimals,args[1]->decimals)+prec_increment;
+ decimals=MY_MAX(args[0]->decimals,args[1]->decimals)+prec_increment;
set_if_smaller(decimals, NOT_FIXED_DEC);
uint tmp=float_length(decimals);
if (decimals == NOT_FIXED_DEC)
@@ -1878,8 +1878,8 @@ my_decimal *Item_func_mod::decimal_op(my_decimal *decimal_value)
void Item_func_mod::result_precision()
{
- decimals= max(args[0]->decimals, args[1]->decimals);
- max_length= max(args[0]->max_length, args[1]->max_length);
+ decimals= MY_MAX(args[0]->decimals, args[1]->decimals);
+ max_length= MY_MAX(args[0]->max_length, args[1]->max_length);
}
@@ -2424,7 +2424,7 @@ void Item_func_round::fix_length_and_dec()
if (args[0]->decimals == NOT_FIXED_DEC)
{
- decimals= min(decimals_to_set, NOT_FIXED_DEC);
+ decimals= MY_MIN(decimals_to_set, NOT_FIXED_DEC);
max_length= float_length(decimals);
hybrid_type= REAL_RESULT;
return;
@@ -2434,7 +2434,7 @@ void Item_func_round::fix_length_and_dec()
case REAL_RESULT:
case STRING_RESULT:
hybrid_type= REAL_RESULT;
- decimals= min(decimals_to_set, NOT_FIXED_DEC);
+ decimals= MY_MIN(decimals_to_set, NOT_FIXED_DEC);
max_length= float_length(decimals);
break;
case INT_RESULT:
@@ -2451,13 +2451,13 @@ void Item_func_round::fix_length_and_dec()
case DECIMAL_RESULT:
{
hybrid_type= DECIMAL_RESULT;
- decimals_to_set= min(DECIMAL_MAX_SCALE, decimals_to_set);
+ decimals_to_set= MY_MIN(DECIMAL_MAX_SCALE, decimals_to_set);
int decimals_delta= args[0]->decimals - decimals_to_set;
int precision= args[0]->decimal_precision();
int length_increase= ((decimals_delta <= 0) || truncate) ? 0:1;
precision-= decimals_delta - length_increase;
- decimals= min(decimals_to_set, DECIMAL_MAX_SCALE);
+ decimals= MY_MIN(decimals_to_set, DECIMAL_MAX_SCALE);
max_length= my_decimal_precision_to_length_no_truncation(precision,
decimals,
unsigned_flag);
@@ -2568,7 +2568,7 @@ my_decimal *Item_func_round::decimal_op(my_decimal *decimal_value)
my_decimal val, *value= args[0]->val_decimal(&val);
longlong dec= args[1]->val_int();
if (dec >= 0 || args[1]->unsigned_flag)
- dec= min((ulonglong) dec, decimals);
+ dec= MY_MIN((ulonglong) dec, decimals);
else if (dec < INT_MIN)
dec= INT_MIN;
@@ -3428,7 +3428,7 @@ udf_handler::fix_fields(THD *thd, Item_result_field *func,
free_udf(u_d);
DBUG_RETURN(TRUE);
}
- func->max_length=min(initid.max_length,MAX_BLOB_WIDTH);
+ func->max_length=MY_MIN(initid.max_length,MAX_BLOB_WIDTH);
func->maybe_null=initid.maybe_null;
const_item_cache=initid.const_item;
/*
@@ -3437,7 +3437,7 @@ udf_handler::fix_fields(THD *thd, Item_result_field *func,
*/
if (!const_item_cache && !used_tables_cache)
used_tables_cache= RAND_TABLE_BIT;
- func->decimals=min(initid.decimals,NOT_FIXED_DEC);
+ func->decimals=MY_MIN(initid.decimals,NOT_FIXED_DEC);
}
initialized=1;
if (error)
diff --git a/sql/item_func.h b/sql/item_func.h
index f562c87fe1c..d7c065e56f3 100644
--- a/sql/item_func.h
+++ b/sql/item_func.h
@@ -563,7 +563,7 @@ public:
const char *func_name() const { return "cast_as_unsigned"; }
void fix_length_and_dec()
{
- fix_char_length(min(args[0]->max_char_length(),
+ fix_char_length(MY_MIN(args[0]->max_char_length(),
DECIMAL_MAX_PRECISION + 2));
unsigned_flag=1;
}
diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc
index 5071e494f04..0aafe2c3a74 100644
--- a/sql/item_strfunc.cc
+++ b/sql/item_strfunc.cc
@@ -585,7 +585,7 @@ String *Item_func_concat::val_str(String *str)
}
else
{
- uint new_len = max(tmp_value.alloced_length() * 2, concat_len);
+ uint new_len = MY_MAX(tmp_value.alloced_length() * 2, concat_len);
if (tmp_value.realloc(new_len))
goto null;
@@ -934,7 +934,7 @@ String *Item_func_concat_ws::val_str(String *str)
}
else
{
- uint new_len = max(tmp_value.alloced_length() * 2, concat_len);
+ uint new_len = MY_MAX(tmp_value.alloced_length() * 2, concat_len);
if (tmp_value.realloc(new_len))
goto null;
@@ -1426,7 +1426,7 @@ String *Item_func_substr::val_str(String *str)
length= res->charpos((int) length, (uint32) start);
tmp_length= res->length() - start;
- length= min(length, tmp_length);
+ length= MY_MIN(length, tmp_length);
if (!start && (longlong) res->length() == length)
return res;
@@ -1449,7 +1449,7 @@ void Item_func_substr::fix_length_and_dec()
else if (start < 0)
max_length= ((uint)(-start) > max_length) ? 0 : (uint)(-start);
else
- max_length-= min((uint)(start - 1), max_length);
+ max_length-= MY_MIN((uint)(start - 1), max_length);
}
if (arg_count == 3 && args[2]->const_item())
{
@@ -2143,7 +2143,7 @@ String *Item_func_soundex::val_str(String *str)
if ((null_value= args[0]->null_value))
return 0; /* purecov: inspected */
- if (tmp_value.alloc(max(res->length(), 4 * cs->mbminlen)))
+ if (tmp_value.alloc(MY_MAX(res->length(), 4 * cs->mbminlen)))
return str; /* purecov: inspected */
char *to= (char *) tmp_value.ptr();
char *to_end= to + tmp_value.alloced_length();
@@ -3363,7 +3363,7 @@ String* Item_func_export_set::val_str(String* str)
const ulong max_allowed_packet= current_thd->variables.max_allowed_packet;
const uint num_separators= num_set_values > 0 ? num_set_values - 1 : 0;
const ulonglong max_total_length=
- num_set_values * max(yes->length(), no->length()) +
+ num_set_values * MY_MAX(yes->length(), no->length()) +
num_separators * sep->length();
if (unlikely(max_total_length > max_allowed_packet))
@@ -3392,11 +3392,11 @@ String* Item_func_export_set::val_str(String* str)
void Item_func_export_set::fix_length_and_dec()
{
- uint32 length= max(args[1]->max_char_length(), args[2]->max_char_length());
+ uint32 length= MY_MAX(args[1]->max_char_length(), args[2]->max_char_length());
uint32 sep_length= (arg_count > 3 ? args[3]->max_char_length() : 1);
if (agg_arg_charsets_for_string_result(collation,
- args + 1, min(4, arg_count) - 1))
+ args + 1, MY_MIN(4, arg_count) - 1))
return;
fix_char_length(length * 64 + sep_length * 63);
}
@@ -4464,7 +4464,7 @@ longlong Item_dyncol_get::val_int()
if (end != org_end || error > 0)
{
char buff[80];
- strmake(buff, val.x.string.value.str, min(sizeof(buff)-1,
+ strmake(buff, val.x.string.value.str, MY_MIN(sizeof(buff)-1,
val.x.string.value.length));
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
ER_BAD_DATA,
@@ -4528,7 +4528,7 @@ double Item_dyncol_get::val_real()
error)
{
char buff[80];
- strmake(buff, val.x.string.value.str, min(sizeof(buff)-1,
+ strmake(buff, val.x.string.value.str, MY_MIN(sizeof(buff)-1,
val.x.string.value.length));
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
ER_BAD_DATA,
@@ -4584,7 +4584,7 @@ my_decimal *Item_dyncol_get::val_decimal(my_decimal *decimal_value)
rc= str2my_decimal(0, val.x.string.value.str, val.x.string.value.length,
val.x.string.charset, decimal_value);
char buff[80];
- strmake(buff, val.x.string.value.str, min(sizeof(buff)-1,
+ strmake(buff, val.x.string.value.str, MY_MIN(sizeof(buff)-1,
val.x.string.value.length));
if (rc != E_DEC_OK)
{
diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h
index 486b7cf36ef..c6b8397100b 100644
--- a/sql/item_strfunc.h
+++ b/sql/item_strfunc.h
@@ -812,7 +812,7 @@ public:
collation.set(args[0]->collation);
ulonglong max_result_length= (ulonglong) args[0]->max_length * 2 +
2 * collation.collation->mbmaxlen;
- max_length= (uint32) min(max_result_length, MAX_BLOB_WIDTH);
+ max_length= (uint32) MY_MIN(max_result_length, MAX_BLOB_WIDTH);
}
};
diff --git a/sql/item_sum.cc b/sql/item_sum.cc
index 8816e1352a9..165f9f4a5f8 100644
--- a/sql/item_sum.cc
+++ b/sql/item_sum.cc
@@ -36,7 +36,7 @@
ulonglong Item_sum::ram_limitation(THD *thd)
{
- return min(thd->variables.tmp_table_size,
+ return MY_MIN(thd->variables.tmp_table_size,
thd->variables.max_heap_table_size);
}
@@ -1581,16 +1581,16 @@ void Item_sum_avg::fix_length_and_dec()
if (hybrid_type == DECIMAL_RESULT)
{
int precision= args[0]->decimal_precision() + prec_increment;
- decimals= min(args[0]->decimals + prec_increment, DECIMAL_MAX_SCALE);
+ decimals= MY_MIN(args[0]->decimals + prec_increment, DECIMAL_MAX_SCALE);
max_length= my_decimal_precision_to_length_no_truncation(precision,
decimals,
unsigned_flag);
- f_precision= min(precision+DECIMAL_LONGLONG_DIGITS, DECIMAL_MAX_PRECISION);
+ f_precision= MY_MIN(precision+DECIMAL_LONGLONG_DIGITS, DECIMAL_MAX_PRECISION);
f_scale= args[0]->decimals;
dec_bin_size= my_decimal_get_binary_size(f_precision, f_scale);
}
else {
- decimals= min(args[0]->decimals + prec_increment, NOT_FIXED_DEC);
+ decimals= MY_MIN(args[0]->decimals + prec_increment, NOT_FIXED_DEC);
max_length= args[0]->max_length + prec_increment;
}
}
@@ -1787,13 +1787,13 @@ void Item_sum_variance::fix_length_and_dec()
switch (args[0]->result_type()) {
case REAL_RESULT:
case STRING_RESULT:
- decimals= min(args[0]->decimals + 4, NOT_FIXED_DEC);
+ decimals= MY_MIN(args[0]->decimals + 4, NOT_FIXED_DEC);
break;
case INT_RESULT:
case DECIMAL_RESULT:
{
int precision= args[0]->decimal_precision()*2 + prec_increment;
- decimals= min(args[0]->decimals + prec_increment, DECIMAL_MAX_SCALE);
+ decimals= MY_MIN(args[0]->decimals + prec_increment, DECIMAL_MAX_SCALE);
max_length= my_decimal_precision_to_length_no_truncation(precision,
decimals,
unsigned_flag);
@@ -3488,7 +3488,7 @@ bool Item_func_group_concat::setup(THD *thd)
syntax of this function). If there is no ORDER BY clause, we don't
create this tree.
*/
- init_tree(tree, (uint) min(thd->variables.max_heap_table_size,
+ init_tree(tree, (uint) MY_MIN(thd->variables.max_heap_table_size,
thd->variables.sortbuff_size/16), 0,
tree_key_length,
group_concat_key_cmp_with_order, NULL, (void*) this,
diff --git a/sql/item_sum.h b/sql/item_sum.h
index 40a28d8beae..1c692014652 100644
--- a/sql/item_sum.h
+++ b/sql/item_sum.h
@@ -1073,7 +1073,7 @@ public:
enum Sumfunctype sum_func () const {return MIN_FUNC;}
bool add();
- const char *func_name() const { return "min("; }
+  const char *func_name() const { return "min("; }
Item *copy_or_same(THD* thd);
};
@@ -1086,7 +1086,7 @@ public:
enum Sumfunctype sum_func () const {return MAX_FUNC;}
bool add();
- const char *func_name() const { return "max("; }
+  const char *func_name() const { return "max("; }
Item *copy_or_same(THD* thd);
};
diff --git a/sql/item_timefunc.cc b/sql/item_timefunc.cc
index 02a7b8511af..c3e8204fd37 100644
--- a/sql/item_timefunc.cc
+++ b/sql/item_timefunc.cc
@@ -146,14 +146,14 @@ static bool extract_date_time(DATE_TIME_FORMAT *format,
switch (*++ptr) {
/* Year */
case 'Y':
- tmp= (char*) val + min(4, val_len);
+ tmp= (char*) val + MY_MIN(4, val_len);
l_time->year= (int) my_strtoll10(val, &tmp, &error);
if ((int) (tmp-val) <= 2)
l_time->year= year_2000_handling(l_time->year);
val= tmp;
break;
case 'y':
- tmp= (char*) val + min(2, val_len);
+ tmp= (char*) val + MY_MIN(2, val_len);
l_time->year= (int) my_strtoll10(val, &tmp, &error);
val= tmp;
l_time->year= year_2000_handling(l_time->year);
@@ -162,7 +162,7 @@ static bool extract_date_time(DATE_TIME_FORMAT *format,
/* Month */
case 'm':
case 'c':
- tmp= (char*) val + min(2, val_len);
+ tmp= (char*) val + MY_MIN(2, val_len);
l_time->month= (int) my_strtoll10(val, &tmp, &error);
val= tmp;
break;
@@ -179,15 +179,15 @@ static bool extract_date_time(DATE_TIME_FORMAT *format,
/* Day */
case 'd':
case 'e':
- tmp= (char*) val + min(2, val_len);
+ tmp= (char*) val + MY_MIN(2, val_len);
l_time->day= (int) my_strtoll10(val, &tmp, &error);
val= tmp;
break;
case 'D':
- tmp= (char*) val + min(2, val_len);
+ tmp= (char*) val + MY_MIN(2, val_len);
l_time->day= (int) my_strtoll10(val, &tmp, &error);
/* Skip 'st, 'nd, 'th .. */
- val= tmp + min((int) (val_end-tmp), 2);
+ val= tmp + MY_MIN((int) (val_end-tmp), 2);
break;
/* Hour */
@@ -198,14 +198,14 @@ static bool extract_date_time(DATE_TIME_FORMAT *format,
/* fall through */
case 'k':
case 'H':
- tmp= (char*) val + min(2, val_len);
+ tmp= (char*) val + MY_MIN(2, val_len);
l_time->hour= (int) my_strtoll10(val, &tmp, &error);
val= tmp;
break;
/* Minute */
case 'i':
- tmp= (char*) val + min(2, val_len);
+ tmp= (char*) val + MY_MIN(2, val_len);
l_time->minute= (int) my_strtoll10(val, &tmp, &error);
val= tmp;
break;
@@ -213,7 +213,7 @@ static bool extract_date_time(DATE_TIME_FORMAT *format,
/* Second */
case 's':
case 'S':
- tmp= (char*) val + min(2, val_len);
+ tmp= (char*) val + MY_MIN(2, val_len);
l_time->second= (int) my_strtoll10(val, &tmp, &error);
val= tmp;
break;
@@ -265,7 +265,7 @@ static bool extract_date_time(DATE_TIME_FORMAT *format,
val= tmp;
break;
case 'j':
- tmp= (char*) val + min(val_len, 3);
+ tmp= (char*) val + MY_MIN(val_len, 3);
yearday= (int) my_strtoll10(val, &tmp, &error);
val= tmp;
break;
@@ -277,7 +277,7 @@ static bool extract_date_time(DATE_TIME_FORMAT *format,
case 'u':
sunday_first_n_first_week_non_iso= (*ptr=='U' || *ptr== 'V');
strict_week_number= (*ptr=='V' || *ptr=='v');
- tmp= (char*) val + min(val_len, 2);
+ tmp= (char*) val + MY_MIN(val_len, 2);
if ((week_number= (int) my_strtoll10(val, &tmp, &error)) < 0 ||
(strict_week_number && !week_number) ||
week_number > 53)
@@ -289,7 +289,7 @@ static bool extract_date_time(DATE_TIME_FORMAT *format,
case 'X':
case 'x':
strict_week_number_year_type= (*ptr=='X');
- tmp= (char*) val + min(4, val_len);
+ tmp= (char*) val + MY_MIN(4, val_len);
strict_week_number_year= (int) my_strtoll10(val, &tmp, &error);
val= tmp;
break;
@@ -437,7 +437,7 @@ static bool extract_date_time(DATE_TIME_FORMAT *format,
err:
{
char buff[128];
- strmake(buff, val_begin, min(length, sizeof(buff)-1));
+ strmake(buff, val_begin, MY_MIN(length, sizeof(buff)-1));
push_warning_printf(current_thd, MYSQL_ERROR::WARN_LEVEL_WARN,
ER_WRONG_VALUE_FOR_TYPE, ER(ER_WRONG_VALUE_FOR_TYPE),
date_time_type, buff, "str_to_date");
@@ -1751,7 +1751,7 @@ void Item_func_date_format::fix_length_and_dec()
else
{
fixed_length=0;
- max_length=min(arg1->max_length, MAX_BLOB_WIDTH) * 10 *
+ max_length=MY_MIN(arg1->max_length, MAX_BLOB_WIDTH) * 10 *
collation.collation->mbmaxlen;
set_if_smaller(max_length,MAX_BLOB_WIDTH);
}
@@ -2525,7 +2525,7 @@ err:
void Item_func_add_time::fix_length_and_dec()
{
enum_field_types arg0_field_type;
- decimals= max(args[0]->decimals, args[1]->decimals);
+ decimals= MY_MAX(args[0]->decimals, args[1]->decimals);
/*
The field type for the result of an Item_func_add_time function is defined
diff --git a/sql/item_timefunc.h b/sql/item_timefunc.h
index 3e3cd698efc..f25f4544e47 100644
--- a/sql/item_timefunc.h
+++ b/sql/item_timefunc.h
@@ -115,7 +115,7 @@ public:
{
int *input_version= (int*)int_arg;
/* This function was introduced in 5.5 */
- int output_version= max(*input_version, 50500);
+ int output_version= MY_MAX(*input_version, 50500);
*input_version= output_version;
return 0;
}
@@ -933,7 +933,7 @@ public:
const char *func_name() const { return "timediff"; }
void fix_length_and_dec()
{
- decimals= max(args[0]->decimals, args[1]->decimals);
+ decimals= MY_MAX(args[0]->decimals, args[1]->decimals);
Item_timefunc::fix_length_and_dec();
}
bool get_date(MYSQL_TIME *ltime, ulonglong fuzzy_date);
diff --git a/sql/key.cc b/sql/key.cc
index dd7818119c8..ebf9259d469 100644
--- a/sql/key.cc
+++ b/sql/key.cc
@@ -132,7 +132,7 @@ void key_copy(uchar *to_key, uchar *from_record, KEY *key_info,
Don't copy data for null values
The -1 below is to subtract the null byte which is already handled
*/
- length= min(key_length, (uint) key_part->store_length-1);
+ length= MY_MIN(key_length, (uint) key_part->store_length-1);
if (with_zerofill)
bzero((char*) to_key, length);
continue;
@@ -142,7 +142,7 @@ void key_copy(uchar *to_key, uchar *from_record, KEY *key_info,
key_part->key_part_flag & HA_VAR_LENGTH_PART)
{
key_length-= HA_KEY_BLOB_LENGTH;
- length= min(key_length, key_part->length);
+ length= MY_MIN(key_length, key_part->length);
uint bytes= key_part->field->get_key_image(to_key, length, Field::itRAW);
if (with_zerofill && bytes < length)
bzero((char*) to_key + bytes, length - bytes);
@@ -150,7 +150,7 @@ void key_copy(uchar *to_key, uchar *from_record, KEY *key_info,
}
else
{
- length= min(key_length, key_part->length);
+ length= MY_MIN(key_length, key_part->length);
Field *field= key_part->field;
CHARSET_INFO *cs= field->charset();
uint bytes= field->get_key_image(to_key, length, Field::itRAW);
@@ -202,7 +202,7 @@ void key_restore(uchar *to_record, uchar *from_key, KEY *key_info,
Don't copy data for null bytes
The -1 below is to subtract the null byte which is already handled
*/
- length= min(key_length, (uint) key_part->store_length-1);
+ length= MY_MIN(key_length, (uint) key_part->store_length-1);
continue;
}
}
@@ -244,7 +244,7 @@ void key_restore(uchar *to_record, uchar *from_key, KEY *key_info,
my_ptrdiff_t ptrdiff= to_record - field->table->record[0];
field->move_field_offset(ptrdiff);
key_length-= HA_KEY_BLOB_LENGTH;
- length= min(key_length, key_part->length);
+ length= MY_MIN(key_length, key_part->length);
old_map= dbug_tmp_use_all_columns(field->table, field->table->write_set);
field->set_key_image(from_key, length);
dbug_tmp_restore_column_map(field->table->write_set, old_map);
@@ -253,7 +253,7 @@ void key_restore(uchar *to_record, uchar *from_key, KEY *key_info,
}
else
{
- length= min(key_length, key_part->length);
+ length= MY_MIN(key_length, key_part->length);
/* skip the byte with 'uneven' bits, if used */
memcpy(to_record + key_part->offset, from_key + used_uneven_bits
, (size_t) length - used_uneven_bits);
@@ -311,7 +311,7 @@ bool key_cmp_if_same(TABLE *table,const uchar *key,uint idx,uint key_length)
return 1;
continue;
}
- length= min((uint) (key_end-key), store_length);
+ length= MY_MIN((uint) (key_end-key), store_length);
if (!(key_part->key_type & (FIELDFLAG_NUMBER+FIELDFLAG_BINARY+
FIELDFLAG_PACK)))
{
@@ -403,7 +403,7 @@ void key_unpack(String *to,TABLE *table,uint idx)
tmp.length(charpos);
}
if (key_part->length < field->pack_length())
- tmp.length(min(tmp.length(),key_part->length));
+ tmp.length(MY_MIN(tmp.length(),key_part->length));
ErrConvString err(&tmp);
to->append(err.ptr());
}
@@ -558,8 +558,8 @@ int key_rec_cmp(void *key_p, uchar *first_rec, uchar *second_rec)
if (key_part->null_bit)
{
/* The key_part can contain NULL values */
- bool first_is_null= field->is_null_in_record_with_offset(first_diff);
- bool sec_is_null= field->is_null_in_record_with_offset(sec_diff);
+ bool first_is_null= field->is_real_null(first_diff);
+ bool sec_is_null= field->is_real_null(sec_diff);
/*
NULL is smaller then everything so if first is NULL and the other
not then we know that we should return -1 and for the opposite
diff --git a/sql/log.cc b/sql/log.cc
index 254449da05a..2572dc61894 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -781,8 +781,8 @@ bool Log_to_csv_event_handler::
Open_tables_backup open_tables_backup;
CHARSET_INFO *client_cs= thd->variables.character_set_client;
bool save_time_zone_used;
- long query_time= (long) min(query_utime/1000000, TIME_MAX_VALUE_SECONDS);
- long lock_time= (long) min(lock_utime/1000000, TIME_MAX_VALUE_SECONDS);
+ long query_time= (long) MY_MIN(query_utime/1000000, TIME_MAX_VALUE_SECONDS);
+ long lock_time= (long) MY_MIN(lock_utime/1000000, TIME_MAX_VALUE_SECONDS);
long query_time_micro= (long) (query_utime % 1000000);
long lock_time_micro= (long) (lock_utime % 1000000);
@@ -2925,7 +2925,7 @@ const char *MYSQL_LOG::generate_name(const char *log_name,
{
char *p= fn_ext(log_name);
uint length= (uint) (p - log_name);
- strmake(buff, log_name, min(length, FN_REFLEN-1));
+ strmake(buff, log_name, MY_MIN(length, FN_REFLEN-1));
return (const char*)buff;
}
return log_name;
@@ -6992,7 +6992,7 @@ static void print_buffer_to_nt_eventlog(enum loglevel level, char *buff,
DBUG_ENTER("print_buffer_to_nt_eventlog");
/* Add ending CR/LF's to string, overwrite last chars if necessary */
- strmov(buffptr+min(length, buffLen-5), "\r\n\r\n");
+ strmov(buffptr+MY_MIN(length, buffLen-5), "\r\n\r\n");
setup_windows_event_source();
if ((event= RegisterEventSource(NULL,"MySQL")))
diff --git a/sql/log_event.cc b/sql/log_event.cc
index 7de72338d97..2dafd754293 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -1347,7 +1347,7 @@ Log_event* Log_event::read_log_event(IO_CACHE* file,
of 13 bytes, whereas LOG_EVENT_MINIMAL_HEADER_LEN is 19 bytes (it's
"minimal" over the set {MySQL >=4.0}).
*/
- uint header_size= min(description_event->common_header_len,
+ uint header_size= MY_MIN(description_event->common_header_len,
LOG_EVENT_MINIMAL_HEADER_LEN);
LOCK_MUTEX;
@@ -3090,7 +3090,7 @@ Query_log_event::Query_log_event(const char* buf, uint event_len,
be even bigger, but this will suffice to catch most corruption
errors that can lead to a crash.
*/
- if (status_vars_len > min(data_len, MAX_SIZE_LOG_EVENT_STATUS))
+ if (status_vars_len > MY_MIN(data_len, MAX_SIZE_LOG_EVENT_STATUS))
{
DBUG_PRINT("info", ("status_vars_len (%u) > data_len (%lu); query= 0",
status_vars_len, data_len));
@@ -6602,7 +6602,7 @@ bool User_var_log_event::write(IO_CACHE* file)
char buf[UV_NAME_LEN_SIZE];
char buf1[UV_VAL_IS_NULL + UV_VAL_TYPE_SIZE +
UV_CHARSET_NUMBER_SIZE + UV_VAL_LEN_SIZE];
- uchar buf2[max(8, DECIMAL_MAX_FIELD_SIZE + 2)], *pos= buf2;
+ uchar buf2[MY_MAX(8, DECIMAL_MAX_FIELD_SIZE + 2)], *pos= buf2;
uint unsigned_len= 0;
uint buf1_length;
ulong event_length;
@@ -8371,7 +8371,7 @@ int Rows_log_event::do_add_row_data(uchar *row_data, size_t length)
trigger false warnings.
*/
#ifndef HAVE_valgrind
- DBUG_DUMP("row_data", row_data, min(length, 32));
+ DBUG_DUMP("row_data", row_data, MY_MIN(length, 32));
#endif
DBUG_ASSERT(m_rows_buf <= m_rows_cur);
@@ -9453,7 +9453,7 @@ int Table_map_log_event::rewrite_db(const char* new_db, size_t new_len,
DBUG_ENTER("Table_map_log_event::rewrite_db");
DBUG_ASSERT(temp_buf);
- uint header_len= min(desc->common_header_len,
+ uint header_len= MY_MIN(desc->common_header_len,
LOG_EVENT_MINIMAL_HEADER_LEN) + TABLE_MAP_HEADER_LEN;
int len_diff;
diff --git a/sql/log_event_old.cc b/sql/log_event_old.cc
index e9afe474418..566a367430c 100644
--- a/sql/log_event_old.cc
+++ b/sql/log_event_old.cc
@@ -1406,7 +1406,7 @@ int Old_rows_log_event::do_add_row_data(uchar *row_data, size_t length)
trigger false warnings.
*/
#ifndef HAVE_valgrind
- DBUG_DUMP("row_data", row_data, min(length, 32));
+ DBUG_DUMP("row_data", row_data, MY_MIN(length, 32));
#endif
DBUG_ASSERT(m_rows_buf <= m_rows_cur);
diff --git a/sql/mdl.h b/sql/mdl.h
index 477f4df7807..c778dbbc1d7 100644
--- a/sql/mdl.h
+++ b/sql/mdl.h
@@ -288,7 +288,7 @@ public:
character set is utf-8, we can safely assume that no
character starts with a zero byte.
*/
- return memcmp(m_ptr, rhs->m_ptr, min(m_length, rhs->m_length));
+ return memcmp(m_ptr, rhs->m_ptr, MY_MIN(m_length, rhs->m_length));
}
MDL_key(const MDL_key *rhs)
diff --git a/sql/multi_range_read.cc b/sql/multi_range_read.cc
index e6cbed7eb13..04557a636d5 100644
--- a/sql/multi_range_read.cc
+++ b/sql/multi_range_read.cc
@@ -1647,7 +1647,7 @@ int DsMrr_impl::dsmrr_explain_info(uint mrr_mode, char *str, size_t size)
used_str= rowid_ordered;
uint used_str_len= strlen(used_str);
- uint copy_len= min(used_str_len, size);
+ uint copy_len= MY_MIN(used_str_len, size);
memcpy(str, used_str, size);
return copy_len;
}
@@ -1708,7 +1708,7 @@ bool DsMrr_impl::get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
else
{
cost->reset();
- *buffer_size= max(*buffer_size,
+ *buffer_size= MY_MAX(*buffer_size,
(size_t)(1.2*rows_in_last_step) * elem_size +
primary_file->ref_length + table->key_info[keynr].key_length);
}
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 2607297f2c9..8e8414a7acc 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -3915,7 +3915,7 @@ static int init_common_variables()
can't get max_connections*5 but still got no less than was
requested (value of wanted_files).
*/
- max_open_files= max(max(wanted_files,
+ max_open_files= MY_MAX(MY_MAX(wanted_files,
(max_connections + extra_max_connections)*5),
open_files_limit);
files= my_set_max_open_files(max_open_files);
@@ -3928,15 +3928,15 @@ static int init_common_variables()
If we have requested too much file handles than we bring
max_connections in supported bounds.
*/
- max_connections= (ulong) min(files-10-TABLE_OPEN_CACHE_MIN*2,
+ max_connections= (ulong) MY_MIN(files-10-TABLE_OPEN_CACHE_MIN*2,
max_connections);
/*
Decrease table_cache_size according to max_connections, but
- not below TABLE_OPEN_CACHE_MIN. Outer min() ensures that we
+ not below TABLE_OPEN_CACHE_MIN. Outer MY_MIN() ensures that we
never increase table_cache_size automatically (that could
happen if max_connections is decreased above).
*/
- table_cache_size= (ulong) min(max((files-10-max_connections)/2,
+ table_cache_size= (ulong) MY_MIN(MY_MAX((files-10-max_connections)/2,
TABLE_OPEN_CACHE_MIN),
table_cache_size);
DBUG_PRINT("warning",
diff --git a/sql/mysqld.h b/sql/mysqld.h
index 716423f9bd2..67c9f4e68ba 100644
--- a/sql/mysqld.h
+++ b/sql/mysqld.h
@@ -24,6 +24,7 @@
#include "mysql/psi/mysql_file.h" /* MYSQL_FILE */
#include "sql_list.h" /* I_List */
#include "sql_cmd.h"
+#include <my_rnd.h>
class THD;
struct handlerton;
diff --git a/sql/net_serv.cc b/sql/net_serv.cc
index b6890ab9fda..a9e3af13403 100644
--- a/sql/net_serv.cc
+++ b/sql/net_serv.cc
@@ -792,7 +792,7 @@ static my_bool my_net_skip_rest(NET *net, uint32 remain, thr_alarm_t *alarmed,
{
while (remain > 0)
{
- size_t length= min(remain, net->max_packet);
+ size_t length= MY_MIN(remain, net->max_packet);
if (net_safe_read(net, net->buff, length, alarmed))
DBUG_RETURN(1);
update_statistics(thd_increment_bytes_received(length));
@@ -989,7 +989,7 @@ my_real_read(NET *net, size_t *complen)
len=uint3korr(net->buff+net->where_b);
if (!len) /* End of big multi-packet */
goto end;
- helping = max(len,*complen) + net->where_b;
+ helping = MY_MAX(len,*complen) + net->where_b;
/* The necessary size of net->buff */
if (helping >= net->max_packet)
{
diff --git a/sql/opt_range.cc b/sql/opt_range.cc
index 2205d2fcab4..e3bca89d6df 100644
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -3079,7 +3079,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
group_trp= get_best_group_min_max(&param, tree, best_read_time);
if (group_trp)
{
- param.table->quick_condition_rows= min(group_trp->records,
+ param.table->quick_condition_rows= MY_MIN(group_trp->records,
head->stat_records());
if (group_trp->read_cost < best_read_time)
{
@@ -4757,7 +4757,7 @@ TABLE_READ_PLAN *get_best_disjunct_quick(PARAM *param, SEL_IMERGE *imerge,
{
imerge_trp->read_cost= imerge_cost;
imerge_trp->records= non_cpk_scan_records + cpk_scan_records;
- imerge_trp->records= min(imerge_trp->records,
+ imerge_trp->records= MY_MIN(imerge_trp->records,
param->table->stat_records());
imerge_trp->range_scans= range_scans;
imerge_trp->range_scans_end= range_scans + n_child_scans;
@@ -5345,7 +5345,7 @@ bool prepare_search_best_index_intersect(PARAM *param,
this number by #r.
If we do not make any assumptions then we can only state that
- #r<=min(#r1,#r2).
+ #r<=MY_MIN(#r1,#r2).
With this estimate we can't say that the index intersection scan will be
cheaper than the cheapest index scan.
@@ -5378,7 +5378,7 @@ bool prepare_search_best_index_intersect(PARAM *param,
#rt2_0 of the same range for sub-index idx2_0(dept) of the index idx2.
The current code does not make an estimate either for #rt1_0, or for #rt2_0,
but it can be adjusted to provide those numbers.
- Alternatively, min(rec_per_key) for (dept) could be used to get an upper
+ Alternatively, MY_MIN(rec_per_key) for (dept) could be used to get an upper
bound for the value of sel(Rt1&Rt2). Yet this statistics is not provided
now.
@@ -5389,7 +5389,7 @@ bool prepare_search_best_index_intersect(PARAM *param,
sel(Rt1&Rt2)=sel(dept=5)*sel(last_name='Sm5')*sel(first_name='Robert')
=sel(Rt2)*sel(dept=5)
- Here max(rec_per_key) for (dept) could be used to get an upper bound for
+ Here MY_MAX(rec_per_key) for (dept) could be used to get an upper bound for
the value of sel(Rt1&Rt2).
When the intersected indexes have different major columns, but some
@@ -5442,9 +5442,9 @@ bool prepare_search_best_index_intersect(PARAM *param,
f_1 = rec_per_key[first_name]/rec_per_key[last_name].
The the number of records in the range tree:
Rt_0: (first_name='Robert' OR first_name='Bob')
- for the sub-index (first_name) is not greater than max(#r*f_1, #t).
+ for the sub-index (first_name) is not greater than MY_MAX(#r*f_1, #t).
Strictly speaking, we can state only that it's not greater than
- max(#r*max_f_1, #t), where
+ MY_MAX(#r*max_f_1, #t), where
max_f_1= max_rec_per_key[first_name]/min_rec_per_key[last_name].
Yet, if #r/#t is big enough (and this is the case of an index intersection,
because using this index range with a single index scan is cheaper than
@@ -8641,7 +8641,7 @@ and_all_keys(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2,
if (!key1)
return &null_element; // Impossible ranges
key1->use_count++;
- key1->max_part_no= max(key2->max_part_no, key2->part+1);
+ key1->max_part_no= MY_MAX(key2->max_part_no, key2->part+1);
return key1;
}
@@ -8734,7 +8734,7 @@ key_and(RANGE_OPT_PARAM *param, SEL_ARG *key1, SEL_ARG *key2, uint clone_flag)
key1->use_count--;
key2->use_count--;
SEL_ARG *e1=key1->first(), *e2=key2->first(), *new_tree=0;
- uint max_part_no= max(key1->max_part_no, key2->max_part_no);
+ uint max_part_no= MY_MAX(key1->max_part_no, key2->max_part_no);
while (e1 && e2)
{
@@ -8932,7 +8932,7 @@ key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2)
b: [----
*/
- uint max_part_no= max(key1->max_part_no, key2->max_part_no);
+ uint max_part_no= MY_MAX(key1->max_part_no, key2->max_part_no);
for (key2=key2->first(); key2; )
{
@@ -9142,11 +9142,11 @@ key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2)
are merged into one range by deleting first...last-1 from
the key1 tree. In the figure, this applies to first and the
two consecutive ranges. The range of last is then extended:
- * last.min: Set to min(key2.min, first.min)
+ * last.min: Set to MY_MIN(key2.min, first.min)
* last.max: If there is a last->next that overlaps key2 (i.e.,
last->next has a different next_key_part):
Set adjacent to last->next.min
- Otherwise: Set to max(key2.max, last.max)
+ Otherwise: Set to MY_MAX(key2.max, last.max)
Result:
key2: [****----------------------*******]
@@ -9200,7 +9200,7 @@ key_or(RANGE_OPT_PARAM *param, SEL_ARG *key1,SEL_ARG *key2)
^ ^
last different next_key_part
- Extend range of last up to max(last.max, key2.max):
+ Extend range of last up to MY_MAX(last.max, key2.max):
key2: [--------*****]
key1: [***----------**] [xxxx]
*/
@@ -10041,7 +10041,7 @@ ha_rows check_quick_select(PARAM *param, uint idx, bool index_only,
param->table->quick_key_parts[keynr]= param->max_key_part+1;
param->table->quick_n_ranges[keynr]= param->range_count;
param->table->quick_condition_rows=
- min(param->table->quick_condition_rows, rows);
+ MY_MIN(param->table->quick_condition_rows, rows);
param->table->quick_rows[keynr]= rows;
}
}
@@ -11814,7 +11814,7 @@ cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
TODO
- What happens if the query groups by the MIN/MAX field, and there is no
- other field as in: "select min(a) from t1 group by a" ?
+ other field as in: "select MY_MIN(a) from t1 group by a" ?
- We assume that the general correctness of the GROUP-BY query was checked
before this point. Is this correct, or do we have to check it completely?
- Lift the limitation in condition (B3), that is, make this access method
@@ -12075,7 +12075,7 @@ get_best_group_min_max(PARAM *param, SEL_TREE *tree, double read_time)
cur_group_prefix_len+= cur_part->store_length;
used_key_parts_map.set_bit(key_part_nr);
++cur_group_key_parts;
- max_key_part= max(max_key_part,key_part_nr);
+ max_key_part= MY_MAX(max_key_part,key_part_nr);
}
/*
Check that used key parts forms a prefix of the index.
@@ -12741,9 +12741,9 @@ void cost_group_min_max(TABLE* table, KEY *index_info, uint used_key_parts,
{
double blocks_per_group= (double) num_blocks / (double) num_groups;
p_overlap= (blocks_per_group * (keys_per_subgroup - 1)) / keys_per_group;
- p_overlap= min(p_overlap, 1.0);
+ p_overlap= MY_MIN(p_overlap, 1.0);
}
- io_cost= (double) min(num_groups * (1 + p_overlap), num_blocks);
+ io_cost= (double) MY_MIN(num_groups * (1 + p_overlap), num_blocks);
}
else
io_cost= (keys_per_group > keys_per_block) ?
diff --git a/sql/opt_range.h b/sql/opt_range.h
index c59b2a7eb02..fd9d0b3923f 100644
--- a/sql/opt_range.h
+++ b/sql/opt_range.h
@@ -104,7 +104,7 @@ class QUICK_RANGE :public Sql_alloc {
void make_min_endpoint(key_range *kr, uint prefix_length,
key_part_map keypart_map) {
make_min_endpoint(kr);
- kr->length= min(kr->length, prefix_length);
+ kr->length= MY_MIN(kr->length, prefix_length);
kr->keypart_map&= keypart_map;
}
@@ -142,7 +142,7 @@ class QUICK_RANGE :public Sql_alloc {
void make_max_endpoint(key_range *kr, uint prefix_length,
key_part_map keypart_map) {
make_max_endpoint(kr);
- kr->length= min(kr->length, prefix_length);
+ kr->length= MY_MIN(kr->length, prefix_length);
kr->keypart_map&= keypart_map;
}
diff --git a/sql/opt_range_mrr.cc b/sql/opt_range_mrr.cc
index 1f4e36178db..e03a1e2e644 100644
--- a/sql/opt_range_mrr.cc
+++ b/sql/opt_range_mrr.cc
@@ -293,7 +293,7 @@ walk_up_n_right:
}
}
seq->param->range_count++;
- seq->param->max_key_part=max(seq->param->max_key_part,key_tree->part);
+ seq->param->max_key_part=MY_MAX(seq->param->max_key_part,key_tree->part);
return 0;
}
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc
index 8cd4ba08ff3..7780a7921e5 100644
--- a/sql/opt_subselect.cc
+++ b/sql/opt_subselect.cc
@@ -2175,7 +2175,7 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map)
double rows= 1.0;
while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
rows *= join->map2table[tableno]->table->quick_condition_rows;
- sjm->rows= min(sjm->rows, rows);
+ sjm->rows= MY_MIN(sjm->rows, rows);
}
memcpy(sjm->positions, join->best_positions + join->const_tables,
sizeof(POSITION) * n_tables);
@@ -4041,7 +4041,7 @@ SJ_TMP_TABLE::create_sj_weedout_tmp_table(THD *thd)
share->max_rows= ~(ha_rows) 0;
else
share->max_rows= (ha_rows) (((share->db_type() == heap_hton) ?
- min(thd->variables.tmp_table_size,
+ MY_MIN(thd->variables.tmp_table_size,
thd->variables.max_heap_table_size) :
thd->variables.tmp_table_size) /
share->reclength);
@@ -5153,7 +5153,7 @@ bool setup_jtbm_semi_joins(JOIN *join, List<TABLE_LIST> *join_list,
0 or 1 record. Examples of both cases:
select * from ot where col in (select ... from it where 2>3)
- select * from ot where col in (select min(it.key) from it)
+ select * from ot where col in (select MY_MIN(it.key) from it)
in this case, the subquery predicate has not been setup for
materialization. In particular, there is no materialized temp.table.
diff --git a/sql/opt_table_elimination.cc b/sql/opt_table_elimination.cc
index 33164c1ed12..e44b6fdf5e0 100644
--- a/sql/opt_table_elimination.cc
+++ b/sql/opt_table_elimination.cc
@@ -328,7 +328,7 @@ const size_t Dep_value_table::iterator_size=
ALIGN_SIZE(sizeof(Dep_value_table::Module_iter));
const size_t Dep_value::iterator_size=
- max(Dep_value_table::iterator_size, Dep_value_field::iterator_size);
+ MY_MAX(Dep_value_table::iterator_size, Dep_value_field::iterator_size);
/*
@@ -441,7 +441,7 @@ const size_t Dep_module_key::iterator_size=
ALIGN_SIZE(sizeof(Dep_module_key::Value_iter));
const size_t Dep_module::iterator_size=
- max(Dep_module_expr::iterator_size, Dep_module_key::iterator_size);
+ MY_MAX(Dep_module_expr::iterator_size, Dep_module_key::iterator_size);
/*
diff --git a/sql/password.c b/sql/password.c
index 947620ddf7a..954daf2d8d1 100644
--- a/sql/password.c
+++ b/sql/password.c
@@ -60,12 +60,14 @@
*****************************************************************************/
-#include <password.h>
#include <my_global.h>
#include <my_sys.h>
#include <m_string.h>
+#include <password.h>
+#include <mysql.h>
+#include <my_rnd.h>
#include <sha1.h>
-#include "mysql.h"
+#include <crypt_genhash_impl.h>
/************ MySQL 3.23-4.0 authentication routines: untouched ***********/
@@ -372,6 +374,47 @@ my_crypt(char *to, const uchar *s1, const uchar *s2, uint len)
}
+#if defined(HAVE_OPENSSL)
+void my_make_scrambled_password(char *to, const char *password,
+ size_t pass_len)
+{
+
+ char salt[CRYPT_SALT_LENGTH + 1];
+
+ generate_user_salt(salt, CRYPT_SALT_LENGTH + 1);
+ my_crypt_genhash(to,
+ CRYPT_MAX_PASSWORD_SIZE,
+ password,
+ pass_len,
+ salt,
+ 0);
+
+}
+#endif
+/**
+ Compute two stage SHA1 hash of the password :
+
+ hash_stage1=sha1("password")
+ hash_stage2=sha1(hash_stage1)
+
+ @param password [IN] Password string.
+ @param pass_len [IN] Length of the password.
+ @param hash_stage1 [OUT] sha1(password)
+ @param hash_stage2 [OUT] sha1(hash_stage1)
+*/
+
+inline static
+void compute_two_stage_sha1_hash(const char *password, size_t pass_len,
+ uint8 *hash_stage1, uint8 *hash_stage2)
+{
+ /* Stage 1: hash password */
+ compute_sha1_hash(hash_stage1, password, pass_len);
+
+ /* Stage 2 : hash first stage's output. */
+ compute_sha1_hash(hash_stage2, (const char *) hash_stage1, SHA1_HASH_SIZE);
+}
+
+
/*
MySQL 4.1.1 password hashing: SHA conversion (see RFC 2289, 3174) twice
applied to the password string, and then produced octet sequence is
@@ -379,27 +422,20 @@ my_crypt(char *to, const uchar *s1, const uchar *s2, uint len)
The result of this function is used as return value from PASSWORD() and
is stored in the database.
SYNOPSIS
- my_make_scrambled_password()
+ my_make_scrambled_password_sha1()
buf OUT buffer of size 2*SHA1_HASH_SIZE + 2 to store hex string
password IN password string
pass_len IN length of password string
*/
-void my_make_scrambled_password(char *to, const char *password,
- size_t pass_len)
+void my_make_scrambled_password_sha1(char *to, const char *password,
+ size_t pass_len)
{
- SHA1_CONTEXT sha1_context;
uint8 hash_stage2[SHA1_HASH_SIZE];
- mysql_sha1_reset(&sha1_context);
- /* stage 1: hash password */
- mysql_sha1_input(&sha1_context, (uint8 *) password, (uint) pass_len);
- mysql_sha1_result(&sha1_context, (uint8 *) to);
- /* stage 2: hash stage1 output */
- mysql_sha1_reset(&sha1_context);
- mysql_sha1_input(&sha1_context, (uint8 *) to, SHA1_HASH_SIZE);
- /* separate buffer is used to pass 'to' in octet2hex */
- mysql_sha1_result(&sha1_context, hash_stage2);
+ /* Two stage SHA1 hash of the password. */
+ compute_two_stage_sha1_hash(password, pass_len, (uint8 *) to, hash_stage2);
+
/* convert hash_stage2 to hex string */
*to++= PVERSION41_CHAR;
octet2hex(to, (const char*) hash_stage2, SHA1_HASH_SIZE);
@@ -419,7 +455,7 @@ void my_make_scrambled_password(char *to, const char *password,
void make_scrambled_password(char *to, const char *password)
{
- my_make_scrambled_password(to, password, strlen(password));
+ my_make_scrambled_password_sha1(to, password, strlen(password));
}
@@ -443,24 +479,16 @@ void make_scrambled_password(char *to, const char *password)
void
scramble(char *to, const char *message, const char *password)
{
- SHA1_CONTEXT sha1_context;
uint8 hash_stage1[SHA1_HASH_SIZE];
uint8 hash_stage2[SHA1_HASH_SIZE];
- mysql_sha1_reset(&sha1_context);
- /* stage 1: hash password */
- mysql_sha1_input(&sha1_context, (uint8 *) password, (uint) strlen(password));
- mysql_sha1_result(&sha1_context, hash_stage1);
- /* stage 2: hash stage 1; note that hash_stage2 is stored in the database */
- mysql_sha1_reset(&sha1_context);
- mysql_sha1_input(&sha1_context, hash_stage1, SHA1_HASH_SIZE);
- mysql_sha1_result(&sha1_context, hash_stage2);
+ /* Two stage SHA1 hash of the password. */
+ compute_two_stage_sha1_hash(password, strlen(password), hash_stage1,
+ hash_stage2);
+
/* create crypt string as sha1(message, hash_stage2) */;
- mysql_sha1_reset(&sha1_context);
- mysql_sha1_input(&sha1_context, (const uint8 *) message, SCRAMBLE_LENGTH);
- mysql_sha1_input(&sha1_context, hash_stage2, SHA1_HASH_SIZE);
- /* xor allows 'from' and 'to' overlap: lets take advantage of it */
- mysql_sha1_result(&sha1_context, (uint8 *) to);
+ compute_sha1_hash_multi((uint8 *) to, message, SCRAMBLE_LENGTH,
+ (const char *) hash_stage2, SHA1_HASH_SIZE);
my_crypt(to, (const uchar *) to, hash_stage1, SCRAMBLE_LENGTH);
}
@@ -472,7 +500,7 @@ scramble(char *to, const char *message, const char *password)
null-terminated, reply and hash_stage2 must be at least SHA1_HASH_SIZE
long (if not, something fishy is going on).
SYNOPSIS
- check_scramble()
+ check_scramble_sha1()
scramble clients' reply, presumably produced by scramble()
message original random string, previously sent to client
(presumably second argument of scramble()), must be
@@ -486,27 +514,30 @@ scramble(char *to, const char *message, const char *password)
*/
my_bool
-check_scramble(const uchar *scramble_arg, const char *message,
- const uint8 *hash_stage2)
+check_scramble_sha1(const uchar *scramble_arg, const char *message,
+ const uint8 *hash_stage2)
{
- SHA1_CONTEXT sha1_context;
uint8 buf[SHA1_HASH_SIZE];
uint8 hash_stage2_reassured[SHA1_HASH_SIZE];
- mysql_sha1_reset(&sha1_context);
/* create key to encrypt scramble */
- mysql_sha1_input(&sha1_context, (const uint8 *) message, SCRAMBLE_LENGTH);
- mysql_sha1_input(&sha1_context, hash_stage2, SHA1_HASH_SIZE);
- mysql_sha1_result(&sha1_context, buf);
+ compute_sha1_hash_multi(buf, message, SCRAMBLE_LENGTH,
+ (const char *) hash_stage2, SHA1_HASH_SIZE);
/* encrypt scramble */
- my_crypt((char *) buf, buf, scramble_arg, SCRAMBLE_LENGTH);
+ my_crypt((char *) buf, buf, scramble_arg, SCRAMBLE_LENGTH);
+
/* now buf supposedly contains hash_stage1: so we can get hash_stage2 */
- mysql_sha1_reset(&sha1_context);
- mysql_sha1_input(&sha1_context, buf, SHA1_HASH_SIZE);
- mysql_sha1_result(&sha1_context, hash_stage2_reassured);
+ compute_sha1_hash(hash_stage2_reassured, (const char *) buf, SHA1_HASH_SIZE);
+
return test(memcmp(hash_stage2, hash_stage2_reassured, SHA1_HASH_SIZE));
}
+my_bool
+check_scramble(const uchar *scramble_arg, const char *message,
+ const uint8 *hash_stage2)
+{
+ return check_scramble_sha1(scramble_arg, message, hash_stage2);
+}
/*
Convert scrambled password from asciiz hex string to binary form.
diff --git a/sql/protocol.cc b/sql/protocol.cc
index f6e9e9e62e1..bc3133881af 100644
--- a/sql/protocol.cc
+++ b/sql/protocol.cc
@@ -233,7 +233,7 @@ net_send_ok(THD *thd,
pos+=2;
/* We can only return up to 65535 warnings in two bytes */
- uint tmp= min(statement_warn_count, 65535);
+ uint tmp= MY_MIN(statement_warn_count, 65535);
int2store(pos, tmp);
pos+= 2;
}
@@ -329,7 +329,7 @@ static bool write_eof_packet(THD *thd, NET *net,
Don't send warn count during SP execution, as the warn_list
is cleared between substatements, and mysqltest gets confused
*/
- uint tmp= min(statement_warn_count, 65535);
+ uint tmp= MY_MIN(statement_warn_count, 65535);
buff[0]= 254;
int2store(buff+1, tmp);
/*
@@ -606,17 +606,17 @@ void net_send_progress_packet(THD *thd)
*pos++= (uchar) 1; // Number of strings
*pos++= (uchar) thd->progress.stage + 1;
/*
- We have the max() here to avoid problems if max_stage is not set,
+ We have the MY_MAX() here to avoid problems if max_stage is not set,
which may happen during automatic repair of table
*/
- *pos++= (uchar) max(thd->progress.max_stage, thd->progress.stage + 1);
+ *pos++= (uchar) MY_MAX(thd->progress.max_stage, thd->progress.stage + 1);
progress= 0;
if (thd->progress.max_counter)
progress= 100000ULL * thd->progress.counter / thd->progress.max_counter;
int3store(pos, progress); // Between 0 & 100000
pos+= 3;
pos= net_store_data(pos, (const uchar*) proc_info,
- min(length, sizeof(buff)-7));
+ MY_MIN(length, sizeof(buff)-7));
net_write_command(&thd->net, (uchar) 255, progress_header,
sizeof(progress_header), (uchar*) buff,
(uint) (pos - buff));
diff --git a/sql/rpl_mi.cc b/sql/rpl_mi.cc
index 3e02b555dc0..c0393300fcf 100644
--- a/sql/rpl_mi.cc
+++ b/sql/rpl_mi.cc
@@ -151,7 +151,7 @@ void init_master_log_pos(Master_info* mi)
if CHANGE MASTER did not specify it. (no data loss in conversion
as hb period has a max)
*/
- mi->heartbeat_period= (float) min(SLAVE_MAX_HEARTBEAT_PERIOD,
+ mi->heartbeat_period= (float) MY_MIN(SLAVE_MAX_HEARTBEAT_PERIOD,
(slave_net_timeout/2.0));
DBUG_ASSERT(mi->heartbeat_period > (float) 0.001
|| mi->heartbeat_period == 0);
@@ -702,7 +702,7 @@ void create_logfile_name_with_suffix(char *res_file_name, uint length,
length-= (suffix->length - ext_pos); /* Leave place for extension */
p= res_file_name + ext_pos;
*p++= '-'; /* Add separator */
- p= strmake(p, res, min((size_t) (length - (p - res_file_name)),
+ p= strmake(p, res, MY_MIN((size_t) (length - (p - res_file_name)),
res_length));
/* Add back extension. We have checked above that there is space for it */
strmov(p, ext);
diff --git a/sql/rpl_record.cc b/sql/rpl_record.cc
index 99bf8a82004..e219177cd80 100644
--- a/sql/rpl_record.cc
+++ b/sql/rpl_record.cc
@@ -362,7 +362,7 @@ unpack_row(Relay_log_info const *rli,
/*
throw away master's extra fields
*/
- uint max_cols= min(tabledef->size(), cols->n_bits);
+ uint max_cols= MY_MIN(tabledef->size(), cols->n_bits);
for (; i < max_cols; i++)
{
if (bitmap_is_set(cols, i))
diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc
index 2e74acc0345..5a9e342e458 100644
--- a/sql/rpl_rli.cc
+++ b/sql/rpl_rli.cc
@@ -735,7 +735,7 @@ int Relay_log_info::wait_for_pos(THD* thd, String* log_name,
ulong log_name_extension;
char log_name_tmp[FN_REFLEN]; //make a char[] from String
- strmake(log_name_tmp, log_name->ptr(), min(log_name->length(), FN_REFLEN-1));
+ strmake(log_name_tmp, log_name->ptr(), MY_MIN(log_name->length(), FN_REFLEN-1));
char *p= fn_ext(log_name_tmp);
char *p_end;
@@ -745,7 +745,7 @@ int Relay_log_info::wait_for_pos(THD* thd, String* log_name,
goto err;
}
// Convert 0-3 to 4
- log_pos= max(log_pos, BIN_LOG_HEADER_SIZE);
+ log_pos= MY_MAX(log_pos, BIN_LOG_HEADER_SIZE);
/* p points to '.' */
log_name_extension= strtoul(++p, &p_end, 10);
/*
diff --git a/sql/rpl_utility.cc b/sql/rpl_utility.cc
index 1b9e744bcc1..33e04e488cb 100644
--- a/sql/rpl_utility.cc
+++ b/sql/rpl_utility.cc
@@ -779,7 +779,7 @@ table_def::compatible_with(THD *thd, Relay_log_info *rli,
/*
We only check the initial columns for the tables.
*/
- uint const cols_to_check= min(table->s->fields, size());
+ uint const cols_to_check= MY_MIN(table->s->fields, size());
TABLE *tmp_table= NULL;
for (uint col= 0 ; col < cols_to_check ; ++col)
diff --git a/sql/rpl_utility.h b/sql/rpl_utility.h
index 79f4517c492..19a5f621f69 100644
--- a/sql/rpl_utility.h
+++ b/sql/rpl_utility.h
@@ -287,7 +287,7 @@ public:
do { \
char buf[256]; \
uint i; \
- for (i = 0 ; i < min(sizeof(buf) - 1, (BS)->n_bits) ; i++) \
+ for (i = 0 ; i < MY_MIN(sizeof(buf) - 1, (BS)->n_bits) ; i++) \
buf[i] = bitmap_is_set((BS), i) ? '1' : '0'; \
buf[i] = '\0'; \
DBUG_PRINT((N), ((FRM), buf)); \
diff --git a/sql/share/errmsg-utf8.txt b/sql/share/errmsg-utf8.txt
index 49f35719a77..156ec759099 100644
--- a/sql/share/errmsg-utf8.txt
+++ b/sql/share/errmsg-utf8.txt
@@ -1,4 +1,4 @@
-languages czech=cze latin2, danish=dan latin1, dutch=nla latin1, english=eng latin1, estonian=est latin7, french=fre latin1, german=ger latin1, greek=greek greek, hungarian=hun latin2, italian=ita latin1, japanese=jpn ujis, japanese-sjis=jps sjis, korean=kor euckr, norwegian-ny=norwegian-ny latin1, norwegian=nor latin1, polish=pol latin2, portuguese=por latin1, romanian=rum latin2, russian=rus koi8r, serbian=serbian cp1250, slovak=slo latin2, spanish=spa latin1, swedish=swe latin1, ukrainian=ukr koi8u;
+languages czech=cze latin2, danish=dan latin1, dutch=nla latin1, english=eng latin1, estonian=est latin7, french=fre latin1, german=ger latin1, greek=greek greek, hungarian=hun latin2, italian=ita latin1, japanese=jpn ujis, korean=kor euckr, norwegian-ny=norwegian-ny latin1, norwegian=nor latin1, polish=pol latin2, portuguese=por latin1, romanian=rum latin2, russian=rus koi8r, serbian=serbian cp1250, slovak=slo latin2, spanish=spa latin1, swedish=swe latin1, ukrainian=ukr koi8u, bulgarian=bgn cp1251;
default-language eng
@@ -51,7 +51,7 @@ ER_YES
spa "SI"
ukr "ТАК"
ER_CANT_CREATE_FILE
- cze "Nemohu vytvo-Břit soubor '%-.200s' (chybový kód: %M)"
+ cze "Nemohu vytvořit soubor '%-.200s' (chybový kód: %M)"
dan "Kan ikke oprette filen '%-.200s' (Fejlkode: %M)"
nla "Kan file '%-.200s' niet aanmaken (Errcode: %M)"
eng "Can't create file '%-.200s' (errno: %M)"
@@ -61,7 +61,7 @@ ER_CANT_CREATE_FILE
greek "Αδύνατη η δημιουργία του αρχείου '%-.200s' (κωδικός λάθους: %M)"
hun "A '%-.200s' file nem hozhato letre (hibakod: %M)"
ita "Impossibile creare il file '%-.200s' (errno: %M)"
- jpn "'%-.200s' ファイルが作れません (errno: %M)"
+ jpn "ファイル '%-.200s' を作成できません。(エラー番号: %M)"
kor "화일 '%-.200s'를 만들지 못했습니다. (에러번호: %M)"
nor "Kan ikke opprette fila '%-.200s' (Feilkode: %M)"
norwegian-ny "Kan ikkje opprette fila '%-.200s' (Feilkode: %M)"
@@ -75,18 +75,17 @@ ER_CANT_CREATE_FILE
swe "Kan inte skapa filen '%-.200s' (Felkod: %M)"
ukr "Не можу створити файл '%-.200s' (помилка: %M)"
ER_CANT_CREATE_TABLE
- cze "Nemohu vytvo-Břit tabulku '%-.200s' (chybový kód: %M)"
+ cze "Nemohu vytvořit tabulku '%-.200s' (chybový kód: %M)"
dan "Kan ikke oprette tabellen '%-.200s' (Fejlkode: %M)"
nla "Kan tabel '%-.200s' niet aanmaken (Errcode: %M)"
eng "Can't create table '%-.200s' (errno: %M)"
- jps "'%-.200s' テーブルが作れません.(errno: %M)",
est "Ei suuda luua tabelit '%-.200s' (veakood: %M)"
fre "Ne peut créer la table '%-.200s' (Errcode: %M)"
ger "Kann Tabelle '%-.200s' nicht erzeugen (Fehler: %M)"
greek "Αδύνατη η δημιουργία του πίνακα '%-.200s' (κωδικός λάθους: %M)"
hun "A '%-.200s' tabla nem hozhato letre (hibakod: %M)"
ita "Impossibile creare la tabella '%-.200s' (errno: %M)"
- jpn "'%-.200s' テーブルが作れません.(errno: %M)"
+ jpn "表 '%-.200s' を作成できません。(エラー番号: %M)"
kor "테이블 '%-.200s'를 만들지 못했습니다. (에러번호: %M)"
nor "Kan ikke opprette tabellen '%-.200s' (Feilkode: %M)"
norwegian-ny "Kan ikkje opprette tabellen '%-.200s' (Feilkode: %M)"
@@ -100,18 +99,17 @@ ER_CANT_CREATE_TABLE
swe "Kan inte skapa tabellen '%-.200s' (Felkod: %M)"
ukr "Не можу створити таблицю '%-.200s' (помилка: %M)"
ER_CANT_CREATE_DB
- cze "Nemohu vytvo-Břit databázi '%-.192s' (chybový kód: %M)"
+ cze "Nemohu vytvořit databázi '%-.192s' (chybový kód: %M)"
dan "Kan ikke oprette databasen '%-.192s' (Fejlkode: %M)"
nla "Kan database '%-.192s' niet aanmaken (Errcode: %M)"
eng "Can't create database '%-.192s' (errno: %M)"
- jps "'%-.192s' データベースが作れません (errno: %M)",
est "Ei suuda luua andmebaasi '%-.192s' (veakood: %M)"
fre "Ne peut créer la base '%-.192s' (Erreur %M)"
ger "Kann Datenbank '%-.192s' nicht erzeugen (Fehler: %M)"
greek "Αδύνατη η δημιουργία της βάσης δεδομένων '%-.192s' (κωδικός λάθους: %M)"
hun "Az '%-.192s' adatbazis nem hozhato letre (hibakod: %M)"
ita "Impossibile creare il database '%-.192s' (errno: %M)"
- jpn "'%-.192s' データベースが作れません (errno: %M)"
+ jpn "データベース '%-.192s' を作成できません。(エラー番号: %M)"
kor "데이타베이스 '%-.192s'를 만들지 못했습니다.. (에러번호: %M)"
nor "Kan ikke opprette databasen '%-.192s' (Feilkode: %M)"
norwegian-ny "Kan ikkje opprette databasen '%-.192s' (Feilkode: %M)"
@@ -125,18 +123,17 @@ ER_CANT_CREATE_DB
swe "Kan inte skapa databasen '%-.192s' (Felkod: %M)"
ukr "Не можу створити базу данних '%-.192s' (помилка: %M)"
ER_DB_CREATE_EXISTS
- cze "Nemohu vytvo-Břit databázi '%-.192s'; databáze již existuje"
+ cze "Nemohu vytvořit databázi '%-.192s'; databáze již existuje"
dan "Kan ikke oprette databasen '%-.192s'; databasen eksisterer"
nla "Kan database '%-.192s' niet aanmaken; database bestaat reeds"
eng "Can't create database '%-.192s'; database exists"
- jps "'%-.192s' データベースが作れません.既にそのデータベースが存在します",
est "Ei suuda luua andmebaasi '%-.192s': andmebaas juba eksisteerib"
fre "Ne peut créer la base '%-.192s'; elle existe déjà"
ger "Kann Datenbank '%-.192s' nicht erzeugen. Datenbank existiert bereits"
greek "Αδύνατη η δημιουργία της βάσης δεδομένων '%-.192s'; Η βάση δεδομένων υπάρχει ήδη"
hun "Az '%-.192s' adatbazis nem hozhato letre Az adatbazis mar letezik"
ita "Impossibile creare il database '%-.192s'; il database esiste"
- jpn "'%-.192s' データベースが作れません.既にそのデータベースが存在します"
+ jpn "データベース '%-.192s' を作成できません。データベースはすでに存在します。"
kor "데이타베이스 '%-.192s'를 만들지 못했습니다.. 데이타베이스가 존재함"
nor "Kan ikke opprette databasen '%-.192s'; databasen eksisterer"
norwegian-ny "Kan ikkje opprette databasen '%-.192s'; databasen eksisterer"
@@ -150,18 +147,17 @@ ER_DB_CREATE_EXISTS
swe "Databasen '%-.192s' existerar redan"
ukr "Не можу створити базу данних '%-.192s'. База данних існує"
ER_DB_DROP_EXISTS
- cze "Nemohu zru-Bšit databázi '%-.192s', databáze neexistuje"
+ cze "Nemohu zrušit databázi '%-.192s', databáze neexistuje"
dan "Kan ikke slette (droppe) '%-.192s'; databasen eksisterer ikke"
nla "Kan database '%-.192s' niet verwijderen; database bestaat niet"
eng "Can't drop database '%-.192s'; database doesn't exist"
- jps "'%-.192s' データベースを破棄できません. そのデータベースがないのです.",
est "Ei suuda kustutada andmebaasi '%-.192s': andmebaasi ei eksisteeri"
fre "Ne peut effacer la base '%-.192s'; elle n'existe pas"
ger "Kann Datenbank '%-.192s' nicht löschen; Datenbank nicht vorhanden"
greek "Αδύνατη η διαγραφή της βάσης δεδομένων '%-.192s'. Η βάση δεδομένων δεν υπάρχει"
hun "A(z) '%-.192s' adatbazis nem szuntetheto meg. Az adatbazis nem letezik"
ita "Impossibile cancellare '%-.192s'; il database non esiste"
- jpn "'%-.192s' データベースを破棄できません. そのデータベースがないのです."
+ jpn "データベース '%-.192s' を削除できません。データベースは存在しません。"
kor "데이타베이스 '%-.192s'를 제거하지 못했습니다. 데이타베이스가 존재하지 않음 "
nor "Kan ikke fjerne (drop) '%-.192s'; databasen eksisterer ikke"
norwegian-ny "Kan ikkje fjerne (drop) '%-.192s'; databasen eksisterer ikkje"
@@ -175,18 +171,17 @@ ER_DB_DROP_EXISTS
swe "Kan inte radera databasen '%-.192s'; databasen finns inte"
ukr "Не можу видалити базу данних '%-.192s'. База данних не існує"
ER_DB_DROP_DELETE
- cze "Chyba p-Bři rušení databáze (nemohu vymazat '%-.192s', chyba %M)"
+ cze "Chyba při rušení databáze (nemohu vymazat '%-.192s', chyba %M)"
dan "Fejl ved sletning (drop) af databasen (kan ikke slette '%-.192s', Fejlkode %M)"
nla "Fout bij verwijderen database (kan '%-.192s' niet verwijderen, Errcode: %M)"
eng "Error dropping database (can't delete '%-.192s', errno: %M)"
- jps "データベース破棄エラー ('%-.192s' を削除できません, errno: %M)",
est "Viga andmebaasi kustutamisel (ei suuda kustutada faili '%-.192s', veakood: %M)"
fre "Ne peut effacer la base '%-.192s' (erreur %M)"
ger "Fehler beim Löschen der Datenbank ('%-.192s' kann nicht gelöscht werden, Fehler: %M)"
greek "Παρουσιάστηκε πρόβλημα κατά τη διαγραφή της βάσης δεδομένων (αδύνατη η διαγραφή '%-.192s', κωδικός λάθους: %M)"
hun "Adatbazis megszuntetesi hiba ('%-.192s' nem torolheto, hibakod: %M)"
ita "Errore durante la cancellazione del database (impossibile cancellare '%-.192s', errno: %M)"
- jpn "データベース破棄エラー ('%-.192s' を削除できません, errno: %M)"
+ jpn "データベース削除エラー ('%-.192s' を削除できません。エラー番号: %M)"
kor "데이타베이스 제거 에러('%-.192s'를 삭제할 수 없읍니다, 에러번호: %M)"
nor "Feil ved fjerning (drop) av databasen (kan ikke slette '%-.192s', feil %M)"
norwegian-ny "Feil ved fjerning (drop) av databasen (kan ikkje slette '%-.192s', feil %M)"
@@ -200,18 +195,17 @@ ER_DB_DROP_DELETE
swe "Fel vid radering av databasen (Kan inte radera '%-.192s'. Felkod: %M)"
ukr "Не можу видалити базу данних (Не можу видалити '%-.192s', помилка: %M)"
ER_DB_DROP_RMDIR
- cze "Chyba p-Bři rušení databáze (nemohu vymazat adresář '%-.192s', chyba %M)"
+ cze "Chyba při rušení databáze (nemohu vymazat adresář '%-.192s', chyba %M)"
dan "Fejl ved sletting af database (kan ikke slette folderen '%-.192s', Fejlkode %M)"
nla "Fout bij verwijderen database (kan rmdir '%-.192s' niet uitvoeren, Errcode: %M)"
eng "Error dropping database (can't rmdir '%-.192s', errno: %M)"
- jps "データベース破棄エラー ('%-.192s' を rmdir できません, errno: %M)",
est "Viga andmebaasi kustutamisel (ei suuda kustutada kataloogi '%-.192s', veakood: %M)"
fre "Erreur en effaçant la base (rmdir '%-.192s', erreur %M)"
ger "Fehler beim Löschen der Datenbank (Verzeichnis '%-.192s' kann nicht gelöscht werden, Fehler: %M)"
greek "Παρουσιάστηκε πρόβλημα κατά τη διαγραφή της βάσης δεδομένων (αδύνατη η διαγραφή του φακέλλου '%-.192s', κωδικός λάθους: %M)"
hun "Adatbazis megszuntetesi hiba ('%-.192s' nem szuntetheto meg, hibakod: %M)"
ita "Errore durante la cancellazione del database (impossibile rmdir '%-.192s', errno: %M)"
- jpn "データベース破棄エラー ('%-.192s' を rmdir できません, errno: %M)"
+ jpn "データベース削除エラー (ディレクトリ '%-.192s' を削除できません。エラー番号: %M)"
kor "데이타베이스 제거 에러(rmdir '%-.192s'를 할 수 없읍니다, 에러번호: %M)"
nor "Feil ved sletting av database (kan ikke slette katalogen '%-.192s', feil %M)"
norwegian-ny "Feil ved sletting av database (kan ikkje slette katalogen '%-.192s', feil %M)"
@@ -225,18 +219,17 @@ ER_DB_DROP_RMDIR
swe "Fel vid radering av databasen (Kan inte radera biblioteket '%-.192s'. Felkod: %M)"
ukr "Не можу видалити базу данних (Не можу видалити теку '%-.192s', помилка: %M)"
ER_CANT_DELETE_FILE
- cze "Chyba p-Bři výmazu '%-.192s' (chybový kód: %M)"
+ cze "Chyba při výmazu '%-.192s' (chybový kód: %M)"
dan "Fejl ved sletning af '%-.192s' (Fejlkode: %M)"
nla "Fout bij het verwijderen van '%-.192s' (Errcode: %M)"
eng "Error on delete of '%-.192s' (errno: %M)"
- jps "'%-.192s' の削除がエラー (errno: %M)",
est "Viga '%-.192s' kustutamisel (veakood: %M)"
fre "Erreur en effaçant '%-.192s' (Errcode: %M)"
ger "Fehler beim Löschen von '%-.192s' (Fehler: %M)"
greek "Παρουσιάστηκε πρόβλημα κατά τη διαγραφή '%-.192s' (κωδικός λάθους: %M)"
hun "Torlesi hiba: '%-.192s' (hibakod: %M)"
ita "Errore durante la cancellazione di '%-.192s' (errno: %M)"
- jpn "'%-.192s' の削除がエラー (errno: %M)"
+ jpn "ファイル '%-.192s' の削除エラー (エラー番号: %M)"
kor "'%-.192s' 삭제 중 에러 (에러번호: %M)"
nor "Feil ved sletting av '%-.192s' (Feilkode: %M)"
norwegian-ny "Feil ved sletting av '%-.192s' (Feilkode: %M)"
@@ -250,18 +243,17 @@ ER_CANT_DELETE_FILE
swe "Kan inte radera filen '%-.192s' (Felkod: %M)"
ukr "Не можу видалити '%-.192s' (помилка: %M)"
ER_CANT_FIND_SYSTEM_REC
- cze "Nemohu -Bčíst záznam v systémové tabulce"
+ cze "Nemohu číst záznam v systémové tabulce"
dan "Kan ikke læse posten i systemfolderen"
nla "Kan record niet lezen in de systeem tabel"
eng "Can't read record in system table"
- jps "system table のレコードを読む事ができませんでした",
est "Ei suuda lugeda kirjet süsteemsest tabelist"
fre "Ne peut lire un enregistrement de la table 'system'"
ger "Datensatz in der Systemtabelle nicht lesbar"
greek "Αδύνατη η ανάγνωση εγγραφής από πίνακα του συστήματος"
hun "Nem olvashato rekord a rendszertablaban"
ita "Impossibile leggere il record dalla tabella di sistema"
- jpn "system table のレコードを読む事ができませんでした"
+ jpn "システム表のレコードを読み込めません。"
kor "system 테이블에서 레코드를 읽을 수 없습니다."
nor "Kan ikke lese posten i systemkatalogen"
norwegian-ny "Kan ikkje lese posten i systemkatalogen"
@@ -275,18 +267,17 @@ ER_CANT_FIND_SYSTEM_REC
swe "Hittar inte posten i systemregistret"
ukr "Не можу зчитати запис з системної таблиці"
ER_CANT_GET_STAT
- cze "Nemohu z-Bískat stav '%-.200s' (chybový kód: %M)"
+ cze "Nemohu získat stav '%-.200s' (chybový kód: %M)"
dan "Kan ikke læse status af '%-.200s' (Fejlkode: %M)"
nla "Kan de status niet krijgen van '%-.200s' (Errcode: %M)"
eng "Can't get status of '%-.200s' (errno: %M)"
- jps "'%-.200s' のステイタスが得られません. (errno: %M)",
est "Ei suuda lugeda '%-.200s' olekut (veakood: %M)"
fre "Ne peut obtenir le status de '%-.200s' (Errcode: %M)"
ger "Kann Status von '%-.200s' nicht ermitteln (Fehler: %M)"
greek "Αδύνατη η λήψη πληροφοριών για την κατάσταση του '%-.200s' (κωδικός λάθους: %M)"
hun "A(z) '%-.200s' statusza nem allapithato meg (hibakod: %M)"
ita "Impossibile leggere lo stato di '%-.200s' (errno: %M)"
- jpn "'%-.200s' のステイタスが得られません. (errno: %M)"
+ jpn "'%-.200s' の状態を取得できません。(エラー番号: %M)"
kor "'%-.200s'의 상태를 얻지 못했습니다. (에러번호: %M)"
nor "Kan ikke lese statusen til '%-.200s' (Feilkode: %M)"
norwegian-ny "Kan ikkje lese statusen til '%-.200s' (Feilkode: %M)"
@@ -300,18 +291,17 @@ ER_CANT_GET_STAT
swe "Kan inte läsa filinformationen (stat) från '%-.200s' (Felkod: %M)"
ukr "Не можу отримати статус '%-.200s' (помилка: %M)"
ER_CANT_GET_WD
- cze "Chyba p-Bři zjišťování pracovní adresář (chybový kód: %M)"
+ cze "Chyba při zjišťování pracovní adresář (chybový kód: %M)"
dan "Kan ikke læse aktive folder (Fejlkode: %M)"
nla "Kan de werkdirectory niet krijgen (Errcode: %M)"
eng "Can't get working directory (errno: %M)"
- jps "working directory を得る事ができませんでした (errno: %M)",
est "Ei suuda identifitseerida jooksvat kataloogi (veakood: %M)"
fre "Ne peut obtenir le répertoire de travail (Errcode: %M)"
ger "Kann Arbeitsverzeichnis nicht ermitteln (Fehler: %M)"
greek "Ο φάκελλος εργασίας δεν βρέθηκε (κωδικός λάθους: %M)"
hun "A munkakonyvtar nem allapithato meg (hibakod: %M)"
ita "Impossibile leggere la directory di lavoro (errno: %M)"
- jpn "working directory を得る事ができませんでした (errno: %M)"
+ jpn "作業ディレクトリを取得できません。(エラー番号: %M)"
kor "수행 디렉토리를 찾지 못했습니다. (에러번호: %M)"
nor "Kan ikke lese aktiv katalog(Feilkode: %M)"
norwegian-ny "Kan ikkje lese aktiv katalog(Feilkode: %M)"
@@ -325,18 +315,17 @@ ER_CANT_GET_WD
swe "Kan inte inte läsa aktivt bibliotek. (Felkod: %M)"
ukr "Не можу визначити робочу теку (помилка: %M)"
ER_CANT_LOCK
- cze "Nemohu uzamknout soubor (chybov-Bý kód: %M)"
+ cze "Nemohu uzamknout soubor (chybový kód: %M)"
dan "Kan ikke låse fil (Fejlkode: %M)"
nla "Kan de file niet blokeren (Errcode: %M)"
eng "Can't lock file (errno: %M)"
- jps "ファイルをロックできません (errno: %M)",
est "Ei suuda lukustada faili (veakood: %M)"
fre "Ne peut verrouiller le fichier (Errcode: %M)"
ger "Datei kann nicht gesperrt werden (Fehler: %M)"
greek "Το αρχείο δεν μπορεί να κλειδωθεί (κωδικός λάθους: %M)"
hun "A file nem zarolhato. (hibakod: %M)"
ita "Impossibile il locking il file (errno: %M)"
- jpn "ファイルをロックできません (errno: %M)"
+ jpn "ファイルをロックできません。(エラー番号: %M)"
kor "화일을 잠그지(lock) 못했습니다. (에러번호: %M)"
nor "Kan ikke låse fila (Feilkode: %M)"
norwegian-ny "Kan ikkje låse fila (Feilkode: %M)"
@@ -350,18 +339,17 @@ ER_CANT_LOCK
swe "Kan inte låsa filen. (Felkod: %M)"
ukr "Не можу заблокувати файл (помилка: %M)"
ER_CANT_OPEN_FILE
- cze "Nemohu otev-Břít soubor '%-.200s' (chybový kód: %M)"
+ cze "Nemohu otevřít soubor '%-.200s' (chybový kód: %M)"
dan "Kan ikke åbne fil: '%-.200s' (Fejlkode: %M)"
nla "Kan de file '%-.200s' niet openen (Errcode: %M)"
eng "Can't open file: '%-.200s' (errno: %M)"
- jps "'%-.200s' ファイルを開く事ができません (errno: %M)",
est "Ei suuda avada faili '%-.200s' (veakood: %M)"
fre "Ne peut ouvrir le fichier: '%-.200s' (Errcode: %M)"
ger "Kann Datei '%-.200s' nicht öffnen (Fehler: %M)"
greek "Δεν είναι δυνατό να ανοιχτεί το αρχείο: '%-.200s' (κωδικός λάθους: %M)"
hun "A '%-.200s' file nem nyithato meg (hibakod: %M)"
ita "Impossibile aprire il file: '%-.200s' (errno: %M)"
- jpn "'%-.200s' ファイルを開く事ができません (errno: %M)"
+ jpn "ファイル '%-.200s' をオープンできません。(エラー番号: %M)"
kor "화일을 열지 못했습니다.: '%-.200s' (에러번호: %M)"
nor "Kan ikke åpne fila: '%-.200s' (Feilkode: %M)"
norwegian-ny "Kan ikkje åpne fila: '%-.200s' (Feilkode: %M)"
@@ -375,18 +363,17 @@ ER_CANT_OPEN_FILE
swe "Kan inte använda '%-.200s' (Felkod: %M)"
ukr "Не можу відкрити файл: '%-.200s' (помилка: %M)"
ER_FILE_NOT_FOUND
- cze "Nemohu naj-Bít soubor '%-.200s' (chybový kód: %M)"
+ cze "Nemohu najít soubor '%-.200s' (chybový kód: %M)"
dan "Kan ikke finde fila: '%-.200s' (Fejlkode: %M)"
nla "Kan de file: '%-.200s' niet vinden (Errcode: %M)"
eng "Can't find file: '%-.200s' (errno: %M)"
- jps "'%-.200s' ファイルを見付ける事ができません.(errno: %M)",
est "Ei suuda leida faili '%-.200s' (veakood: %M)"
fre "Ne peut trouver le fichier: '%-.200s' (Errcode: %M)"
ger "Kann Datei '%-.200s' nicht finden (Fehler: %M)"
greek "Δεν βρέθηκε το αρχείο: '%-.200s' (κωδικός λάθους: %M)"
hun "A(z) '%-.200s' file nem talalhato (hibakod: %M)"
ita "Impossibile trovare il file: '%-.200s' (errno: %M)"
- jpn "'%-.200s' ファイルを見付ける事ができません.(errno: %M)"
+ jpn "ファイル '%-.200s' が見つかりません。(エラー番号: %M)"
kor "화일을 찾지 못했습니다.: '%-.200s' (에러번호: %M)"
nor "Kan ikke finne fila: '%-.200s' (Feilkode: %M)"
norwegian-ny "Kan ikkje finne fila: '%-.200s' (Feilkode: %M)"
@@ -400,18 +387,17 @@ ER_FILE_NOT_FOUND
swe "Hittar inte filen '%-.200s' (Felkod: %M)"
ukr "Не можу знайти файл: '%-.200s' (помилка: %M)"
ER_CANT_READ_DIR
- cze "Nemohu -Bčíst adresář '%-.192s' (chybový kód: %M)"
+ cze "Nemohu číst adresář '%-.192s' (chybový kód: %M)"
dan "Kan ikke læse folder '%-.192s' (Fejlkode: %M)"
nla "Kan de directory niet lezen van '%-.192s' (Errcode: %M)"
eng "Can't read dir of '%-.192s' (errno: %M)"
- jps "'%-.192s' ディレクトリが読めません.(errno: %M)",
est "Ei suuda lugeda kataloogi '%-.192s' (veakood: %M)"
fre "Ne peut lire le répertoire de '%-.192s' (Errcode: %M)"
ger "Verzeichnis von '%-.192s' nicht lesbar (Fehler: %M)"
greek "Δεν είναι δυνατό να διαβαστεί ο φάκελλος του '%-.192s' (κωδικός λάθους: %M)"
hun "A(z) '%-.192s' konyvtar nem olvashato. (hibakod: %M)"
ita "Impossibile leggere la directory di '%-.192s' (errno: %M)"
- jpn "'%-.192s' ディレクトリが読めません.(errno: %M)"
+ jpn "ディレクトリ '%-.192s' を読み込めません。(エラー番号: %M)"
kor "'%-.192s'디렉토리를 읽지 못했습니다. (에러번호: %M)"
nor "Kan ikke lese katalogen '%-.192s' (Feilkode: %M)"
norwegian-ny "Kan ikkje lese katalogen '%-.192s' (Feilkode: %M)"
@@ -425,18 +411,17 @@ ER_CANT_READ_DIR
swe "Kan inte läsa från bibliotek '%-.192s' (Felkod: %M)"
ukr "Не можу прочитати теку '%-.192s' (помилка: %M)"
ER_CANT_SET_WD
- cze "Nemohu zm-Běnit adresář na '%-.192s' (chybový kód: %M)"
+ cze "Nemohu změnit adresář na '%-.192s' (chybový kód: %M)"
dan "Kan ikke skifte folder til '%-.192s' (Fejlkode: %M)"
nla "Kan de directory niet veranderen naar '%-.192s' (Errcode: %M)"
eng "Can't change dir to '%-.192s' (errno: %M)"
- jps "'%-.192s' ディレクトリに chdir できません.(errno: %M)",
est "Ei suuda siseneda kataloogi '%-.192s' (veakood: %M)"
fre "Ne peut changer le répertoire pour '%-.192s' (Errcode: %M)"
ger "Kann nicht in das Verzeichnis '%-.192s' wechseln (Fehler: %M)"
greek "Αδύνατη η αλλαγή του τρέχοντος καταλόγου σε '%-.192s' (κωδικός λάθους: %M)"
hun "Konyvtarvaltas nem lehetseges a(z) '%-.192s'-ba. (hibakod: %M)"
ita "Impossibile cambiare la directory in '%-.192s' (errno: %M)"
- jpn "'%-.192s' ディレクトリに chdir できません.(errno: %M)"
+ jpn "ディレクトリ '%-.192s' に移動できません。(エラー番号: %M)"
kor "'%-.192s'디렉토리로 이동할 수 없었습니다. (에러번호: %M)"
nor "Kan ikke skifte katalog til '%-.192s' (Feilkode: %M)"
norwegian-ny "Kan ikkje skifte katalog til '%-.192s' (Feilkode: %M)"
@@ -450,7 +435,7 @@ ER_CANT_SET_WD
swe "Kan inte byta till '%-.192s' (Felkod: %M)"
ukr "Не можу перейти у теку '%-.192s' (помилка: %M)"
ER_CHECKREAD
- cze "Z-Báznam byl změněn od posledního čtení v tabulce '%-.192s'"
+ cze "Záznam byl změněn od posledního čtení v tabulce '%-.192s'"
dan "Posten er ændret siden sidste læsning '%-.192s'"
nla "Record is veranderd sinds de laatste lees activiteit in de tabel '%-.192s'"
eng "Record has changed since last read in table '%-.192s'"
@@ -460,6 +445,7 @@ ER_CHECKREAD
greek "Η εγγραφή έχει αλλάξει από την τελευταία φορά που ανασύρθηκε από τον πίνακα '%-.192s'"
hun "A(z) '%-.192s' tablaban talalhato rekord megvaltozott az utolso olvasas ota"
ita "Il record e` cambiato dall'ultima lettura della tabella '%-.192s'"
+ jpn "表 '%-.192s' の最後の読み込み時点から、レコードが変化しました。"
kor "테이블 '%-.192s'에서 마지막으로 읽은 후 Record가 변경되었습니다."
nor "Posten har blitt endret siden den ble lest '%-.192s'"
norwegian-ny "Posten har vorte endra sidan den sist vart lesen '%-.192s'"
@@ -472,44 +458,42 @@ ER_CHECKREAD
spa "El registro ha cambiado desde la ultima lectura de la tabla '%-.192s'"
swe "Posten har förändrats sedan den lästes i register '%-.192s'"
ukr "Запис було змінено з часу останнього читання з таблиці '%-.192s'"
-ER_DISK_FULL
- cze "Disk je pln-Bý (%s), čekám na uvolnění nějakého místa ..."
- dan "Ikke mere diskplads (%s). Venter på at få frigjort plads..."
- nla "Schijf vol (%s). Aan het wachten totdat er ruimte vrij wordt gemaakt..."
- eng "Disk full (%s); waiting for someone to free some space..."
- jps "Disk full (%s). 誰かが何かを減らすまでまってください...",
- est "Ketas täis (%s). Ootame kuni tekib vaba ruumi..."
- fre "Disque plein (%s). J'attend que quelqu'un libère de l'espace..."
- ger "Festplatte voll (%s). Warte, bis jemand Platz schafft ..."
- greek "Δεν υπάρχει χώρος στο δίσκο (%s). Παρακαλώ, περιμένετε να ελευθερωθεί χώρος..."
- hun "A lemez megtelt (%s)."
- ita "Disco pieno (%s). In attesa che qualcuno liberi un po' di spazio..."
- jpn "Disk full (%s). 誰かが何かを減らすまでまってください..."
- kor "Disk full (%s). 다른 사람이 지울때까지 기다립니다..."
- nor "Ikke mer diskplass (%s). Venter på å få frigjort plass..."
- norwegian-ny "Ikkje meir diskplass (%s). Ventar på å få frigjort plass..."
- pol "Dysk pełny (%s). Oczekiwanie na zwolnienie miejsca..."
- por "Disco cheio (%s). Aguardando alguém liberar algum espaço..."
- rum "Hard-disk-ul este plin (%s). Astept sa se elibereze ceva spatiu..."
- rus "Диск заполнен. (%s). Ожидаем, пока кто-то не уберет после себя мусор..."
- serbian "Disk je pun (%s). Čekam nekoga da dođe i oslobodi nešto mesta..."
- slo "Disk je plný (%s), čakám na uvoľnenie miesta..."
- spa "Disco lleno (%s). Esperando para que se libere algo de espacio..."
- swe "Disken är full (%s). Väntar tills det finns ledigt utrymme..."
- ukr "Диск заповнений (%s). Вичикую, доки звільниться трохи місця..."
+ER_DISK_FULL
+ cze "Disk je plný (%s), čekám na uvolnění nějakého místa ... (chybový kód: %M)"
+ dan "Ikke mere diskplads (%s). Venter på at få frigjort plads... (Fejlkode: %M)"
+ nla "Schijf vol (%s). Aan het wachten totdat er ruimte vrij wordt gemaakt... (Errcode: %M)"
+ eng "Disk full (%s); waiting for someone to free some space... (errno: %M)"
+ est "Ketas täis (%s). Ootame kuni tekib vaba ruumi... (veakood: %M)"
+ fre "Disque plein (%s). J'attend que quelqu'un libère de l'espace... (Errcode: %M)"
+ ger "Festplatte voll (%s). Warte, bis jemand Platz schafft ... (Fehler: %M)"
+ greek "Δεν υπάρχει χώρος στο δίσκο (%s). Παρακαλώ, περιμένετε να ελευθερωθεί χώρος... (κωδικός λάθους: %M)"
+ hun "A lemez megtelt (%s). (hibakod: %M)"
+ ita "Disco pieno (%s). In attesa che qualcuno liberi un po' di spazio... (errno: %M)"
+ jpn "ディスク領域不足です(%s)。(エラー番号: %M)"
+ kor "Disk full (%s). 다른 사람이 지울때까지 기다립니다... (에러번호: %M)"
+ nor "Ikke mer diskplass (%s). Venter på å få frigjort plass... (Feilkode: %M)"
+ norwegian-ny "Ikkje meir diskplass (%s). Ventar på å få frigjort plass... (Feilkode: %M)"
+ pol "Dysk pełny (%s). Oczekiwanie na zwolnienie miejsca... (Kod błędu: %M)"
+ por "Disco cheio (%s). Aguardando alguém liberar algum espaço... (erro no. %M)"
+ rum "Hard-disk-ul este plin (%s). Astept sa se elibereze ceva spatiu... (Eroare: %M)"
+ rus "Диск заполнен. (%s). Ожидаем, пока кто-то не уберет после себя мусор... (ошибка: %M)"
+ serbian "Disk je pun (%s). Čekam nekoga da dođe i oslobodi nešto mesta... (errno: %M)"
+ slo "Disk je plný (%s), čakám na uvoľnenie miesta... (chybový kód: %M)"
+ spa "Disco lleno (%s). Esperando para que se libere algo de espacio... (Error: %M)"
+ swe "Disken är full (%s). Väntar tills det finns ledigt utrymme... (Felkod: %M)"
+ ukr "Диск заповнений (%s). Вичикую, доки звільниться трохи місця... (помилка: %M)"
ER_DUP_KEY 23000
- cze "Nemohu zapsat, zdvojen-Bý klíč v tabulce '%-.192s'"
+ cze "Nemohu zapsat, zdvojený klíč v tabulce '%-.192s'"
dan "Kan ikke skrive, flere ens nøgler i tabellen '%-.192s'"
nla "Kan niet schrijven, dubbele zoeksleutel in tabel '%-.192s'"
eng "Can't write; duplicate key in table '%-.192s'"
- jps "table '%-.192s' に key が重複していて書きこめません",
est "Ei saa kirjutada, korduv võti tabelis '%-.192s'"
fre "Ecriture impossible, doublon dans une clé de la table '%-.192s'"
ger "Kann nicht speichern, Grund: doppelter Schlüssel in Tabelle '%-.192s'"
greek "Δεν είναι δυνατή η καταχώρηση, η τιμή υπάρχει ήδη στον πίνακα '%-.192s'"
hun "Irasi hiba, duplikalt kulcs a '%-.192s' tablaban."
ita "Scrittura impossibile: chiave duplicata nella tabella '%-.192s'"
- jpn "table '%-.192s' に key が重複していて書きこめません"
+ jpn "書き込めません。表 '%-.192s' に重複するキーがあります。"
kor "기록할 수 없읍니다., 테이블 '%-.192s'에서 중복 키"
nor "Kan ikke skrive, flere like nøkler i tabellen '%-.192s'"
norwegian-ny "Kan ikkje skrive, flere like nyklar i tabellen '%-.192s'"
@@ -523,7 +507,7 @@ ER_DUP_KEY 23000
swe "Kan inte skriva, dubbel söknyckel i register '%-.192s'"
ukr "Не можу записати, дублюючийся ключ в таблиці '%-.192s'"
ER_ERROR_ON_CLOSE
- cze "Chyba p-Bři zavírání '%-.192s' (chybový kód: %M)"
+ cze "Chyba při zavírání '%-.192s' (chybový kód: %M)"
dan "Fejl ved lukning af '%-.192s' (Fejlkode: %M)"
nla "Fout bij het sluiten van '%-.192s' (Errcode: %M)"
eng "Error on close of '%-.192s' (errno: %M)"
@@ -533,6 +517,7 @@ ER_ERROR_ON_CLOSE
greek "Παρουσιάστηκε πρόβλημα κλείνοντας το '%-.192s' (κωδικός λάθους: %M)"
hun "Hiba a(z) '%-.192s' zarasakor. (hibakod: %M)"
ita "Errore durante la chiusura di '%-.192s' (errno: %M)"
+ jpn "'%-.192s' のクローズ時エラー (エラー番号: %M)"
kor "'%-.192s'닫는 중 에러 (에러번호: %M)"
nor "Feil ved lukking av '%-.192s' (Feilkode: %M)"
norwegian-ny "Feil ved lukking av '%-.192s' (Feilkode: %M)"
@@ -546,18 +531,17 @@ ER_ERROR_ON_CLOSE
swe "Fick fel vid stängning av '%-.192s' (Felkod: %M)"
ukr "Не можу закрити '%-.192s' (помилка: %M)"
ER_ERROR_ON_READ
- cze "Chyba p-Bři čtení souboru '%-.200s' (chybový kód: %M)"
+ cze "Chyba při čtení souboru '%-.200s' (chybový kód: %M)"
dan "Fejl ved læsning af '%-.200s' (Fejlkode: %M)"
nla "Fout bij het lezen van file '%-.200s' (Errcode: %M)"
eng "Error reading file '%-.200s' (errno: %M)"
- jps "'%-.200s' ファイルの読み込みエラー (errno: %M)",
est "Viga faili '%-.200s' lugemisel (veakood: %M)"
fre "Erreur en lecture du fichier '%-.200s' (Errcode: %M)"
ger "Fehler beim Lesen der Datei '%-.200s' (Fehler: %M)"
greek "Πρόβλημα κατά την ανάγνωση του αρχείου '%-.200s' (κωδικός λάθους: %M)"
hun "Hiba a '%-.200s'file olvasasakor. (hibakod: %M)"
ita "Errore durante la lettura del file '%-.200s' (errno: %M)"
- jpn "'%-.200s' ファイルの読み込みエラー (errno: %M)"
+ jpn "ファイル '%-.200s' の読み込みエラー (エラー番号: %M)"
kor "'%-.200s'화일 읽기 에러 (에러번호: %M)"
nor "Feil ved lesing av '%-.200s' (Feilkode: %M)"
norwegian-ny "Feil ved lesing av '%-.200s' (Feilkode: %M)"
@@ -571,18 +555,17 @@ ER_ERROR_ON_READ
swe "Fick fel vid läsning av '%-.200s' (Felkod %M)"
ukr "Не можу прочитати файл '%-.200s' (помилка: %M)"
ER_ERROR_ON_RENAME
- cze "Chyba p-Bři přejmenování '%-.210s' na '%-.210s' (chybový kód: %M)"
+ cze "Chyba při přejmenování '%-.210s' na '%-.210s' (chybový kód: %M)"
dan "Fejl ved omdøbning af '%-.210s' til '%-.210s' (Fejlkode: %M)"
nla "Fout bij het hernoemen van '%-.210s' naar '%-.210s' (Errcode: %M)"
eng "Error on rename of '%-.210s' to '%-.210s' (errno: %M)"
- jps "'%-.210s' を '%-.210s' に rename できません (errno: %M)",
est "Viga faili '%-.210s' ümbernimetamisel '%-.210s'-ks (veakood: %M)"
fre "Erreur en renommant '%-.210s' en '%-.210s' (Errcode: %M)"
ger "Fehler beim Umbenennen von '%-.210s' in '%-.210s' (Fehler: %M)"
greek "Πρόβλημα κατά την μετονομασία του αρχείου '%-.210s' to '%-.210s' (κωδικός λάθους: %M)"
hun "Hiba a '%-.210s' file atnevezesekor '%-.210s'. (hibakod: %M)"
ita "Errore durante la rinominazione da '%-.210s' a '%-.210s' (errno: %M)"
- jpn "'%-.210s' を '%-.210s' に rename できません (errno: %M)"
+ jpn "'%-.210s' の名前を '%-.210s' に変更できません (エラー番号: %M)"
kor "'%-.210s'를 '%-.210s'로 이름 변경중 에러 (에러번호: %M)"
nor "Feil ved omdøping av '%-.210s' til '%-.210s' (Feilkode: %M)"
norwegian-ny "Feil ved omdøyping av '%-.210s' til '%-.210s' (Feilkode: %M)"
@@ -596,18 +579,17 @@ ER_ERROR_ON_RENAME
swe "Kan inte byta namn från '%-.210s' till '%-.210s' (Felkod: %M)"
ukr "Не можу перейменувати '%-.210s' у '%-.210s' (помилка: %M)"
ER_ERROR_ON_WRITE
- cze "Chyba p-Bři zápisu do souboru '%-.200s' (chybový kód: %M)"
+ cze "Chyba při zápisu do souboru '%-.200s' (chybový kód: %M)"
dan "Fejl ved skriving av filen '%-.200s' (Fejlkode: %M)"
nla "Fout bij het wegschrijven van file '%-.200s' (Errcode: %M)"
eng "Error writing file '%-.200s' (errno: %M)"
- jps "'%-.200s' ファイルを書く事ができません (errno: %M)",
est "Viga faili '%-.200s' kirjutamisel (veakood: %M)"
fre "Erreur d'écriture du fichier '%-.200s' (Errcode: %M)"
ger "Fehler beim Speichern der Datei '%-.200s' (Fehler: %M)"
greek "Πρόβλημα κατά την αποθήκευση του αρχείου '%-.200s' (κωδικός λάθους: %M)"
hun "Hiba a '%-.200s' file irasakor. (hibakod: %M)"
ita "Errore durante la scrittura del file '%-.200s' (errno: %M)"
- jpn "'%-.200s' ファイルを書く事ができません (errno: %M)"
+ jpn "ファイル '%-.200s' の書き込みエラー (エラー番号: %M)"
kor "'%-.200s'화일 기록 중 에러 (에러번호: %M)"
nor "Feil ved skriving av fila '%-.200s' (Feilkode: %M)"
norwegian-ny "Feil ved skriving av fila '%-.200s' (Feilkode: %M)"
@@ -621,18 +603,17 @@ ER_ERROR_ON_WRITE
swe "Fick fel vid skrivning till '%-.200s' (Felkod %M)"
ukr "Не можу записати файл '%-.200s' (помилка: %M)"
ER_FILE_USED
- cze "'%-.192s' je zam-Bčen proti změnám"
+ cze "'%-.192s' je zamčen proti změnám"
dan "'%-.192s' er låst mod opdateringer"
nla "'%-.192s' is geblokeerd tegen veranderingen"
eng "'%-.192s' is locked against change"
- jps "'%-.192s' はロックされています",
est "'%-.192s' on lukustatud muudatuste vastu"
fre "'%-.192s' est verrouillé contre les modifications"
ger "'%-.192s' ist für Änderungen gesperrt"
greek "'%-.192s' δεν επιτρέπονται αλλαγές"
hun "'%-.192s' a valtoztatas ellen zarolva"
ita "'%-.192s' e` soggetto a lock contro i cambiamenti"
- jpn "'%-.192s' はロックされています"
+ jpn "'%-.192s' はロックされています。"
kor "'%-.192s'가 변경할 수 없도록 잠겨있읍니다."
nor "'%-.192s' er låst mot oppdateringer"
norwegian-ny "'%-.192s' er låst mot oppdateringar"
@@ -646,18 +627,17 @@ ER_FILE_USED
swe "'%-.192s' är låst mot användning"
ukr "'%-.192s' заблокований на внесення змін"
ER_FILSORT_ABORT
- cze "T-Břídění přerušeno"
+ cze "Třídění přerušeno"
dan "Sortering afbrudt"
nla "Sorteren afgebroken"
eng "Sort aborted"
- jps "Sort 中断",
est "Sorteerimine katkestatud"
fre "Tri alphabétique abandonné"
ger "Sortiervorgang abgebrochen"
greek "Η διαδικασία ταξινόμισης ακυρώθηκε"
hun "Sikertelen rendezes"
ita "Operazione di ordinamento abbandonata"
- jpn "Sort 中断"
+ jpn "ソート処理を中断しました。"
kor "소트가 중단되었습니다."
nor "Sortering avbrutt"
norwegian-ny "Sortering avbrote"
@@ -675,14 +655,13 @@ ER_FORM_NOT_FOUND
dan "View '%-.192s' eksisterer ikke for '%-.192s'"
nla "View '%-.192s' bestaat niet voor '%-.192s'"
eng "View '%-.192s' doesn't exist for '%-.192s'"
- jps "View '%-.192s' が '%-.192s' に定義されていません",
est "Vaade '%-.192s' ei eksisteeri '%-.192s' jaoks"
fre "La vue (View) '%-.192s' n'existe pas pour '%-.192s'"
ger "View '%-.192s' existiert für '%-.192s' nicht"
greek "Το View '%-.192s' δεν υπάρχει για '%-.192s'"
hun "A(z) '%-.192s' nezet nem letezik a(z) '%-.192s'-hoz"
ita "La view '%-.192s' non esiste per '%-.192s'"
- jpn "View '%-.192s' が '%-.192s' に定義されていません"
+ jpn "ビュー '%-.192s' は '%-.192s' に存在しません。"
kor "뷰 '%-.192s'가 '%-.192s'에서는 존재하지 않읍니다."
nor "View '%-.192s' eksisterer ikke for '%-.192s'"
norwegian-ny "View '%-.192s' eksisterar ikkje for '%-.192s'"
@@ -696,7 +675,7 @@ ER_FORM_NOT_FOUND
swe "Formulär '%-.192s' finns inte i '%-.192s'"
ukr "Вигляд '%-.192s' не існує для '%-.192s'"
ER_GET_ERRNO
- cze "Obsluha tabulky vr-Bátila chybu %M"
+ cze "Obsluha tabulky vrátila chybu %M"
dan "Modtog fejl %M fra tabel håndteringen"
nla "Fout %M van tabel handler"
eng "Got error %M from storage engine"
@@ -706,7 +685,7 @@ ER_GET_ERRNO
greek "Ελήφθη μήνυμα λάθους %M από τον χειριστή πίνακα (table handler)"
hun "%M hibajelzes a tablakezelotol"
ita "Rilevato l'errore %M dal gestore delle tabelle"
- jpn "Got error %M from table handler"
+ jpn "ストレージエンジンがエラー %M を返しました。"
kor "테이블 handler에서 %M 에러가 발생 하였습니다."
nor "Mottok feil %M fra tabell håndterer"
norwegian-ny "Mottok feil %M fra tabell handterar"
@@ -720,7 +699,7 @@ ER_GET_ERRNO
swe "Fick felkod %M från databashanteraren"
ukr "Отримано помилку %M від дескриптора таблиці"
ER_ILLEGAL_HA
- cze "Obsluha tabulky '%-.192s' nem-Bá tento parametr"
+ cze "Obsluha tabulky '%-.192s' nemá tento parametr"
dan "Denne mulighed eksisterer ikke for tabeltypen '%-.192s'"
nla "Tabel handler voor '%-.192s' heeft deze optie niet"
eng "Table storage engine for '%-.192s' doesn't have this option"
@@ -730,7 +709,7 @@ ER_ILLEGAL_HA
greek "Ο χειριστής πίνακα (table handler) για '%-.192s' δεν διαθέτει αυτή την επιλογή"
hun "A(z) '%-.192s' tablakezelonek nincs ilyen opcioja"
ita "Il gestore delle tabelle per '%-.192s' non ha questa opzione"
- jpn "Table handler for '%-.192s' doesn't have this option"
+ jpn "表 '%-.192s' のストレージエンジンでは提供されないオプションです。"
kor "'%-.192s'의 테이블 handler는 이러한 옵션을 제공하지 않읍니다."
nor "Tabell håndtereren for '%-.192s' har ikke denne muligheten"
norwegian-ny "Tabell håndteraren for '%-.192s' har ikkje denne moglegheita"
@@ -744,18 +723,17 @@ ER_ILLEGAL_HA
swe "Tabellhanteraren for tabell '%-.192s' stödjer ej detta"
ukr "Дескриптор таблиці '%-.192s' не має цієї властивості"
ER_KEY_NOT_FOUND
- cze "Nemohu naj-Bít záznam v '%-.192s'"
+ cze "Nemohu najít záznam v '%-.192s'"
dan "Kan ikke finde posten i '%-.192s'"
nla "Kan record niet vinden in '%-.192s'"
eng "Can't find record in '%-.192s'"
- jps "'%-.192s'のなかにレコードが見付かりません",
est "Ei suuda leida kirjet '%-.192s'-s"
fre "Ne peut trouver l'enregistrement dans '%-.192s'"
ger "Kann Datensatz in '%-.192s' nicht finden"
greek "Αδύνατη η ανεύρεση εγγραφής στο '%-.192s'"
hun "Nem talalhato a rekord '%-.192s'-ben"
ita "Impossibile trovare il record in '%-.192s'"
- jpn "'%-.192s'のなかにレコードが見付かりません"
+ jpn "'%-.192s' にレコードが見つかりません。"
kor "'%-.192s'에서 레코드를 찾을 수 없읍니다."
nor "Kan ikke finne posten i '%-.192s'"
norwegian-ny "Kan ikkje finne posten i '%-.192s'"
@@ -769,18 +747,17 @@ ER_KEY_NOT_FOUND
swe "Hittar inte posten '%-.192s'"
ukr "Не можу записати у '%-.192s'"
ER_NOT_FORM_FILE
- cze "Nespr-Bávná informace v souboru '%-.200s'"
+ cze "Nesprávná informace v souboru '%-.200s'"
dan "Forkert indhold i: '%-.200s'"
nla "Verkeerde info in file: '%-.200s'"
eng "Incorrect information in file: '%-.200s'"
- jps "ファイル '%-.200s' の info が間違っているようです",
est "Vigane informatsioon failis '%-.200s'"
fre "Information erronnée dans le fichier: '%-.200s'"
ger "Falsche Information in Datei '%-.200s'"
greek "Λάθος πληροφορίες στο αρχείο: '%-.200s'"
hun "Ervenytelen info a file-ban: '%-.200s'"
ita "Informazione errata nel file: '%-.200s'"
- jpn "ファイル '%-.200s' の info が間違っているようです"
+ jpn "ファイル '%-.200s' 内の情報が不正です。"
kor "화일의 부정확한 정보: '%-.200s'"
nor "Feil informasjon i filen: '%-.200s'"
norwegian-ny "Feil informasjon i fila: '%-.200s'"
@@ -794,18 +771,17 @@ ER_NOT_FORM_FILE
swe "Felaktig fil: '%-.200s'"
ukr "Хибна інформація у файлі: '%-.200s'"
ER_NOT_KEYFILE
- cze "Nespr-Bávný klíč pro tabulku '%-.200s'; pokuste se ho opravit"
+ cze "Nesprávný klíč pro tabulku '%-.200s'; pokuste se ho opravit"
dan "Fejl i indeksfilen til tabellen '%-.200s'; prøv at reparere den"
nla "Verkeerde zoeksleutel file voor tabel: '%-.200s'; probeer het te repareren"
eng "Incorrect key file for table '%-.200s'; try to repair it"
- jps "'%-.200s' テーブルの key file が間違っているようです. 修復をしてください",
est "Tabeli '%-.200s' võtmefail on vigane; proovi seda parandada"
fre "Index corrompu dans la table: '%-.200s'; essayez de le réparer"
ger "Fehlerhafte Index-Datei für Tabelle '%-.200s'; versuche zu reparieren"
greek "Λάθος αρχείο ταξινόμισης (key file) για τον πίνακα: '%-.200s'; Παρακαλώ, διορθώστε το!"
hun "Ervenytelen kulcsfile a tablahoz: '%-.200s'; probalja kijavitani!"
ita "File chiave errato per la tabella : '%-.200s'; prova a riparalo"
- jpn "'%-.200s' テーブルの key file が間違っているようです. 修復をしてください"
+ jpn "表 '%-.200s' の索引ファイル(key file)の内容が不正です。修復を試行してください。"
kor "'%-.200s' 테이블의 부정확한 키 존재. 수정하시오!"
nor "Tabellen '%-.200s' har feil i nøkkelfilen; forsøk å reparer den"
norwegian-ny "Tabellen '%-.200s' har feil i nykkelfila; prøv å reparere den"
@@ -819,18 +795,17 @@ ER_NOT_KEYFILE
swe "Fatalt fel vid hantering av register '%-.200s'; kör en reparation"
ukr "Хибний файл ключей для таблиці: '%-.200s'; Спробуйте його відновити"
ER_OLD_KEYFILE
- cze "Star-Bý klíčový soubor pro '%-.192s'; opravte ho."
+ cze "Starý klíčový soubor pro '%-.192s'; opravte ho."
dan "Gammel indeksfil for tabellen '%-.192s'; reparer den"
nla "Oude zoeksleutel file voor tabel '%-.192s'; repareer het!"
eng "Old key file for table '%-.192s'; repair it!"
- jps "'%-.192s' テーブルは古い形式の key file のようです; 修復をしてください",
est "Tabeli '%-.192s' võtmefail on aegunud; paranda see!"
fre "Vieux fichier d'index pour la table '%-.192s'; réparez le!"
ger "Alte Index-Datei für Tabelle '%-.192s'. Bitte reparieren"
greek "Παλαιό αρχείο ταξινόμισης (key file) για τον πίνακα '%-.192s'; Παρακαλώ, διορθώστε το!"
hun "Regi kulcsfile a '%-.192s'tablahoz; probalja kijavitani!"
ita "File chiave vecchio per la tabella '%-.192s'; riparalo!"
- jpn "'%-.192s' テーブルは古い形式の key file のようです; 修復をしてください"
+ jpn "表 '%-.192s' の索引ファイル(key file)は古い形式です。修復してください。"
kor "'%-.192s' 테이블의 이전버젼의 키 존재. 수정하시오!"
nor "Gammel nøkkelfil for tabellen '%-.192s'; reparer den!"
norwegian-ny "Gammel nykkelfil for tabellen '%-.192s'; reparer den!"
@@ -844,18 +819,17 @@ ER_OLD_KEYFILE
swe "Gammal nyckelfil '%-.192s'; reparera registret"
ukr "Старий файл ключей для таблиці '%-.192s'; Відновіть його!"
ER_OPEN_AS_READONLY
- cze "'%-.192s' je jen pro -Bčtení"
+ cze "'%-.192s' je jen pro čtení"
dan "'%-.192s' er skrivebeskyttet"
nla "'%-.192s' is alleen leesbaar"
eng "Table '%-.192s' is read only"
- jps "'%-.192s' は読み込み専用です",
est "Tabel '%-.192s' on ainult lugemiseks"
fre "'%-.192s' est en lecture seulement"
ger "Tabelle '%-.192s' ist nur lesbar"
greek "'%-.192s' επιτρέπεται μόνο η ανάγνωση"
hun "'%-.192s' irasvedett"
ita "'%-.192s' e` di sola lettura"
- jpn "'%-.192s' は読み込み専用です"
+ jpn "表 '%-.192s' は読み込み専用です。"
kor "테이블 '%-.192s'는 읽기전용 입니다."
nor "'%-.192s' er skrivebeskyttet"
norwegian-ny "'%-.192s' er skrivetryggja"
@@ -869,18 +843,17 @@ ER_OPEN_AS_READONLY
swe "'%-.192s' är skyddad mot förändring"
ukr "Таблиця '%-.192s' тільки для читання"
ER_OUTOFMEMORY HY001 S1001
- cze "M-Bálo paměti. Přestartujte daemona a zkuste znovu (je potřeba %d bytů)"
+ cze "Málo paměti. Přestartujte daemona a zkuste znovu (je potřeba %d bytů)"
dan "Ikke mere hukommelse. Genstart serveren og prøv igen (mangler %d bytes)"
nla "Geen geheugen meer. Herstart server en probeer opnieuw (%d bytes nodig)"
eng "Out of memory; restart server and try again (needed %d bytes)"
- jps "Out of memory. デーモンをリスタートしてみてください (%d bytes 必要)",
est "Mälu sai otsa. Proovi MariaDB uuesti käivitada (puudu jäi %d baiti)"
fre "Manque de mémoire. Redémarrez le démon et ré-essayez (%d octets nécessaires)"
ger "Kein Speicher vorhanden (%d Bytes benötigt). Bitte Server neu starten"
greek "Δεν υπάρχει διαθέσιμη μνήμη. Προσπαθήστε πάλι, επανεκινώντας τη διαδικασία (demon) (χρειάζονται %d bytes)"
hun "Nincs eleg memoria. Inditsa ujra a demont, es probalja ismet. (%d byte szukseges.)"
ita "Memoria esaurita. Fai ripartire il demone e riprova (richiesti %d bytes)"
- jpn "Out of memory. デーモンをリスタートしてみてください (%d bytes 必要)"
+ jpn "メモリが不足しています。サーバーを再起動してみてください。(%d バイトの割り当てに失敗)"
kor "Out of memory. 데몬을 재 실행 후 다시 시작하시오 (needed %d bytes)"
nor "Ikke mer minne. Star på nytt tjenesten og prøv igjen (trengte %d byter)"
norwegian-ny "Ikkje meir minne. Start på nytt tenesten og prøv igjen (trengte %d bytar)"
@@ -894,18 +867,17 @@ ER_OUTOFMEMORY HY001 S1001
swe "Oväntat slut på minnet, starta om programmet och försök på nytt (Behövde %d bytes)"
ukr "Брак пам'яті. Рестартуйте сервер та спробуйте знову (потрібно %d байтів)"
ER_OUT_OF_SORTMEMORY HY001 S1001
- cze "M-Bálo paměti pro třídění. Zvyšte velikost třídícího bufferu"
+ cze "Málo paměti pro třídění. Zvyšte velikost třídícího bufferu"
dan "Ikke mere sorteringshukommelse. Øg sorteringshukommelse (sort buffer size) for serveren"
nla "Geen geheugen om te sorteren. Verhoog de server sort buffer size"
eng "Out of sort memory, consider increasing server sort buffer size"
- jps "Out of sort memory. sort buffer size が足りないようです.",
est "Mälu sai sorteerimisel otsa. Suurenda MariaDB-i sorteerimispuhvrit"
fre "Manque de mémoire pour le tri. Augmentez-la."
ger "Kein Speicher zum Sortieren vorhanden. sort_buffer_size sollte im Server erhöht werden"
greek "Δεν υπάρχει διαθέσιμη μνήμη για ταξινόμιση. Αυξήστε το sort buffer size για τη διαδικασία (demon)"
hun "Nincs eleg memoria a rendezeshez. Novelje a rendezo demon puffermeretet"
ita "Memoria per gli ordinamenti esaurita. Incrementare il 'sort_buffer' al demone"
- jpn "Out of sort memory. sort buffer size が足りないようです."
+ jpn "ソートメモリが不足しています。ソートバッファサイズ(sort buffer size)の増加を検討してください。"
kor "Out of sort memory. daemon sort buffer의 크기를 증가시키세요"
nor "Ikke mer sorteringsminne. Vurder å øke sorteringsminnet (sort buffer size) for tjenesten"
norwegian-ny "Ikkje meir sorteringsminne. Vurder å auke sorteringsminnet (sorteringsbuffer storleik) for tenesten"
@@ -919,18 +891,17 @@ ER_OUT_OF_SORTMEMORY HY001 S1001
swe "Sorteringsbufferten räcker inte till. Kontrollera startparametrarna"
ukr "Брак пам'яті для сортування. Треба збільшити розмір буфера сортування у сервера"
ER_UNEXPECTED_EOF
- cze "Neo-Bčekávaný konec souboru při čtení '%-.192s' (chybový kód: %M)"
+ cze "Neočekávaný konec souboru při čtení '%-.192s' (chybový kód: %M)"
dan "Uventet afslutning på fil (eof) ved læsning af filen '%-.192s' (Fejlkode: %M)"
nla "Onverwachte eof gevonden tijdens het lezen van file '%-.192s' (Errcode: %M)"
eng "Unexpected EOF found when reading file '%-.192s' (errno: %M)"
- jps "'%-.192s' ファイルを読み込み中に EOF が予期せぬ所で現れました. (errno: %M)",
est "Ootamatu faililõpumärgend faili '%-.192s' lugemisel (veakood: %M)"
fre "Fin de fichier inattendue en lisant '%-.192s' (Errcode: %M)"
ger "Unerwartetes Ende beim Lesen der Datei '%-.192s' (Fehler: %M)"
greek "Κατά τη διάρκεια της ανάγνωσης, βρέθηκε απροσδόκητα το τέλος του αρχείου '%-.192s' (κωδικός λάθους: %M)"
hun "Varatlan filevege-jel a '%-.192s'olvasasakor. (hibakod: %M)"
ita "Fine del file inaspettata durante la lettura del file '%-.192s' (errno: %M)"
- jpn "'%-.192s' ファイルを読み込み中に EOF が予期せぬ所で現れました. (errno: %M)"
+ jpn "ファイル '%-.192s' を読み込み中に予期せずファイルの終端に達しました。(エラー番号: %M)"
kor "'%-.192s' 화일을 읽는 도중 잘못된 eof을 발견 (에러번호: %M)"
nor "Uventet slutt på fil (eof) ved lesing av filen '%-.192s' (Feilkode: %M)"
norwegian-ny "Uventa slutt på fil (eof) ved lesing av fila '%-.192s' (Feilkode: %M)"
@@ -944,18 +915,17 @@ ER_UNEXPECTED_EOF
swe "Oväntat filslut vid läsning från '%-.192s' (Felkod: %M)"
ukr "Хибний кінець файлу '%-.192s' (помилка: %M)"
ER_CON_COUNT_ERROR 08004
- cze "P-Bříliš mnoho spojení"
+ cze "Příliš mnoho spojení"
dan "For mange forbindelser (connections)"
nla "Te veel verbindingen"
eng "Too many connections"
- jps "接続が多すぎます",
est "Liiga palju samaaegseid ühendusi"
fre "Trop de connexions"
ger "Zu viele Verbindungen"
greek "Υπάρχουν πολλές συνδέσεις..."
hun "Tul sok kapcsolat"
ita "Troppe connessioni"
- jpn "接続が多すぎます"
+ jpn "接続が多すぎます。"
kor "너무 많은 연결... max_connection을 증가 시키시오..."
nor "For mange tilkoblinger (connections)"
norwegian-ny "For mange tilkoplingar (connections)"
@@ -969,18 +939,17 @@ ER_CON_COUNT_ERROR 08004
swe "För många anslutningar"
ukr "Забагато з'єднань"
ER_OUT_OF_RESOURCES
- cze "M-Bálo prostoru/paměti pro thread"
+ cze "Málo prostoru/paměti pro thread"
dan "Udgået for tråde/hukommelse"
nla "Geen thread geheugen meer; controleer of mysqld of andere processen al het beschikbare geheugen gebruikt. Zo niet, dan moet u wellicht 'ulimit' gebruiken om mysqld toe te laten meer geheugen te benutten, of u kunt extra swap ruimte toevoegen"
eng "Out of memory; check if mysqld or some other process uses all available memory; if not, you may have to use 'ulimit' to allow mysqld to use more memory or you can add more swap space"
- jps "Out of memory; mysqld かその他のプロセスがメモリーを全て使っているか確認してください. メモリーを使い切っていない場合、'ulimit' を設定して mysqld のメモリー使用限界量を多くするか、swap space を増やしてみてください",
est "Mälu sai otsa. Võimalik, et aitab swap-i lisamine või käsu 'ulimit' abil MariaDB-le rohkema mälu kasutamise lubamine"
fre "Manque de 'threads'/mémoire"
ger "Kein Speicher mehr vorhanden. Prüfen Sie, ob mysqld oder ein anderer Prozess den gesamten Speicher verbraucht. Wenn nicht, sollten Sie mit 'ulimit' dafür sorgen, dass mysqld mehr Speicher benutzen darf, oder mehr Swap-Speicher einrichten"
greek "Πρόβλημα με τη διαθέσιμη μνήμη (Out of thread space/memory)"
hun "Elfogyott a thread-memoria"
ita "Fine dello spazio/memoria per i thread"
- jpn "Out of memory; mysqld かその他のプロセスがメモリーを全て使っているか確認してください. メモリーを使い切っていない場合、'ulimit' を設定して mysqld のメモリー使用限界量を多くするか、swap space を増やしてみてください"
+ jpn "メモリが不足しています。mysqld やその他のプロセスがメモリーを使い切っていないか確認して下さい。メモリーを使い切っていない場合、'ulimit'の設定等で mysqld のメモリー使用最大量を多くするか、スワップ領域を増やす必要があるかもしれません。"
# This message failed to convert from euc-kr, skipped
nor "Tomt for tråd plass/minne"
norwegian-ny "Tomt for tråd plass/minne"
@@ -994,18 +963,17 @@ ER_OUT_OF_RESOURCES
swe "Fick slut på minnet. Kontrollera om mysqld eller någon annan process använder allt tillgängligt minne. Om inte, försök använda 'ulimit' eller allokera mera swap"
ukr "Брак пам'яті; Перевірте чи mysqld або якісь інші процеси використовують усю доступну пам'ять. Як ні, то ви можете скористатися 'ulimit', аби дозволити mysqld використовувати більше пам'яті або ви можете додати більше місця під свап"
ER_BAD_HOST_ERROR 08S01
- cze "Nemohu zjistit jm-Béno stroje pro Vaši adresu"
+ cze "Nemohu zjistit jméno stroje pro Vaši adresu"
dan "Kan ikke få værtsnavn for din adresse"
nla "Kan de hostname niet krijgen van uw adres"
eng "Can't get hostname for your address"
- jps "その address の hostname が引けません.",
est "Ei suuda lahendada IP aadressi masina nimeks"
fre "Ne peut obtenir de hostname pour votre adresse"
ger "Kann Hostnamen für diese Adresse nicht erhalten"
greek "Δεν έγινε γνωστό το hostname για την address σας"
hun "A gepnev nem allapithato meg a cimbol"
ita "Impossibile risalire al nome dell'host dall'indirizzo (risoluzione inversa)"
- jpn "その address の hostname が引けません."
+ jpn "IPアドレスからホスト名を解決できません。"
kor "당신의 컴퓨터의 호스트이름을 얻을 수 없읍니다."
nor "Kan ikke få tak i vertsnavn for din adresse"
norwegian-ny "Kan ikkje få tak i vertsnavn for di adresse"
@@ -1019,7 +987,7 @@ ER_BAD_HOST_ERROR 08S01
swe "Kan inte hitta 'hostname' för din adress"
ukr "Не можу визначити ім'я хосту для вашої адреси"
ER_HANDSHAKE_ERROR 08S01
- cze "Chyba p-Bři ustavování spojení"
+ cze "Chyba při ustavování spojení"
dan "Forkert håndtryk (handshake)"
nla "Verkeerde handshake"
eng "Bad handshake"
@@ -1029,6 +997,7 @@ ER_HANDSHAKE_ERROR 08S01
greek "Η αναγνώριση (handshake) δεν έγινε σωστά"
hun "A kapcsolatfelvetel nem sikerult (Bad handshake)"
ita "Negoziazione impossibile"
+ jpn "ハンドシェイクエラー"
nor "Feil håndtrykk (handshake)"
norwegian-ny "Feil handtrykk (handshake)"
pol "Zły uchwyt(handshake)"
@@ -1041,18 +1010,17 @@ ER_HANDSHAKE_ERROR 08S01
swe "Fel vid initiering av kommunikationen med klienten"
ukr "Невірна установка зв'язку"
ER_DBACCESS_DENIED_ERROR 42000
- cze "P-Břístup pro uživatele '%-.48s'@'%-.64s' k databázi '%-.192s' není povolen"
+ cze "Přístup pro uživatele '%-.48s'@'%-.64s' k databázi '%-.192s' není povolen"
dan "Adgang nægtet bruger: '%-.48s'@'%-.64s' til databasen '%-.192s'"
nla "Toegang geweigerd voor gebruiker: '%-.48s'@'%-.64s' naar database '%-.192s'"
eng "Access denied for user '%-.48s'@'%-.64s' to database '%-.192s'"
- jps "ユーザー '%-.48s'@'%-.64s' の '%-.192s' データベースへのアクセスを拒否します",
est "Ligipääs keelatud kasutajale '%-.48s'@'%-.64s' andmebaasile '%-.192s'"
fre "Accès refusé pour l'utilisateur: '%-.48s'@'@%-.64s'. Base '%-.192s'"
ger "Benutzer '%-.48s'@'%-.64s' hat keine Zugriffsberechtigung für Datenbank '%-.192s'"
greek "Δεν επιτέρεται η πρόσβαση στο χρήστη: '%-.48s'@'%-.64s' στη βάση δεδομένων '%-.192s'"
hun "A(z) '%-.48s'@'%-.64s' felhasznalo szamara tiltott eleres az '%-.192s' adabazishoz."
ita "Accesso non consentito per l'utente: '%-.48s'@'%-.64s' al database '%-.192s'"
- jpn "ユーザー '%-.48s'@'%-.64s' の '%-.192s' データベースへのアクセスを拒否します"
+ jpn "ユーザー '%-.48s'@'%-.64s' によるデータベース '%-.192s' へのアクセスは拒否されました。"
kor "'%-.48s'@'%-.64s' 사용자는 '%-.192s' 데이타베이스에 접근이 거부 되었습니다."
nor "Tilgang nektet for bruker: '%-.48s'@'%-.64s' til databasen '%-.192s' nektet"
norwegian-ny "Tilgang ikkje tillate for brukar: '%-.48s'@'%-.64s' til databasen '%-.192s' nekta"
@@ -1065,18 +1033,17 @@ ER_DBACCESS_DENIED_ERROR 42000
swe "Användare '%-.48s'@'%-.64s' är ej berättigad att använda databasen %-.192s"
ukr "Доступ заборонено для користувача: '%-.48s'@'%-.64s' до бази данних '%-.192s'"
ER_ACCESS_DENIED_ERROR 28000
- cze "P-Břístup pro uživatele '%-.48s'@'%-.64s' (s heslem %s)"
+ cze "Přístup pro uživatele '%-.48s'@'%-.64s' (s heslem %s)"
dan "Adgang nægtet bruger: '%-.48s'@'%-.64s' (Bruger adgangskode: %s)"
nla "Toegang geweigerd voor gebruiker: '%-.48s'@'%-.64s' (Wachtwoord gebruikt: %s)"
eng "Access denied for user '%-.48s'@'%-.64s' (using password: %s)"
- jps "ユーザー '%-.48s'@'%-.64s' を拒否します.uUsing password: %s)",
est "Ligipääs keelatud kasutajale '%-.48s'@'%-.64s' (kasutab parooli: %s)"
fre "Accès refusé pour l'utilisateur: '%-.48s'@'@%-.64s' (mot de passe: %s)"
ger "Benutzer '%-.48s'@'%-.64s' hat keine Zugriffsberechtigung (verwendetes Passwort: %s)"
greek "Δεν επιτέρεται η πρόσβαση στο χρήστη: '%-.48s'@'%-.64s' (χρήση password: %s)"
hun "A(z) '%-.48s'@'%-.64s' felhasznalo szamara tiltott eleres. (Hasznalja a jelszot: %s)"
ita "Accesso non consentito per l'utente: '%-.48s'@'%-.64s' (Password: %s)"
- jpn "ユーザー '%-.48s'@'%-.64s' を拒否します.uUsing password: %s)"
+ jpn "ユーザー '%-.48s'@'%-.64s' のアクセスは拒否されました。(using password: %s)"
kor "'%-.48s'@'%-.64s' 사용자는 접근이 거부 되었습니다. (using password: %s)"
nor "Tilgang nektet for bruker: '%-.48s'@'%-.64s' (Bruker passord: %s)"
norwegian-ny "Tilgang ikke tillate for brukar: '%-.48s'@'%-.64s' (Brukar passord: %s)"
@@ -1089,18 +1056,17 @@ ER_ACCESS_DENIED_ERROR 28000
swe "Användare '%-.48s'@'%-.64s' är ej berättigad att logga in (Använder lösen: %s)"
ukr "Доступ заборонено для користувача: '%-.48s'@'%-.64s' (Використано пароль: %s)"
ER_NO_DB_ERROR 3D000
- cze "Nebyla vybr-Bána žádná databáze"
+ cze "Nebyla vybrána žádná databáze"
dan "Ingen database valgt"
nla "Geen database geselecteerd"
eng "No database selected"
- jps "データベースが選択されていません.",
est "Andmebaasi ei ole valitud"
fre "Aucune base n'a été sélectionnée"
ger "Keine Datenbank ausgewählt"
greek "Δεν επιλέχθηκε βάση δεδομένων"
hun "Nincs kivalasztott adatbazis"
ita "Nessun database selezionato"
- jpn "データベースが選択されていません."
+ jpn "データベースが選択されていません。"
kor "선택된 데이타베이스가 없습니다."
nor "Ingen database valgt"
norwegian-ny "Ingen database vald"
@@ -1114,18 +1080,17 @@ ER_NO_DB_ERROR 3D000
swe "Ingen databas i användning"
ukr "Базу данних не вибрано"
ER_UNKNOWN_COM_ERROR 08S01
- cze "Nezn-Bámý příkaz"
+ cze "Neznámý příkaz"
dan "Ukendt kommando"
nla "Onbekend commando"
eng "Unknown command"
- jps "そのコマンドは何?",
est "Tundmatu käsk"
fre "Commande inconnue"
ger "Unbekannter Befehl"
greek "Αγνωστη εντολή"
hun "Ervenytelen parancs"
ita "Comando sconosciuto"
- jpn "そのコマンドは何?"
+ jpn "不明なコマンドです。"
kor "명령어가 뭔지 모르겠어요..."
nor "Ukjent kommando"
norwegian-ny "Ukjent kommando"
@@ -1136,21 +1101,20 @@ ER_UNKNOWN_COM_ERROR 08S01
serbian "Nepoznata komanda"
slo "Neznámy príkaz"
spa "Comando desconocido"
- swe "Okänt commando"
+ swe "Okänt kommando"
ukr "Невідома команда"
ER_BAD_NULL_ERROR 23000
- cze "Sloupec '%-.192s' nem-Bůže být null"
+ cze "Sloupec '%-.192s' nemůže být null"
dan "Kolonne '%-.192s' kan ikke være NULL"
nla "Kolom '%-.192s' kan niet null zijn"
eng "Column '%-.192s' cannot be null"
- jps "Column '%-.192s' は null にはできないのです",
est "Tulp '%-.192s' ei saa omada nullväärtust"
fre "Le champ '%-.192s' ne peut être vide (null)"
ger "Feld '%-.192s' darf nicht NULL sein"
greek "Το πεδίο '%-.192s' δεν μπορεί να είναι κενό (null)"
hun "A(z) '%-.192s' oszlop erteke nem lehet nulla"
ita "La colonna '%-.192s' non puo` essere nulla"
- jpn "Column '%-.192s' は null にはできないのです"
+ jpn "列 '%-.192s' は null にできません。"
kor "칼럼 '%-.192s'는 널(Null)이 되면 안됩니다. "
nor "Kolonne '%-.192s' kan ikke vere null"
norwegian-ny "Kolonne '%-.192s' kan ikkje vere null"
@@ -1164,18 +1128,17 @@ ER_BAD_NULL_ERROR 23000
swe "Kolumn '%-.192s' får inte vara NULL"
ukr "Стовбець '%-.192s' не може бути нульовим"
ER_BAD_DB_ERROR 42000
- cze "Nezn-Bámá databáze '%-.192s'"
+ cze "Neznámá databáze '%-.192s'"
dan "Ukendt database '%-.192s'"
nla "Onbekende database '%-.192s'"
eng "Unknown database '%-.192s'"
- jps "'%-.192s' なんてデータベースは知りません.",
est "Tundmatu andmebaas '%-.192s'"
fre "Base '%-.192s' inconnue"
ger "Unbekannte Datenbank '%-.192s'"
greek "Αγνωστη βάση δεδομένων '%-.192s'"
hun "Ervenytelen adatbazis: '%-.192s'"
ita "Database '%-.192s' sconosciuto"
- jpn "'%-.192s' なんてデータベースは知りません."
+ jpn "'%-.192s' は不明なデータベースです。"
kor "데이타베이스 '%-.192s'는 알수 없음"
nor "Ukjent database '%-.192s'"
norwegian-ny "Ukjent database '%-.192s'"
@@ -1189,18 +1152,17 @@ ER_BAD_DB_ERROR 42000
swe "Okänd databas: '%-.192s'"
ukr "Невідома база данних '%-.192s'"
ER_TABLE_EXISTS_ERROR 42S01
- cze "Tabulka '%-.192s' ji-Bž existuje"
+ cze "Tabulka '%-.192s' již existuje"
dan "Tabellen '%-.192s' findes allerede"
nla "Tabel '%-.192s' bestaat al"
eng "Table '%-.192s' already exists"
- jps "Table '%-.192s' は既にあります",
est "Tabel '%-.192s' juba eksisteerib"
fre "La table '%-.192s' existe déjà"
ger "Tabelle '%-.192s' bereits vorhanden"
greek "Ο πίνακας '%-.192s' υπάρχει ήδη"
hun "A(z) '%-.192s' tabla mar letezik"
ita "La tabella '%-.192s' esiste gia`"
- jpn "Table '%-.192s' は既にあります"
+ jpn "表 '%-.192s' はすでに存在します。"
kor "테이블 '%-.192s'는 이미 존재함"
nor "Tabellen '%-.192s' eksisterer allerede"
norwegian-ny "Tabellen '%-.192s' eksisterar allereide"
@@ -1214,18 +1176,17 @@ ER_TABLE_EXISTS_ERROR 42S01
swe "Tabellen '%-.192s' finns redan"
ukr "Таблиця '%-.192s' вже існує"
ER_BAD_TABLE_ERROR 42S02
- cze "Nezn-Bámá tabulka '%-.100s'"
+ cze "Neznámá tabulka '%-.100s'"
dan "Ukendt tabel '%-.100s'"
nla "Onbekende tabel '%-.100s'"
eng "Unknown table '%-.100s'"
- jps "table '%-.100s' はありません.",
est "Tundmatu tabel '%-.100s'"
fre "Table '%-.100s' inconnue"
ger "Unbekannte Tabelle '%-.100s'"
greek "Αγνωστος πίνακας '%-.100s'"
hun "Ervenytelen tabla: '%-.100s'"
ita "Tabella '%-.100s' sconosciuta"
- jpn "table '%-.100s' はありません."
+ jpn "'%-.100s' は不明な表です。"
kor "테이블 '%-.100s'는 알수 없음"
nor "Ukjent tabell '%-.100s'"
norwegian-ny "Ukjent tabell '%-.100s'"
@@ -1239,7 +1200,7 @@ ER_BAD_TABLE_ERROR 42S02
swe "Okänd tabell '%-.100s'"
ukr "Невідома таблиця '%-.100s'"
ER_NON_UNIQ_ERROR 23000
- cze "Sloupec '%-.192s' v %-.192s nen-Bí zcela jasný"
+ cze "Sloupec '%-.192s' v %-.192s není zcela jasný"
dan "Felt: '%-.192s' i tabel %-.192s er ikke entydigt"
nla "Kolom: '%-.192s' in %-.192s is niet eenduidig"
eng "Column '%-.192s' in %-.192s is ambiguous"
@@ -1249,7 +1210,7 @@ ER_NON_UNIQ_ERROR 23000
greek "Το πεδίο: '%-.192s' σε %-.192s δεν έχει καθοριστεί"
hun "A(z) '%-.192s' oszlop %-.192s-ben ketertelmu"
ita "Colonna: '%-.192s' di %-.192s e` ambigua"
- jpn "Column: '%-.192s' in %-.192s is ambiguous"
+ jpn "列 '%-.192s' は %-.192s 内で曖昧です。"
kor "칼럼: '%-.192s' in '%-.192s' 이 모호함"
nor "Felt: '%-.192s' i tabell %-.192s er ikke entydig"
norwegian-ny "Kolonne: '%-.192s' i tabell %-.192s er ikkje eintydig"
@@ -1263,18 +1224,17 @@ ER_NON_UNIQ_ERROR 23000
swe "Kolumn '%-.192s' i %-.192s är inte unik"
ukr "Стовбець '%-.192s' у %-.192s визначений неоднозначно"
ER_SERVER_SHUTDOWN 08S01
- cze "Prob-Bíhá ukončování práce serveru"
+ cze "Probíhá ukončování práce serveru"
dan "Database nedlukning er i gang"
nla "Bezig met het stoppen van de server"
eng "Server shutdown in progress"
- jps "Server を shutdown 中...",
est "Serveri seiskamine käib"
fre "Arrêt du serveur en cours"
ger "Der Server wird heruntergefahren"
greek "Εναρξη διαδικασίας αποσύνδεσης του εξυπηρετητή (server shutdown)"
hun "A szerver leallitasa folyamatban"
ita "Shutdown del server in corso"
- jpn "Server を shutdown 中..."
+ jpn "サーバーをシャットダウン中です。"
kor "Server가 셧다운 중입니다."
nor "Database nedkobling er i gang"
norwegian-ny "Tenar nedkopling er i gang"
@@ -1288,18 +1248,17 @@ ER_SERVER_SHUTDOWN 08S01
swe "Servern går nu ned"
ukr "Завершується работа сервера"
ER_BAD_FIELD_ERROR 42S22 S0022
- cze "Nezn-Bámý sloupec '%-.192s' v %-.192s"
+ cze "Neznámý sloupec '%-.192s' v %-.192s"
dan "Ukendt kolonne '%-.192s' i tabel %-.192s"
nla "Onbekende kolom '%-.192s' in %-.192s"
eng "Unknown column '%-.192s' in '%-.192s'"
- jps "'%-.192s' column は '%-.192s' にはありません.",
est "Tundmatu tulp '%-.192s' '%-.192s'-s"
fre "Champ '%-.192s' inconnu dans %-.192s"
ger "Unbekanntes Tabellenfeld '%-.192s' in %-.192s"
greek "Αγνωστο πεδίο '%-.192s' σε '%-.192s'"
hun "A(z) '%-.192s' oszlop ervenytelen '%-.192s'-ben"
ita "Colonna sconosciuta '%-.192s' in '%-.192s'"
- jpn "'%-.192s' column は '%-.192s' にはありません."
+ jpn "列 '%-.192s' は '%-.192s' にはありません。"
kor "Unknown 칼럼 '%-.192s' in '%-.192s'"
nor "Ukjent kolonne '%-.192s' i tabell %-.192s"
norwegian-ny "Ukjent felt '%-.192s' i tabell %-.192s"
@@ -1313,17 +1272,17 @@ ER_BAD_FIELD_ERROR 42S22 S0022
swe "Okänd kolumn '%-.192s' i %-.192s"
ukr "Невідомий стовбець '%-.192s' у '%-.192s'"
ER_WRONG_FIELD_WITH_GROUP 42000 S1009
- cze "Pou-Bžité '%-.192s' nebylo v group by"
+ cze "Použité '%-.192s' nebylo v group by"
dan "Brugte '%-.192s' som ikke var i group by"
nla "Opdracht gebruikt '%-.192s' dat niet in de GROUP BY voorkomt"
eng "'%-.192s' isn't in GROUP BY"
- jps "'%-.192s' isn't in GROUP BY",
est "'%-.192s' puudub GROUP BY klauslis"
fre "'%-.192s' n'est pas dans 'group by'"
ger "'%-.192s' ist nicht in GROUP BY vorhanden"
greek "Χρησιμοποιήθηκε '%-.192s' που δεν υπήρχε στο group by"
hun "Used '%-.192s' with wasn't in group by"
ita "Usato '%-.192s' che non e` nel GROUP BY"
+ jpn "'%-.192s' はGROUP BY句で指定されていません。"
kor "'%-.192s'은 GROUP BY속에 없음"
nor "Brukte '%-.192s' som ikke var i group by"
norwegian-ny "Brukte '%-.192s' som ikkje var i group by"
@@ -1337,7 +1296,7 @@ ER_WRONG_FIELD_WITH_GROUP 42000 S1009
swe "'%-.192s' finns inte i GROUP BY"
ukr "'%-.192s' не є у GROUP BY"
ER_WRONG_GROUP_FIELD 42000 S1009
- cze "Nemohu pou-Bžít group na '%-.192s'"
+ cze "Nemohu použít group na '%-.192s'"
dan "Kan ikke gruppere på '%-.192s'"
nla "Kan '%-.192s' niet groeperen"
eng "Can't group on '%-.192s'"
@@ -1347,6 +1306,7 @@ ER_WRONG_GROUP_FIELD 42000 S1009
greek "Αδύνατη η ομαδοποίηση (group on) '%-.192s'"
hun "A group nem hasznalhato: '%-.192s'"
ita "Impossibile raggruppare per '%-.192s'"
+ jpn "'%-.192s' でのグループ化はできません。"
kor "'%-.192s'를 그룹할 수 없음"
nor "Kan ikke gruppere på '%-.192s'"
norwegian-ny "Kan ikkje gruppere på '%-.192s'"
@@ -1360,7 +1320,7 @@ ER_WRONG_GROUP_FIELD 42000 S1009
swe "Kan inte använda GROUP BY med '%-.192s'"
ukr "Не можу групувати по '%-.192s'"
ER_WRONG_SUM_SELECT 42000 S1009
- cze "P-Bříkaz obsahuje zároveň funkci sum a sloupce"
+ cze "Příkaz obsahuje zároveň funkci sum a sloupce"
dan "Udtrykket har summer (sum) funktioner og kolonner i samme udtryk"
nla "Opdracht heeft totaliseer functies en kolommen in dezelfde opdracht"
eng "Statement has sum functions and columns in same statement"
@@ -1369,6 +1329,7 @@ ER_WRONG_SUM_SELECT 42000 S1009
ger "Die Verwendung von Summierungsfunktionen und Spalten im selben Befehl ist nicht erlaubt"
greek "Η διατύπωση περιέχει sum functions και columns στην ίδια διατύπωση"
ita "Il comando ha una funzione SUM e una colonna non specificata nella GROUP BY"
+ jpn "集計関数と通常の列が同時に指定されています。"
kor "Statement 가 sum기능을 동작중이고 칼럼도 동일한 statement입니다."
nor "Uttrykket har summer (sum) funksjoner og kolonner i samme uttrykk"
norwegian-ny "Uttrykket har summer (sum) funksjoner og kolonner i same uttrykk"
@@ -1382,7 +1343,7 @@ ER_WRONG_SUM_SELECT 42000 S1009
swe "Kommandot har både sum functions och enkla funktioner"
ukr "У виразі використано підсумовуючі функції поряд з іменами стовбців"
ER_WRONG_VALUE_COUNT 21S01
- cze "Po-Bčet sloupců neodpovídá zadané hodnotě"
+ cze "Počet sloupců neodpovídá zadané hodnotě"
dan "Kolonne tæller stemmer ikke med antallet af værdier"
nla "Het aantal kolommen komt niet overeen met het aantal opgegeven waardes"
eng "Column count doesn't match value count"
@@ -1391,6 +1352,7 @@ ER_WRONG_VALUE_COUNT 21S01
greek "Το Column count δεν ταιριάζει με το value count"
hun "Az oszlopban levo ertek nem egyezik meg a szamitott ertekkel"
ita "Il numero delle colonne non e` uguale al numero dei valori"
+ jpn "列数が値の個数と一致しません。"
kor "칼럼의 카운트가 값의 카운트와 일치하지 않습니다."
nor "Felt telling stemmer verdi telling"
norwegian-ny "Kolonne telling stemmer verdi telling"
@@ -1404,18 +1366,17 @@ ER_WRONG_VALUE_COUNT 21S01
swe "Antalet kolumner motsvarar inte antalet värden"
ukr "Кількість стовбців не співпадає з кількістю значень"
ER_TOO_LONG_IDENT 42000 S1009
- cze "Jm-Béno identifikátoru '%-.100s' je příliš dlouhé"
+ cze "Jméno identifikátoru '%-.100s' je příliš dlouhé"
dan "Navnet '%-.100s' er for langt"
nla "Naam voor herkenning '%-.100s' is te lang"
eng "Identifier name '%-.100s' is too long"
- jps "Identifier name '%-.100s' は長すぎます",
est "Identifikaatori '%-.100s' nimi on liiga pikk"
fre "Le nom de l'identificateur '%-.100s' est trop long"
ger "Name des Bezeichners '%-.100s' ist zu lang"
greek "Το identifier name '%-.100s' είναι πολύ μεγάλο"
hun "A(z) '%-.100s' azonositonev tul hosszu."
ita "Il nome dell'identificatore '%-.100s' e` troppo lungo"
- jpn "Identifier name '%-.100s' は長すぎます"
+ jpn "識別子名 '%-.100s' は長すぎます。"
kor "Identifier '%-.100s'는 너무 길군요."
nor "Identifikator '%-.100s' er for lang"
norwegian-ny "Identifikator '%-.100s' er for lang"
@@ -1429,18 +1390,17 @@ ER_TOO_LONG_IDENT 42000 S1009
swe "Kolumnnamn '%-.100s' är för långt"
ukr "Ім'я ідентифікатора '%-.100s' задовге"
ER_DUP_FIELDNAME 42S21 S1009
- cze "Zdvojen-Bé jméno sloupce '%-.192s'"
+ cze "Zdvojené jméno sloupce '%-.192s'"
dan "Feltnavnet '%-.192s' findes allerede"
nla "Dubbele kolom naam '%-.192s'"
eng "Duplicate column name '%-.192s'"
- jps "'%-.192s' という column 名は重複してます",
est "Kattuv tulba nimi '%-.192s'"
fre "Nom du champ '%-.192s' déjà utilisé"
ger "Doppelter Spaltenname: '%-.192s'"
greek "Επανάληψη column name '%-.192s'"
hun "Duplikalt oszlopazonosito: '%-.192s'"
ita "Nome colonna duplicato '%-.192s'"
- jpn "'%-.192s' という column 名は重複してます"
+ jpn "列名 '%-.192s' は重複してます。"
kor "중복된 칼럼 이름: '%-.192s'"
nor "Feltnavnet '%-.192s' eksisterte fra før"
norwegian-ny "Feltnamnet '%-.192s' eksisterte frå før"
@@ -1454,18 +1414,17 @@ ER_DUP_FIELDNAME 42S21 S1009
swe "Kolumnnamn '%-.192s finns flera gånger"
ukr "Дублююче ім'я стовбця '%-.192s'"
ER_DUP_KEYNAME 42000 S1009
- cze "Zdvojen-Bé jméno klíče '%-.192s'"
+ cze "Zdvojené jméno klíče '%-.192s'"
dan "Indeksnavnet '%-.192s' findes allerede"
nla "Dubbele zoeksleutel naam '%-.192s'"
eng "Duplicate key name '%-.192s'"
- jps "'%-.192s' という key の名前は重複しています",
est "Kattuv võtme nimi '%-.192s'"
fre "Nom de clef '%-.192s' déjà utilisé"
ger "Doppelter Name für Schlüssel vorhanden: '%-.192s'"
greek "Επανάληψη key name '%-.192s'"
hun "Duplikalt kulcsazonosito: '%-.192s'"
ita "Nome chiave duplicato '%-.192s'"
- jpn "'%-.192s' という key の名前は重複しています"
+ jpn "索引名 '%-.192s' は重複しています。"
kor "중복된 키 이름 : '%-.192s'"
nor "Nøkkelnavnet '%-.192s' eksisterte fra før"
norwegian-ny "Nøkkelnamnet '%-.192s' eksisterte frå før"
@@ -1481,32 +1440,31 @@ ER_DUP_KEYNAME 42000 S1009
# When using this error code, please use ER(ER_DUP_ENTRY_WITH_KEY_NAME)
# for the message string. See, for example, code in handler.cc.
ER_DUP_ENTRY 23000 S1009
- cze "Zdvojen-Bý klíč '%-.192s' (číslo klíče %d)"
+ cze "Zdvojený klíč '%-.192s' (číslo klíče %d)"
dan "Ens værdier '%-.192s' for indeks %d"
nla "Dubbele ingang '%-.192s' voor zoeksleutel %d"
eng "Duplicate entry '%-.192s' for key %d"
- jps "'%-.192s' は key %d において重複しています",
est "Kattuv väärtus '%-.192s' võtmele %d"
fre "Duplicata du champ '%-.192s' pour la clef %d"
ger "Doppelter Eintrag '%-.192s' für Schlüssel %d"
greek "Διπλή εγγραφή '%-.192s' για το κλειδί %d"
hun "Duplikalt bejegyzes '%-.192s' a %d kulcs szerint."
ita "Valore duplicato '%-.192s' per la chiave %d"
- jpn "'%-.192s' は key %d において重複しています"
+ jpn "'%-.192s' は索引 %d で重複しています。"
kor "중복된 입력 값 '%-.192s': key %d"
nor "Like verdier '%-.192s' for nøkkel %d"
norwegian-ny "Like verdiar '%-.192s' for nykkel %d"
- pol "Powtórzone wyst?pienie '%-.192s' dla klucza %d"
+ pol "Powtórzone wystąpienie '%-.192s' dla klucza %d"
por "Entrada '%-.192s' duplicada para a chave %d"
rum "Cimpul '%-.192s' e duplicat pentru cheia %d"
rus "Дублирующаяся запись '%-.192s' по ключу %d"
serbian "Dupliran unos '%-.192s' za ključ '%d'"
slo "Opakovaný kľúč '%-.192s' (číslo kľúča %d)"
spa "Entrada duplicada '%-.192s' para la clave %d"
- swe "Dubbel nyckel '%-.192s' för nyckel %d"
+ swe "Dublett '%-.192s' för nyckel %d"
ukr "Дублюючий запис '%-.192s' для ключа %d"
ER_WRONG_FIELD_SPEC 42000 S1009
- cze "Chybn-Bá specifikace sloupce '%-.192s'"
+ cze "Chybná specifikace sloupce '%-.192s'"
dan "Forkert kolonnespecifikaton for felt '%-.192s'"
nla "Verkeerde kolom specificatie voor kolom '%-.192s'"
eng "Incorrect column specifier for column '%-.192s'"
@@ -1516,6 +1474,7 @@ ER_WRONG_FIELD_SPEC 42000 S1009
greek "Εσφαλμένο column specifier για το πεδίο '%-.192s'"
hun "Rossz oszlopazonosito: '%-.192s'"
ita "Specifica errata per la colonna '%-.192s'"
+ jpn "列 '%-.192s' の定義が不正です。"
kor "칼럼 '%-.192s'의 부정확한 칼럼 정의자"
nor "Feil kolonne spesifikator for felt '%-.192s'"
norwegian-ny "Feil kolonne spesifikator for kolonne '%-.192s'"
@@ -1529,18 +1488,17 @@ ER_WRONG_FIELD_SPEC 42000 S1009
swe "Felaktigt kolumntyp för kolumn '%-.192s'"
ukr "Невірний специфікатор стовбця '%-.192s'"
ER_PARSE_ERROR 42000 s1009
- cze "%s bl-Bízko '%-.80s' na řádku %d"
+ cze "%s blízko '%-.80s' na řádku %d"
dan "%s nær '%-.80s' på linje %d"
nla "%s bij '%-.80s' in regel %d"
eng "%s near '%-.80s' at line %d"
- jps "%s : '%-.80s' 付近 : %d 行目",
est "%s '%-.80s' ligidal real %d"
fre "%s près de '%-.80s' à la ligne %d"
ger "%s bei '%-.80s' in Zeile %d"
greek "%s πλησίον '%-.80s' στη γραμμή %d"
hun "A %s a '%-.80s'-hez kozeli a %d sorban"
ita "%s vicino a '%-.80s' linea %d"
- jpn "%s : '%-.80s' 付近 : %d 行目"
+ jpn "%s : '%-.80s' 付近 %d 行目"
kor "'%s' 에러 같읍니다. ('%-.80s' 명령어 라인 %d)"
nor "%s nær '%-.80s' på linje %d"
norwegian-ny "%s attmed '%-.80s' på line %d"
@@ -1554,18 +1512,17 @@ ER_PARSE_ERROR 42000 s1009
swe "%s nära '%-.80s' på rad %d"
ukr "%s біля '%-.80s' в строці %d"
ER_EMPTY_QUERY 42000
- cze "V-Býsledek dotazu je prázdný"
+ cze "Výsledek dotazu je prázdný"
dan "Forespørgsel var tom"
nla "Query was leeg"
eng "Query was empty"
- jps "Query が空です.",
est "Tühi päring"
fre "Query est vide"
ger "Leere Abfrage"
greek "Το ερώτημα (query) που θέσατε ήταν κενό"
hun "Ures lekerdezes."
ita "La query e` vuota"
- jpn "Query が空です."
+ jpn "クエリが空です。"
kor "쿼리결과가 없습니다."
nor "Forespørsel var tom"
norwegian-ny "Førespurnad var tom"
@@ -1579,18 +1536,17 @@ ER_EMPTY_QUERY 42000
swe "Frågan var tom"
ukr "Пустий запит"
ER_NONUNIQ_TABLE 42000 S1009
- cze "Nejednozna-Bčná tabulka/alias: '%-.192s'"
+ cze "Nejednoznačná tabulka/alias: '%-.192s'"
dan "Tabellen/aliaset: '%-.192s' er ikke unikt"
nla "Niet unieke waarde tabel/alias: '%-.192s'"
eng "Not unique table/alias: '%-.192s'"
- jps "'%-.192s' は一意の table/alias 名ではありません",
est "Ei ole unikaalne tabel/alias '%-.192s'"
fre "Table/alias: '%-.192s' non unique"
ger "Tabellenname/Alias '%-.192s' nicht eindeutig"
greek "Αδύνατη η ανεύρεση unique table/alias: '%-.192s'"
hun "Nem egyedi tabla/alias: '%-.192s'"
ita "Tabella/alias non unico: '%-.192s'"
- jpn "'%-.192s' は一意の table/alias 名ではありません"
+ jpn "表名/別名 '%-.192s' は一意ではありません。"
kor "Unique 하지 않은 테이블/alias: '%-.192s'"
nor "Ikke unikt tabell/alias: '%-.192s'"
norwegian-ny "Ikkje unikt tabell/alias: '%-.192s'"
@@ -1604,7 +1560,7 @@ ER_NONUNIQ_TABLE 42000 S1009
swe "Icke unikt tabell/alias: '%-.192s'"
ukr "Неунікальна таблиця/псевдонім: '%-.192s'"
ER_INVALID_DEFAULT 42000 S1009
- cze "Chybn-Bá defaultní hodnota pro '%-.192s'"
+ cze "Chybná defaultní hodnota pro '%-.192s'"
dan "Ugyldig standardværdi for '%-.192s'"
nla "Foutieve standaard waarde voor '%-.192s'"
eng "Invalid default value for '%-.192s'"
@@ -1614,6 +1570,7 @@ ER_INVALID_DEFAULT 42000 S1009
greek "Εσφαλμένη προκαθορισμένη τιμή (default value) για '%-.192s'"
hun "Ervenytelen ertek: '%-.192s'"
ita "Valore di default non valido per '%-.192s'"
+ jpn "'%-.192s' へのデフォルト値が無効です。"
kor "'%-.192s'의 유효하지 못한 디폴트 값을 사용하셨습니다."
nor "Ugyldig standardverdi for '%-.192s'"
norwegian-ny "Ugyldig standardverdi for '%-.192s'"
@@ -1627,18 +1584,17 @@ ER_INVALID_DEFAULT 42000 S1009
swe "Ogiltigt DEFAULT värde för '%-.192s'"
ukr "Невірне значення по замовчуванню для '%-.192s'"
ER_MULTIPLE_PRI_KEY 42000 S1009
- cze "Definov-Báno více primárních klíčů"
+ cze "Definováno více primárních klíčů"
dan "Flere primærnøgler specificeret"
nla "Meerdere primaire zoeksleutels gedefinieerd"
eng "Multiple primary key defined"
- jps "複数の primary key が定義されました",
est "Mitut primaarset võtit ei saa olla"
fre "Plusieurs clefs primaires définies"
ger "Mehrere Primärschlüssel (PRIMARY KEY) definiert"
greek "Περισσότερα από ένα primary key ορίστηκαν"
hun "Tobbszoros elsodleges kulcs definialas."
ita "Definite piu` chiave primarie"
- jpn "複数の primary key が定義されました"
+ jpn "PRIMARY KEY が複数定義されています。"
kor "Multiple primary key가 정의되어 있슴"
nor "Fleire primærnøkle spesifisert"
norwegian-ny "Fleire primærnyklar spesifisert"
@@ -1652,18 +1608,17 @@ ER_MULTIPLE_PRI_KEY 42000 S1009
swe "Flera PRIMARY KEY använda"
ukr "Первинного ключа визначено неодноразово"
ER_TOO_MANY_KEYS 42000 S1009
- cze "Zad-Báno příliš mnoho klíčů, je povoleno nejvíce %d klíčů"
+ cze "Zadáno příliš mnoho klíčů, je povoleno nejvíce %d klíčů"
dan "For mange nøgler specificeret. Kun %d nøgler må bruges"
nla "Teveel zoeksleutels gedefinieerd. Maximaal zijn %d zoeksleutels toegestaan"
eng "Too many keys specified; max %d keys allowed"
- jps "key の指定が多すぎます. key は最大 %d までです",
est "Liiga palju võtmeid. Maksimaalselt võib olla %d võtit"
fre "Trop de clefs sont définies. Maximum de %d clefs alloué"
ger "Zu viele Schlüssel definiert. Maximal %d Schlüssel erlaubt"
greek "Πάρα πολλά key ορίσθηκαν. Το πολύ %d επιτρέπονται"
hun "Tul sok kulcs. Maximum %d kulcs engedelyezett."
ita "Troppe chiavi. Sono ammesse max %d chiavi"
- jpn "key の指定が多すぎます. key は最大 %d までです"
+ jpn "索引の数が多すぎます。最大 %d 個までです。"
kor "너무 많은 키가 정의되어 있읍니다.. 최대 %d의 키가 가능함"
nor "For mange nøkler spesifisert. Maks %d nøkler tillatt"
norwegian-ny "For mange nykler spesifisert. Maks %d nyklar tillatt"
@@ -1677,7 +1632,7 @@ ER_TOO_MANY_KEYS 42000 S1009
swe "För många nycklar använda. Man får ha högst %d nycklar"
ukr "Забагато ключів зазначено. Дозволено не більше %d ключів"
ER_TOO_MANY_KEY_PARTS 42000 S1009
- cze "Zad-Báno příliš mnoho část klíčů, je povoleno nejvíce %d částí"
+ cze "Zadáno příliš mnoho část klíčů, je povoleno nejvíce %d částí"
dan "For mange nøgledele specificeret. Kun %d dele må bruges"
nla "Teveel zoeksleutel onderdelen gespecificeerd. Maximaal %d onderdelen toegestaan"
eng "Too many key parts specified; max %d parts allowed"
@@ -1687,6 +1642,7 @@ ER_TOO_MANY_KEY_PARTS 42000 S1009
greek "Πάρα πολλά key parts ορίσθηκαν. Το πολύ %d επιτρέπονται"
hun "Tul sok kulcsdarabot definialt. Maximum %d resz engedelyezett"
ita "Troppe parti di chiave specificate. Sono ammesse max %d parti"
+ jpn "索引のキー列指定が多すぎます。最大 %d 個までです。"
kor "너무 많은 키 부분(parts)들이 정의되어 있읍니다.. 최대 %d 부분이 가능함"
nor "For mange nøkkeldeler spesifisert. Maks %d deler tillatt"
norwegian-ny "For mange nykkeldelar spesifisert. Maks %d delar tillatt"
@@ -1700,18 +1656,17 @@ ER_TOO_MANY_KEY_PARTS 42000 S1009
swe "För många nyckeldelar använda. Man får ha högst %d nyckeldelar"
ukr "Забагато частин ключа зазначено. Дозволено не більше %d частин"
ER_TOO_LONG_KEY 42000 S1009
- cze "Zadan-Bý klíč byl příliš dlouhý, největší délka klíče je %d"
+ cze "Zadaný klíč byl příliš dlouhý, největší délka klíče je %d"
dan "Specificeret nøgle var for lang. Maksimal nøglelængde er %d"
nla "Gespecificeerde zoeksleutel was te lang. De maximale lengte is %d"
eng "Specified key was too long; max key length is %d bytes"
- jps "key が長すぎます. key の長さは最大 %d です",
est "Võti on liiga pikk. Maksimaalne võtmepikkus on %d"
fre "La clé est trop longue. Longueur maximale: %d"
ger "Schlüssel ist zu lang. Die maximale Schlüssellänge beträgt %d"
greek "Το κλειδί που ορίσθηκε είναι πολύ μεγάλο. Το μέγιστο μήκος είναι %d"
hun "A megadott kulcs tul hosszu. Maximalis kulcshosszusag: %d"
ita "La chiave specificata e` troppo lunga. La max lunghezza della chiave e` %d"
- jpn "key が長すぎます. key の長さは最大 %d です"
+ jpn "索引のキーが長すぎます。最大 %d バイトまでです。"
kor "정의된 키가 너무 깁니다. 최대 키의 길이는 %d입니다."
nor "Spesifisert nøkkel var for lang. Maks nøkkellengde er is %d"
norwegian-ny "Spesifisert nykkel var for lang. Maks nykkellengde er %d"
@@ -1725,18 +1680,17 @@ ER_TOO_LONG_KEY 42000 S1009
swe "För lång nyckel. Högsta tillåtna nyckellängd är %d"
ukr "Зазначений ключ задовгий. Найбільша довжина ключа %d байтів"
ER_KEY_COLUMN_DOES_NOT_EXITS 42000 S1009
- cze "Kl-Bíčový sloupec '%-.192s' v tabulce neexistuje"
+ cze "Klíčový sloupec '%-.192s' v tabulce neexistuje"
dan "Nøglefeltet '%-.192s' eksisterer ikke i tabellen"
nla "Zoeksleutel kolom '%-.192s' bestaat niet in tabel"
eng "Key column '%-.192s' doesn't exist in table"
- jps "Key column '%-.192s' がテーブルにありません.",
est "Võtme tulp '%-.192s' puudub tabelis"
fre "La clé '%-.192s' n'existe pas dans la table"
ger "In der Tabelle gibt es kein Schlüsselfeld '%-.192s'"
greek "Το πεδίο κλειδί '%-.192s' δεν υπάρχει στον πίνακα"
hun "A(z) '%-.192s'kulcsoszlop nem letezik a tablaban"
ita "La colonna chiave '%-.192s' non esiste nella tabella"
- jpn "Key column '%-.192s' がテーブルにありません."
+ jpn "キー列 '%-.192s' は表にありません。"
kor "Key 칼럼 '%-.192s'는 테이블에 존재하지 않습니다."
nor "Nøkkel felt '%-.192s' eksiterer ikke i tabellen"
norwegian-ny "Nykkel kolonne '%-.192s' eksiterar ikkje i tabellen"
@@ -1750,7 +1704,7 @@ ER_KEY_COLUMN_DOES_NOT_EXITS 42000 S1009
swe "Nyckelkolumn '%-.192s' finns inte"
ukr "Ключовий стовбець '%-.192s' не існує у таблиці"
ER_BLOB_USED_AS_KEY 42000 S1009
- cze "Blob sloupec '%-.192s' nem-Bůže být použit jako klíč"
+ cze "Blob sloupec '%-.192s' nemůže být použit jako klíč"
dan "BLOB feltet '%-.192s' kan ikke bruges ved specifikation af indeks"
nla "BLOB kolom '%-.192s' kan niet gebruikt worden bij zoeksleutel specificatie"
eng "BLOB column '%-.192s' can't be used in key specification with the used table type"
@@ -1760,6 +1714,7 @@ ER_BLOB_USED_AS_KEY 42000 S1009
greek "Πεδίο τύπου Blob '%-.192s' δεν μπορεί να χρησιμοποιηθεί στον ορισμό ενός κλειδιού (key specification)"
hun "Blob objektum '%-.192s' nem hasznalhato kulcskent"
ita "La colonna BLOB '%-.192s' non puo` essere usata nella specifica della chiave"
+ jpn "指定されたストレージエンジンでは、BLOB列 '%-.192s' は索引キーにできません。"
kor "BLOB 칼럼 '%-.192s'는 키 정의에서 사용될 수 없습니다."
nor "Blob felt '%-.192s' kan ikke brukes ved spesifikasjon av nøkler"
norwegian-ny "Blob kolonne '%-.192s' kan ikkje brukast ved spesifikasjon av nyklar"
@@ -1773,18 +1728,17 @@ ER_BLOB_USED_AS_KEY 42000 S1009
swe "En BLOB '%-.192s' kan inte vara nyckel med den använda tabelltypen"
ukr "BLOB стовбець '%-.192s' не може бути використаний у визначенні ключа в цьому типі таблиці"
ER_TOO_BIG_FIELDLENGTH 42000 S1009
- cze "P-Bříliš velká délka sloupce '%-.192s' (nejvíce %lu). Použijte BLOB"
+ cze "Příliš velká délka sloupce '%-.192s' (nejvíce %lu). Použijte BLOB"
dan "For stor feltlængde for kolonne '%-.192s' (maks = %lu). Brug BLOB i stedet"
nla "Te grote kolomlengte voor '%-.192s' (max = %lu). Maak hiervoor gebruik van het type BLOB"
eng "Column length too big for column '%-.192s' (max = %lu); use BLOB or TEXT instead"
- jps "column '%-.192s' は,確保する column の大きさが多すぎます. (最大 %lu まで). BLOB をかわりに使用してください."
est "Tulba '%-.192s' pikkus on liiga pikk (maksimaalne pikkus: %lu). Kasuta BLOB väljatüüpi"
fre "Champ '%-.192s' trop long (max = %lu). Utilisez un BLOB"
ger "Feldlänge für Feld '%-.192s' zu groß (maximal %lu). BLOB- oder TEXT-Spaltentyp verwenden!"
greek "Πολύ μεγάλο μήκος για το πεδίο '%-.192s' (max = %lu). Παρακαλώ χρησιμοποιείστε τον τύπο BLOB"
hun "A(z) '%-.192s' oszlop tul hosszu. (maximum = %lu). Hasznaljon BLOB tipust inkabb."
ita "La colonna '%-.192s' e` troppo grande (max=%lu). Utilizza un BLOB."
- jpn "column '%-.192s' は,確保する column の大きさが多すぎます. (最大 %lu まで). BLOB をかわりに使用してください."
+ jpn "列 '%-.192s' のサイズ定義が大きすぎます (最大 %lu まで)。代わりに BLOB または TEXT を使用してください。"
kor "칼럼 '%-.192s'의 칼럼 길이가 너무 깁니다 (최대 = %lu). 대신에 BLOB를 사용하세요."
nor "For stor nøkkellengde for kolonne '%-.192s' (maks = %lu). Bruk BLOB istedenfor"
norwegian-ny "For stor nykkellengde for felt '%-.192s' (maks = %lu). Bruk BLOB istadenfor"
@@ -1798,18 +1752,17 @@ ER_TOO_BIG_FIELDLENGTH 42000 S1009
swe "För stor kolumnlängd angiven för '%-.192s' (max= %lu). Använd en BLOB instället"
ukr "Задовга довжина стовбця '%-.192s' (max = %lu). Використайте тип BLOB"
ER_WRONG_AUTO_KEY 42000 S1009
- cze "M-Bůžete mít pouze jedno AUTO pole a to musí být definováno jako klíč"
+ cze "Můžete mít pouze jedno AUTO pole a to musí být definováno jako klíč"
dan "Der kan kun specificeres eet AUTO_INCREMENT-felt, og det skal være indekseret"
nla "Er kan slechts 1 autofield zijn en deze moet als zoeksleutel worden gedefinieerd."
eng "Incorrect table definition; there can be only one auto column and it must be defined as a key"
- jps "テーブルの定義が違います; there can be only one auto column and it must be defined as a key",
est "Vigane tabelikirjeldus; Tabelis tohib olla üks auto_increment tüüpi tulp ning see peab olema defineeritud võtmena"
fre "Un seul champ automatique est permis et il doit être indexé"
ger "Falsche Tabellendefinition. Es darf nur eine AUTO_INCREMENT-Spalte geben, und diese muss als Schlüssel definiert werden"
greek "Μπορεί να υπάρχει μόνο ένα auto field και πρέπει να έχει ορισθεί σαν key"
hun "Csak egy auto mezo lehetseges, es azt kulcskent kell definialni."
ita "Puo` esserci solo un campo AUTO e deve essere definito come chiave"
- jpn "テーブルの定義が違います; there can be only one auto column and it must be defined as a key"
+ jpn "不正な表定義です。AUTO_INCREMENT列は1個までで、索引を定義する必要があります。"
kor "부정확한 테이블 정의; 테이블은 하나의 auto 칼럼이 존재하고 키로 정의되어져야 합니다."
nor "Bare ett auto felt kan være definert som nøkkel."
norwegian-ny "Bare eitt auto felt kan være definert som nøkkel."
@@ -1823,18 +1776,17 @@ ER_WRONG_AUTO_KEY 42000 S1009
swe "Det får finnas endast ett AUTO_INCREMENT-fält och detta måste vara en nyckel"
ukr "Невірне визначення таблиці; Може бути лише один автоматичний стовбець, що повинен бути визначений як ключ"
ER_READY
- cze "%s: p-Břipraven na spojení\nVersion: '%s' socket: '%s' port: %d""
+ cze "%s: připraven na spojení\nVersion: '%s' socket: '%s' port: %d""
dan "%s: klar til tilslutninger\nVersion: '%s' socket: '%s' port: %d""
nla "%s: klaar voor verbindingen\nVersion: '%s' socket: '%s' port: %d""
eng "%s: ready for connections.\nVersion: '%s' socket: '%s' port: %d"
- jps "%s: 準備完了¥nVersion: '%s' socket: '%s' port: %d"",
est "%s: ootab ühendusi\nVersion: '%s' socket: '%s' port: %d""
fre "%s: Prêt pour des connexions\nVersion: '%s' socket: '%s' port: %d""
ger "%s: Bereit für Verbindungen.\nVersion: '%s' Socket: '%s' Port: %d"
greek "%s: σε αναμονή συνδέσεων\nVersion: '%s' socket: '%s' port: %d""
hun "%s: kapcsolatra kesz\nVersion: '%s' socket: '%s' port: %d""
ita "%s: Pronto per le connessioni\nVersion: '%s' socket: '%s' port: %d""
- jpn "%s: 準備完了\nVersion: '%s' socket: '%s' port: %d""
+ jpn "%s: 接続準備完了。\nバージョン: '%s' socket: '%s' port: %d""
kor "%s: 연결 준비중입니다\nVersion: '%s' socket: '%s' port: %d""
nor "%s: klar for tilkoblinger\nVersion: '%s' socket: '%s' port: %d""
norwegian-ny "%s: klar for tilkoblingar\nVersion: '%s' socket: '%s' port: %d""
@@ -1848,7 +1800,7 @@ ER_READY
swe "%s: klar att ta emot klienter\nVersion: '%s' socket: '%s' port: %d""
ukr "%s: Готовий для з'єднань!\nVersion: '%s' socket: '%s' port: %d""
ER_NORMAL_SHUTDOWN
- cze "%s: norm-Bální ukončení\n"
+ cze "%s: normální ukončení\n"
dan "%s: Normal nedlukning\n"
nla "%s: Normaal afgesloten \n"
eng "%s: Normal shutdown\n"
@@ -1858,6 +1810,7 @@ ER_NORMAL_SHUTDOWN
greek "%s: Φυσιολογική διαδικασία shutdown\n"
hun "%s: Normal leallitas\n"
ita "%s: Shutdown normale\n"
+ jpn "%s: 通常シャットダウン\n"
kor "%s: 정상적인 shutdown\n"
nor "%s: Normal avslutning\n"
norwegian-ny "%s: Normal nedkopling\n"
@@ -1871,18 +1824,17 @@ ER_NORMAL_SHUTDOWN
swe "%s: Normal avslutning\n"
ukr "%s: Нормальне завершення\n"
ER_GOT_SIGNAL
- cze "%s: p-Břijat signal %d, končím\n"
+ cze "%s: přijat signal %d, končím\n"
dan "%s: Fangede signal %d. Afslutter!!\n"
nla "%s: Signaal %d. Systeem breekt af!\n"
eng "%s: Got signal %d. Aborting!\n"
- jps "%s: Got signal %d. 中断!¥n",
est "%s: sain signaali %d. Lõpetan!\n"
fre "%s: Reçu le signal %d. Abandonne!\n"
ger "%s: Signal %d erhalten. Abbruch!\n"
greek "%s: Ελήφθη το μήνυμα %d. Η διαδικασία εγκαταλείπεται!\n"
hun "%s: %d jelzes. Megszakitva!\n"
ita "%s: Ricevuto segnale %d. Interruzione!\n"
- jpn "%s: Got signal %d. 中断!\n"
+ jpn "%s: シグナル %d を受信しました。強制終了します!\n"
kor "%s: %d 신호가 들어왔음. 중지!\n"
nor "%s: Oppdaget signal %d. Avslutter!\n"
norwegian-ny "%s: Oppdaga signal %d. Avsluttar!\n"
@@ -1896,18 +1848,17 @@ ER_GOT_SIGNAL
swe "%s: Fick signal %d. Avslutar!\n"
ukr "%s: Отримано сигнал %d. Перериваюсь!\n"
ER_SHUTDOWN_COMPLETE
- cze "%s: ukon-Bčení práce hotovo\n"
+ cze "%s: ukončení práce hotovo\n"
dan "%s: Server lukket\n"
nla "%s: Afsluiten afgerond\n"
eng "%s: Shutdown complete\n"
- jps "%s: Shutdown 完了¥n",
est "%s: Lõpp\n"
fre "%s: Arrêt du serveur terminé\n"
ger "%s: Herunterfahren beendet\n"
greek "%s: Η διαδικασία Shutdown ολοκληρώθηκε\n"
hun "%s: A leallitas kesz\n"
ita "%s: Shutdown completato\n"
- jpn "%s: Shutdown 完了\n"
+ jpn "%s: シャットダウン完了\n"
kor "%s: Shutdown 이 완료됨!\n"
nor "%s: Avslutning komplett\n"
norwegian-ny "%s: Nedkopling komplett\n"
@@ -1921,18 +1872,17 @@ ER_SHUTDOWN_COMPLETE
swe "%s: Avslutning klar\n"
ukr "%s: Роботу завершено\n"
ER_FORCING_CLOSE 08S01
- cze "%s: n-Básilné uzavření threadu %ld uživatele '%-.48s'\n"
+ cze "%s: násilné uzavření threadu %ld uživatele '%-.48s'\n"
dan "%s: Forceret nedlukning af tråd: %ld bruger: '%-.48s'\n"
nla "%s: Afsluiten afgedwongen van thread %ld gebruiker: '%-.48s'\n"
eng "%s: Forcing close of thread %ld user: '%-.48s'\n"
- jps "%s: スレッド %ld 強制終了 user: '%-.48s'¥n",
est "%s: Sulgen jõuga lõime %ld kasutaja: '%-.48s'\n"
fre "%s: Arrêt forcé de la tâche (thread) %ld utilisateur: '%-.48s'\n"
ger "%s: Thread %ld zwangsweise beendet. Benutzer: '%-.48s'\n"
greek "%s: Το thread θα κλείσει %ld user: '%-.48s'\n"
hun "%s: A(z) %ld thread kenyszeritett zarasa. Felhasznalo: '%-.48s'\n"
ita "%s: Forzata la chiusura del thread %ld utente: '%-.48s'\n"
- jpn "%s: スレッド %ld 強制終了 user: '%-.48s'\n"
+ jpn "%s: スレッド %ld を強制終了します (ユーザー: '%-.48s')\n"
kor "%s: thread %ld의 강제 종료 user: '%-.48s'\n"
nor "%s: Påtvinget avslutning av tråd %ld bruker: '%-.48s'\n"
norwegian-ny "%s: Påtvinga avslutning av tråd %ld brukar: '%-.48s'\n"
@@ -1946,18 +1896,17 @@ ER_FORCING_CLOSE 08S01
swe "%s: Stänger av tråd %ld; användare: '%-.48s'\n"
ukr "%s: Прискорюю закриття гілки %ld користувача: '%-.48s'\n"
ER_IPSOCK_ERROR 08S01
- cze "Nemohu vytvo-Břit IP socket"
+ cze "Nemohu vytvořit IP socket"
dan "Kan ikke oprette IP socket"
nla "Kan IP-socket niet openen"
eng "Can't create IP socket"
- jps "IP socket が作れません",
est "Ei suuda luua IP socketit"
fre "Ne peut créer la connexion IP (socket)"
ger "Kann IP-Socket nicht erzeugen"
greek "Δεν είναι δυνατή η δημιουργία IP socket"
hun "Az IP socket nem hozhato letre"
ita "Impossibile creare il socket IP"
- jpn "IP socket が作れません"
+ jpn "IPソケットを作成できません。"
kor "IP 소켓을 만들지 못했습니다."
nor "Kan ikke opprette IP socket"
norwegian-ny "Kan ikkje opprette IP socket"
@@ -1971,18 +1920,17 @@ ER_IPSOCK_ERROR 08S01
swe "Kan inte skapa IP-socket"
ukr "Не можу створити IP роз'єм"
ER_NO_SUCH_INDEX 42S12 S1009
- cze "Tabulka '%-.192s' nem-Bá index odpovídající CREATE INDEX. Vytvořte tabulku znovu"
+ cze "Tabulka '%-.192s' nemá index odpovídající CREATE INDEX. Vytvořte tabulku znovu"
dan "Tabellen '%-.192s' har ikke den nøgle, som blev brugt i CREATE INDEX. Genopret tabellen"
nla "Tabel '%-.192s' heeft geen INDEX zoals deze gemaakt worden met CREATE INDEX. Maak de tabel opnieuw"
eng "Table '%-.192s' has no index like the one used in CREATE INDEX; recreate the table"
- jps "Table '%-.192s' はそのような index を持っていません(CREATE INDEX 実行時に指定されていません). テーブルを作り直してください",
est "Tabelil '%-.192s' puuduvad võtmed. Loo tabel uuesti"
fre "La table '%-.192s' n'a pas d'index comme celle utilisée dans CREATE INDEX. Recréez la table"
ger "Tabelle '%-.192s' besitzt keinen wie den in CREATE INDEX verwendeten Index. Tabelle neu anlegen"
greek "Ο πίνακας '%-.192s' δεν έχει ευρετήριο (index) σαν αυτό που χρησιμοποιείτε στην CREATE INDEX. Παρακαλώ, ξαναδημιουργήστε τον πίνακα"
hun "A(z) '%-.192s' tablahoz nincs meg a CREATE INDEX altal hasznalt index. Alakitsa at a tablat"
ita "La tabella '%-.192s' non ha nessun indice come quello specificatato dalla CREATE INDEX. Ricrea la tabella"
- jpn "Table '%-.192s' はそのような index を持っていません(CREATE INDEX 実行時に指定されていません). テーブルを作り直してください"
+ jpn "表 '%-.192s' に以前CREATE INDEXで作成された索引がありません。表を作り直してください。"
kor "테이블 '%-.192s'는 인덱스를 만들지 않았습니다. alter 테이블명령을 이용하여 테이블을 수정하세요..."
nor "Tabellen '%-.192s' har ingen index som den som er brukt i CREATE INDEX. Gjenopprett tabellen"
norwegian-ny "Tabellen '%-.192s' har ingen index som den som er brukt i CREATE INDEX. Oprett tabellen på nytt"
@@ -1996,7 +1944,7 @@ ER_NO_SUCH_INDEX 42S12 S1009
swe "Tabellen '%-.192s' har inget index som motsvarar det angivna i CREATE INDEX. Skapa om tabellen"
ukr "Таблиця '%-.192s' має індекс, що не співпадає з вказанним у CREATE INDEX. Створіть таблицю знову"
ER_WRONG_FIELD_TERMINATORS 42000 S1009
- cze "Argument separ-Bátoru položek nebyl očekáván. Přečtěte si manuál"
+ cze "Argument separátoru položek nebyl očekáván. Přečtěte si manuál"
dan "Felt adskiller er ikke som forventet, se dokumentationen"
nla "De argumenten om velden te scheiden zijn anders dan verwacht. Raadpleeg de handleiding"
eng "Field separator argument is not what is expected; check the manual"
@@ -2006,6 +1954,7 @@ ER_WRONG_FIELD_TERMINATORS 42000 S1009
greek "Ο διαχωριστής πεδίων δεν είναι αυτός που αναμενόταν. Παρακαλώ ανατρέξτε στο manual"
hun "A mezoelvalaszto argumentumok nem egyeznek meg a varttal. Nezze meg a kezikonyvben!"
ita "L'argomento 'Field separator' non e` quello atteso. Controlla il manuale"
+ jpn "フィールド区切り文字が予期せぬ使われ方をしています。マニュアルを確認して下さい。"
kor "필드 구분자 인수들이 완전하지 않습니다. 메뉴얼을 찾아 보세요."
nor "Felt skiller argumentene er ikke som forventet, se dokumentasjonen"
norwegian-ny "Felt skiljer argumenta er ikkje som venta, sjå dokumentasjonen"
@@ -2019,7 +1968,7 @@ ER_WRONG_FIELD_TERMINATORS 42000 S1009
swe "Fältseparatorerna är vad som förväntades. Kontrollera mot manualen"
ukr "Хибний розділювач полів. Почитайте документацію"
ER_BLOBS_AND_NO_TERMINATED 42000 S1009
- cze "Nen-Bí možné použít pevný rowlength s BLOBem. Použijte 'fields terminated by'."
+ cze "Není možné použít pevný rowlength s BLOBem. Použijte 'fields terminated by'."
dan "Man kan ikke bruge faste feltlængder med BLOB. Brug i stedet 'fields terminated by'."
nla "Bij het gebruik van BLOBs is het niet mogelijk om vaste rijlengte te gebruiken. Maak s.v.p. gebruik van 'fields terminated by'."
eng "You can't use fixed rowlength with BLOBs; please use 'fields terminated by'"
@@ -2029,7 +1978,7 @@ ER_BLOBS_AND_NO_TERMINATED 42000 S1009
greek "Δεν μπορείτε να χρησιμοποιήσετε fixed rowlength σε BLOBs. Παρακαλώ χρησιμοποιείστε 'fields terminated by'."
hun "Fix hosszusagu BLOB-ok nem hasznalhatok. Hasznalja a 'mezoelvalaszto jelet' ."
ita "Non possono essere usate righe a lunghezza fissa con i BLOB. Usa 'FIELDS TERMINATED BY'."
- jpn "You can't use fixed rowlength with BLOBs; please use 'fields terminated by'."
+ jpn "BLOBには固定長レコードが使用できません。'FIELDS TERMINATED BY'句を使用して下さい。"
kor "BLOB로는 고정길이의 lowlength를 사용할 수 없습니다. 'fields terminated by'를 사용하세요."
nor "En kan ikke bruke faste feltlengder med BLOB. Vennlisgt bruk 'fields terminated by'."
norwegian-ny "Ein kan ikkje bruke faste feltlengder med BLOB. Vennlisgt bruk 'fields terminated by'."
@@ -2043,18 +1992,17 @@ ER_BLOBS_AND_NO_TERMINATED 42000 S1009
swe "Man kan inte använda fast radlängd med blobs. Använd 'fields terminated by'"
ukr "Не можна використовувати сталу довжину строки з BLOB. Зкористайтеся 'fields terminated by'"
ER_TEXTFILE_NOT_READABLE
- cze "Soubor '%-.128s' mus-Bí být v adresáři databáze nebo čitelný pro všechny"
+ cze "Soubor '%-.128s' musí být v adresáři databáze nebo čitelný pro všechny"
dan "Filen '%-.128s' skal være i database-folderen, eller kunne læses af alle"
nla "Het bestand '%-.128s' dient in de database directory voor the komen of leesbaar voor iedereen te zijn."
eng "The file '%-.128s' must be in the database directory or be readable by all"
- jps "ファイル '%-.128s' は databse の directory にあるか全てのユーザーが読めるように許可されていなければなりません.",
est "Fail '%-.128s' peab asuma andmebaasi kataloogis või olema kõigile loetav"
fre "Le fichier '%-.128s' doit être dans le répertoire de la base et lisible par tous"
ger "Datei '%-.128s' muss im Datenbank-Verzeichnis vorhanden oder lesbar für alle sein"
greek "Το αρχείο '%-.128s' πρέπει να υπάρχει στο database directory ή να μπορεί να διαβαστεί από όλους"
hun "A(z) '%-.128s'-nak az adatbazis konyvtarban kell lennie, vagy mindenki szamara olvashatonak"
ita "Il file '%-.128s' deve essere nella directory del database e deve essere leggibile da tutti"
- jpn "ファイル '%-.128s' は databse の directory にあるか全てのユーザーが読めるように許可されていなければなりません."
+ jpn "ファイル '%-.128s' はデータベースディレクトリにあるか、全てのユーザーから読める必要があります。"
kor "'%-.128s' 화일는 데이타베이스 디렉토리에 존재하거나 모두에게 읽기 가능하여야 합니다."
nor "Filen '%-.128s' må være i database-katalogen for å være lesbar for alle"
norwegian-ny "Filen '%-.128s' må være i database-katalogen for å være lesbar for alle"
@@ -2068,18 +2016,17 @@ ER_TEXTFILE_NOT_READABLE
swe "Textfilen '%-.128s' måste finnas i databasbiblioteket eller vara läsbar för alla"
ukr "Файл '%-.128s' повинен бути у теці бази данних або мати встановлене право на читання для усіх"
ER_FILE_EXISTS_ERROR
- cze "Soubor '%-.200s' ji-Bž existuje"
+ cze "Soubor '%-.200s' již existuje"
dan "Filen '%-.200s' eksisterer allerede"
nla "Het bestand '%-.200s' bestaat reeds"
eng "File '%-.200s' already exists"
- jps "File '%-.200s' は既に存在します",
est "Fail '%-.200s' juba eksisteerib"
fre "Le fichier '%-.200s' existe déjà"
ger "Datei '%-.200s' bereits vorhanden"
greek "Το αρχείο '%-.200s' υπάρχει ήδη"
hun "A '%-.200s' file mar letezik."
ita "Il file '%-.200s' esiste gia`"
- jpn "File '%-.200s' は既に存在します"
+ jpn "ファイル '%-.200s' はすでに存在します。"
kor "'%-.200s' 화일은 이미 존재합니다."
nor "Filen '%-.200s' eksisterte allerede"
norwegian-ny "Filen '%-.200s' eksisterte allereide"
@@ -2093,18 +2040,17 @@ ER_FILE_EXISTS_ERROR
swe "Filen '%-.200s' existerar redan"
ukr "Файл '%-.200s' вже існує"
ER_LOAD_INFO
- cze "Z-Báznamů: %ld Vymazáno: %ld Přeskočeno: %ld Varování: %ld"
+ cze "Záznamů: %ld Vymazáno: %ld Přeskočeno: %ld Varování: %ld"
dan "Poster: %ld Fjernet: %ld Sprunget over: %ld Advarsler: %ld"
nla "Records: %ld Verwijderd: %ld Overgeslagen: %ld Waarschuwingen: %ld"
eng "Records: %ld Deleted: %ld Skipped: %ld Warnings: %ld"
- jps "レコード数: %ld 削除: %ld Skipped: %ld Warnings: %ld",
est "Kirjeid: %ld Kustutatud: %ld Vahele jäetud: %ld Hoiatusi: %ld"
fre "Enregistrements: %ld Effacés: %ld Non traités: %ld Avertissements: %ld"
ger "Datensätze: %ld Gelöscht: %ld Ausgelassen: %ld Warnungen: %ld"
greek "Εγγραφές: %ld Διαγραφές: %ld Παρεκάμφθησαν: %ld Προειδοποιήσεις: %ld"
hun "Rekordok: %ld Torolve: %ld Skipped: %ld Warnings: %ld"
ita "Records: %ld Cancellati: %ld Saltati: %ld Avvertimenti: %ld"
- jpn "レコード数: %ld 削除: %ld Skipped: %ld Warnings: %ld"
+ jpn "レコード数: %ld 削除: %ld スキップ: %ld 警告: %ld"
kor "레코드: %ld개 삭제: %ld개 스킵: %ld개 경고: %ld개"
nor "Poster: %ld Fjernet: %ld Hoppet over: %ld Advarsler: %ld"
norwegian-ny "Poster: %ld Fjerna: %ld Hoppa over: %ld Åtvaringar: %ld"
@@ -2118,11 +2064,10 @@ ER_LOAD_INFO
swe "Rader: %ld Bortagna: %ld Dubletter: %ld Varningar: %ld"
ukr "Записів: %ld Видалено: %ld Пропущено: %ld Застережень: %ld"
ER_ALTER_INFO
- cze "Z-Báznamů: %ld Zdvojených: %ld"
+ cze "Záznamů: %ld Zdvojených: %ld"
dan "Poster: %ld Ens: %ld"
nla "Records: %ld Dubbel: %ld"
eng "Records: %ld Duplicates: %ld"
- jps "レコード数: %ld 重複: %ld",
est "Kirjeid: %ld Kattuvaid: %ld"
fre "Enregistrements: %ld Doublons: %ld"
ger "Datensätze: %ld Duplikate: %ld"
@@ -2143,7 +2088,7 @@ ER_ALTER_INFO
swe "Rader: %ld Dubletter: %ld"
ukr "Записів: %ld Дублікатів: %ld"
ER_WRONG_SUB_KEY
- cze "Chybn-Bá podčást klíče -- není to řetězec nebo je delší než délka části klíče"
+ cze "Chybná podčást klíče -- není to řetězec nebo je delší než délka části klíče"
dan "Forkert indeksdel. Den anvendte nøgledel er ikke en streng eller længden er større end nøglelængden"
nla "Foutief sub-gedeelte van de zoeksleutel. De gebruikte zoeksleutel is geen onderdeel van een string of of de gebruikte lengte is langer dan de zoeksleutel"
eng "Incorrect prefix key; the used key part isn't a string, the used length is longer than the key part, or the storage engine doesn't support unique prefix keys"
@@ -2153,7 +2098,7 @@ ER_WRONG_SUB_KEY
greek "Εσφαλμένο sub part key. Το χρησιμοποιούμενο key part δεν είναι string ή το μήκος του είναι μεγαλύτερο"
hun "Rossz alkulcs. A hasznalt kulcsresz nem karaktersorozat vagy hosszabb, mint a kulcsresz"
ita "Sotto-parte della chiave errata. La parte di chiave utilizzata non e` una stringa o la lunghezza e` maggiore della parte di chiave."
- jpn "Incorrect prefix key; the used key part isn't a string or the used length is longer than the key part"
+ jpn "キーのプレフィックスが不正です。キーが文字列ではないか、プレフィックス長がキーよりも長いか、ストレージエンジンが一意索引のプレフィックス指定をサポートしていません。"
kor "부정확한 서버 파트 키. 사용된 키 파트가 스트링이 아니거나 키 파트의 길이가 너무 깁니다."
nor "Feil delnøkkel. Den brukte delnøkkelen er ikke en streng eller den oppgitte lengde er lengre enn nøkkel lengden"
norwegian-ny "Feil delnykkel. Den brukte delnykkelen er ikkje ein streng eller den oppgitte lengda er lengre enn nykkellengden"
@@ -2167,18 +2112,17 @@ ER_WRONG_SUB_KEY
swe "Felaktig delnyckel. Nyckeldelen är inte en sträng eller den angivna längden är längre än kolumnlängden"
ukr "Невірна частина ключа. Використана частина ключа не є строкою, задовга або вказівник таблиці не підтримує унікальних частин ключей"
ER_CANT_REMOVE_ALL_FIELDS 42000
- cze "Nen-Bí možné vymazat všechny položky s ALTER TABLE. Použijte DROP TABLE"
+ cze "Není možné vymazat všechny položky s ALTER TABLE. Použijte DROP TABLE"
dan "Man kan ikke slette alle felter med ALTER TABLE. Brug DROP TABLE i stedet."
nla "Het is niet mogelijk alle velden te verwijderen met ALTER TABLE. Gebruik a.u.b. DROP TABLE hiervoor!"
eng "You can't delete all columns with ALTER TABLE; use DROP TABLE instead"
- jps "ALTER TABLE で全ての column は削除できません. DROP TABLE を使用してください",
est "ALTER TABLE kasutades ei saa kustutada kõiki tulpasid. Kustuta tabel DROP TABLE abil"
fre "Vous ne pouvez effacer tous les champs avec ALTER TABLE. Utilisez DROP TABLE"
ger "Mit ALTER TABLE können nicht alle Felder auf einmal gelöscht werden. Dafür DROP TABLE verwenden"
greek "Δεν είναι δυνατή η διαγραφή όλων των πεδίων με ALTER TABLE. Παρακαλώ χρησιμοποιείστε DROP TABLE"
hun "Az osszes mezo nem torolheto az ALTER TABLE-lel. Hasznalja a DROP TABLE-t helyette"
ita "Non si possono cancellare tutti i campi con una ALTER TABLE. Utilizzare DROP TABLE"
- jpn "ALTER TABLE で全ての column は削除できません. DROP TABLE を使用してください"
+ jpn "ALTER TABLE では全ての列の削除はできません。DROP TABLE を使用してください。"
kor "ALTER TABLE 명령으로는 모든 칼럼을 지울 수 없습니다. DROP TABLE 명령을 이용하세요."
nor "En kan ikke slette alle felt med ALTER TABLE. Bruk DROP TABLE isteden."
norwegian-ny "Ein kan ikkje slette alle felt med ALTER TABLE. Bruk DROP TABLE istadenfor."
@@ -2192,18 +2136,17 @@ ER_CANT_REMOVE_ALL_FIELDS 42000
swe "Man kan inte radera alla fält med ALTER TABLE. Använd DROP TABLE istället"
ukr "Не можливо видалити всі стовбці за допомогою ALTER TABLE. Для цього скористайтеся DROP TABLE"
ER_CANT_DROP_FIELD_OR_KEY 42000
- cze "Nemohu zru-Bšit '%-.192s' (provést DROP). Zkontrolujte, zda neexistují záznamy/klíče"
+ cze "Nemohu zrušit '%-.192s' (provést DROP). Zkontrolujte, zda neexistují záznamy/klíče"
dan "Kan ikke udføre DROP '%-.192s'. Undersøg om feltet/nøglen eksisterer."
nla "Kan '%-.192s' niet weggooien. Controleer of het veld of de zoeksleutel daadwerkelijk bestaat."
eng "Can't DROP '%-.192s'; check that column/key exists"
- jps "'%-.192s' を破棄できませんでした; check that column/key exists",
est "Ei suuda kustutada '%-.192s'. Kontrolli kas tulp/võti eksisteerib"
fre "Ne peut effacer (DROP) '%-.192s'. Vérifiez s'il existe"
ger "Kann '%-.192s' nicht löschen. Existiert die Spalte oder der Schlüssel?"
greek "Αδύνατη η διαγραφή (DROP) '%-.192s'. Παρακαλώ ελέγξτε αν το πεδίο/κλειδί υπάρχει"
hun "A DROP '%-.192s' nem lehetseges. Ellenorizze, hogy a mezo/kulcs letezik-e"
ita "Impossibile cancellare '%-.192s'. Controllare che il campo chiave esista"
- jpn "'%-.192s' を破棄できませんでした; check that column/key exists"
+ jpn "'%-.192s' を削除できません。列/索引の存在を確認して下さい。"
kor "'%-.192s'를 DROP할 수 없습니다. 칼럼이나 키가 존재하는지 채크하세요."
nor "Kan ikke DROP '%-.192s'. Undersøk om felt/nøkkel eksisterer."
norwegian-ny "Kan ikkje DROP '%-.192s'. Undersøk om felt/nøkkel eksisterar."
@@ -2217,18 +2160,17 @@ ER_CANT_DROP_FIELD_OR_KEY 42000
swe "Kan inte ta bort '%-.192s'. Kontrollera att fältet/nyckel finns"
ukr "Не можу DROP '%-.192s'. Перевірте, чи цей стовбець/ключ існує"
ER_INSERT_INFO
- cze "Z-Báznamů: %ld Zdvojených: %ld Varování: %ld"
+ cze "Záznamů: %ld Zdvojených: %ld Varování: %ld"
dan "Poster: %ld Ens: %ld Advarsler: %ld"
nla "Records: %ld Dubbel: %ld Waarschuwing: %ld"
eng "Records: %ld Duplicates: %ld Warnings: %ld"
- jps "レコード数: %ld 重複数: %ld Warnings: %ld",
est "Kirjeid: %ld Kattuvaid: %ld Hoiatusi: %ld"
fre "Enregistrements: %ld Doublons: %ld Avertissements: %ld"
ger "Datensätze: %ld Duplikate: %ld Warnungen: %ld"
greek "Εγγραφές: %ld Επαναλήψεις: %ld Προειδοποιήσεις: %ld"
hun "Rekordok: %ld Duplikalva: %ld Warnings: %ld"
ita "Records: %ld Duplicati: %ld Avvertimenti: %ld"
- jpn "レコード数: %ld 重複数: %ld Warnings: %ld"
+ jpn "レコード数: %ld 重複数: %ld 警告: %ld"
kor "레코드: %ld개 중복: %ld개 경고: %ld개"
nor "Poster: %ld Like: %ld Advarsler: %ld"
norwegian-ny "Postar: %ld Like: %ld Åtvaringar: %ld"
@@ -2244,22 +2186,22 @@ ER_INSERT_INFO
ER_UPDATE_TABLE_USED
eng "You can't specify target table '%-.192s' for update in FROM clause"
ger "Die Verwendung der zu aktualisierenden Zieltabelle '%-.192s' ist in der FROM-Klausel nicht zulässig."
+ jpn "FROM句にある表 '%-.192s' はUPDATEの対象にできません。"
rus "Не допускается указание таблицы '%-.192s' в списке таблиц FROM для внесения в нее изменений"
swe "INSERT-table '%-.192s' får inte finnas i FROM tabell-listan"
ukr "Таблиця '%-.192s' що змінюється не дозволена у переліку таблиць FROM"
ER_NO_SUCH_THREAD
- cze "Nezn-Bámá identifikace threadu: %lu"
+ cze "Neznámá identifikace threadu: %lu"
dan "Ukendt tråd id: %lu"
nla "Onbekend thread id: %lu"
eng "Unknown thread id: %lu"
- jps "thread id: %lu はありません",
est "Tundmatu lõim: %lu"
fre "Numéro de tâche inconnu: %lu"
ger "Unbekannte Thread-ID: %lu"
greek "Αγνωστο thread id: %lu"
hun "Ervenytelen szal (thread) id: %lu"
ita "Thread id: %lu sconosciuto"
- jpn "thread id: %lu はありません"
+ jpn "不明なスレッドIDです: %lu"
kor "알수 없는 쓰레드 id: %lu"
nor "Ukjent tråd id: %lu"
norwegian-ny "Ukjent tråd id: %lu"
@@ -2273,18 +2215,17 @@ ER_NO_SUCH_THREAD
swe "Finns ingen tråd med id %lu"
ukr "Невідомий ідентифікатор гілки: %lu"
ER_KILL_DENIED_ERROR
- cze "Nejste vlastn-Bíkem threadu %lu"
+ cze "Nejste vlastníkem threadu %lu"
dan "Du er ikke ejer af tråden %lu"
nla "U bent geen bezitter van thread %lu"
eng "You are not owner of thread %lu"
- jps "thread %lu のオーナーではありません",
est "Ei ole lõime %lu omanik"
fre "Vous n'êtes pas propriétaire de la tâche no: %lu"
ger "Sie sind nicht Eigentümer von Thread %lu"
greek "Δεν είσθε owner του thread %lu"
hun "A %lu thread-nek mas a tulajdonosa"
ita "Utente non proprietario del thread %lu"
- jpn "thread %lu のオーナーではありません"
+ jpn "スレッド %lu のオーナーではありません。"
kor "쓰레드(Thread) %lu의 소유자가 아닙니다."
nor "Du er ikke eier av tråden %lu"
norwegian-ny "Du er ikkje eigar av tråd %lu"
@@ -2298,7 +2239,7 @@ ER_KILL_DENIED_ERROR
swe "Du är inte ägare till tråd %lu"
ukr "Ви не володар гілки %lu"
ER_NO_TABLES_USED
- cze "Nejsou pou-Bžity žádné tabulky"
+ cze "Nejsou použity žádné tabulky"
dan "Ingen tabeller i brug"
nla "Geen tabellen gebruikt."
eng "No tables used"
@@ -2308,6 +2249,7 @@ ER_NO_TABLES_USED
greek "Δεν χρησιμοποιήθηκαν πίνακες"
hun "Nincs hasznalt tabla"
ita "Nessuna tabella usata"
+ jpn "表が指定されていません。"
kor "어떤 테이블도 사용되지 않았습니다."
nor "Ingen tabeller i bruk"
norwegian-ny "Ingen tabellar i bruk"
@@ -2321,7 +2263,7 @@ ER_NO_TABLES_USED
swe "Inga tabeller angivna"
ukr "Не використано таблиць"
ER_TOO_BIG_SET
- cze "P-Bříliš mnoho řetězců pro sloupec %-.192s a SET"
+ cze "Příliš mnoho řetězců pro sloupec %-.192s a SET"
dan "For mange tekststrenge til specifikationen af SET i kolonne %-.192s"
nla "Teveel strings voor kolom %-.192s en SET"
eng "Too many strings for column %-.192s and SET"
@@ -2331,6 +2273,7 @@ ER_TOO_BIG_SET
greek "Πάρα πολλά strings για το πεδίο %-.192s και SET"
hun "Tul sok karakter: %-.192s es SET"
ita "Troppe stringhe per la colonna %-.192s e la SET"
+ jpn "SET型の列 '%-.192s' のメンバーの数が多すぎます。"
kor "칼럼 %-.192s와 SET에서 스트링이 너무 많습니다."
nor "For mange tekststrenger kolonne %-.192s og SET"
norwegian-ny "For mange tekststrengar felt %-.192s og SET"
@@ -2344,7 +2287,7 @@ ER_TOO_BIG_SET
swe "För många alternativ till kolumn %-.192s för SET"
ukr "Забагато строк для стовбця %-.192s та SET"
ER_NO_UNIQUE_LOGFILE
- cze "Nemohu vytvo-Břit jednoznačné jméno logovacího souboru %-.200s.(1-999)\n"
+ cze "Nemohu vytvořit jednoznačné jméno logovacího souboru %-.200s.(1-999)\n"
dan "Kan ikke lave unikt log-filnavn %-.200s.(1-999)\n"
nla "Het is niet mogelijk een unieke naam te maken voor de logfile %-.200s.(1-999)\n"
eng "Can't generate a unique log-filename %-.200s.(1-999)\n"
@@ -2354,6 +2297,7 @@ ER_NO_UNIQUE_LOGFILE
greek "Αδύνατη η δημιουργία unique log-filename %-.200s.(1-999)\n"
hun "Egyedi log-filenev nem generalhato: %-.200s.(1-999)\n"
ita "Impossibile generare un nome del file log unico %-.200s.(1-999)\n"
+ jpn "一意なログファイル名 %-.200s.(1-999) を生成できません。\n"
kor "Unique 로그화일 '%-.200s'를 만들수 없습니다.(1-999)\n"
nor "Kan ikke lage unikt loggfilnavn %-.200s.(1-999)\n"
norwegian-ny "Kan ikkje lage unikt loggfilnavn %-.200s.(1-999)\n"
@@ -2367,18 +2311,17 @@ ER_NO_UNIQUE_LOGFILE
swe "Kan inte generera ett unikt filnamn %-.200s.(1-999)\n"
ukr "Не можу згенерувати унікальне ім'я log-файлу %-.200s.(1-999)\n"
ER_TABLE_NOT_LOCKED_FOR_WRITE
- cze "Tabulka '%-.192s' byla zam-Bčena s READ a nemůže být změněna"
+ cze "Tabulka '%-.192s' byla zamčena s READ a nemůže být změněna"
dan "Tabellen '%-.192s' var låst med READ lås og kan ikke opdateres"
nla "Tabel '%-.192s' was gelocked met een lock om te lezen. Derhalve kunnen geen wijzigingen worden opgeslagen."
eng "Table '%-.192s' was locked with a READ lock and can't be updated"
- jps "Table '%-.192s' は READ lock になっていて、更新はできません",
est "Tabel '%-.192s' on lukustatud READ lukuga ning ei ole muudetav"
fre "Table '%-.192s' verrouillée lecture (READ): modification impossible"
ger "Tabelle '%-.192s' ist mit Lesesperre versehen und kann nicht aktualisiert werden"
greek "Ο πίνακας '%-.192s' έχει κλειδωθεί με READ lock και δεν επιτρέπονται αλλαγές"
hun "A(z) '%-.192s' tabla zarolva lett (READ lock) es nem lehet frissiteni"
ita "La tabella '%-.192s' e` soggetta a lock in lettura e non puo` essere aggiornata"
- jpn "Table '%-.192s' は READ lock になっていて、更新はできません"
+ jpn "表 '%-.192s' はREADロックされていて、更新できません。"
kor "테이블 '%-.192s'는 READ 락이 잠겨있어서 갱신할 수 없습니다."
nor "Tabellen '%-.192s' var låst med READ lås og kan ikke oppdateres"
norwegian-ny "Tabellen '%-.192s' var låst med READ lås og kan ikkje oppdaterast"
@@ -2392,18 +2335,17 @@ ER_TABLE_NOT_LOCKED_FOR_WRITE
swe "Tabell '%-.192s' kan inte uppdateras emedan den är låst för läsning"
ukr "Таблицю '%-.192s' заблоковано тільки для читання, тому її не можна оновити"
ER_TABLE_NOT_LOCKED
- cze "Tabulka '%-.192s' nebyla zam-Bčena s LOCK TABLES"
+ cze "Tabulka '%-.192s' nebyla zamčena s LOCK TABLES"
dan "Tabellen '%-.192s' var ikke låst med LOCK TABLES"
nla "Tabel '%-.192s' was niet gelocked met LOCK TABLES"
eng "Table '%-.192s' was not locked with LOCK TABLES"
- jps "Table '%-.192s' は LOCK TABLES によってロックされていません",
est "Tabel '%-.192s' ei ole lukustatud käsuga LOCK TABLES"
fre "Table '%-.192s' non verrouillée: utilisez LOCK TABLES"
ger "Tabelle '%-.192s' wurde nicht mit LOCK TABLES gesperrt"
greek "Ο πίνακας '%-.192s' δεν έχει κλειδωθεί με LOCK TABLES"
hun "A(z) '%-.192s' tabla nincs zarolva a LOCK TABLES-szel"
ita "Non e` stato impostato il lock per la tabella '%-.192s' con LOCK TABLES"
- jpn "Table '%-.192s' は LOCK TABLES によってロックされていません"
+ jpn "表 '%-.192s' は LOCK TABLES でロックされていません。"
kor "테이블 '%-.192s'는 LOCK TABLES 명령으로 잠기지 않았습니다."
nor "Tabellen '%-.192s' var ikke låst med LOCK TABLES"
norwegian-ny "Tabellen '%-.192s' var ikkje låst med LOCK TABLES"
@@ -2417,7 +2359,7 @@ ER_TABLE_NOT_LOCKED
swe "Tabell '%-.192s' är inte låst med LOCK TABLES"
ukr "Таблицю '%-.192s' не було блоковано з LOCK TABLES"
ER_BLOB_CANT_HAVE_DEFAULT 42000
- cze "Blob polo-Bžka '%-.192s' nemůže mít defaultní hodnotu"
+ cze "Blob položka '%-.192s' nemůže mít defaultní hodnotu"
dan "BLOB feltet '%-.192s' kan ikke have en standard værdi"
nla "Blob veld '%-.192s' can geen standaardwaarde bevatten"
eng "BLOB/TEXT column '%-.192s' can't have a default value"
@@ -2427,7 +2369,7 @@ ER_BLOB_CANT_HAVE_DEFAULT 42000
greek "Τα Blob πεδία '%-.192s' δεν μπορούν να έχουν προκαθορισμένες τιμές (default value)"
hun "A(z) '%-.192s' blob objektumnak nem lehet alapertelmezett erteke"
ita "Il campo BLOB '%-.192s' non puo` avere un valore di default"
- jpn "BLOB column '%-.192s' can't have a default value"
+ jpn "BLOB/TEXT 列 '%-.192s' にはデフォルト値を指定できません。"
kor "BLOB 칼럼 '%-.192s' 는 디폴트 값을 가질 수 없습니다."
nor "Blob feltet '%-.192s' kan ikke ha en standard verdi"
norwegian-ny "Blob feltet '%-.192s' kan ikkje ha ein standard verdi"
@@ -2441,18 +2383,17 @@ ER_BLOB_CANT_HAVE_DEFAULT 42000
swe "BLOB fält '%-.192s' kan inte ha ett DEFAULT-värde"
ukr "Стовбець BLOB '%-.192s' не може мати значення по замовчуванню"
ER_WRONG_DB_NAME 42000
- cze "Nep-Břípustné jméno databáze '%-.100s'"
+ cze "Nepřípustné jméno databáze '%-.100s'"
dan "Ugyldigt database navn '%-.100s'"
nla "Databasenaam '%-.100s' is niet getoegestaan"
eng "Incorrect database name '%-.100s'"
- jps "指定した database 名 '%-.100s' が間違っています",
est "Vigane andmebaasi nimi '%-.100s'"
fre "Nom de base de donnée illégal: '%-.100s'"
ger "Unerlaubter Datenbankname '%-.100s'"
greek "Λάθος όνομα βάσης δεδομένων '%-.100s'"
hun "Hibas adatbazisnev: '%-.100s'"
ita "Nome database errato '%-.100s'"
- jpn "指定した database 名 '%-.100s' が間違っています"
+ jpn "データベース名 '%-.100s' は不正です。"
kor "'%-.100s' 데이타베이스의 이름이 부정확합니다."
nor "Ugyldig database navn '%-.100s'"
norwegian-ny "Ugyldig database namn '%-.100s'"
@@ -2466,18 +2407,17 @@ ER_WRONG_DB_NAME 42000
swe "Felaktigt databasnamn '%-.100s'"
ukr "Невірне ім'я бази данних '%-.100s'"
ER_WRONG_TABLE_NAME 42000
- cze "Nep-Břípustné jméno tabulky '%-.100s'"
+ cze "Nepřípustné jméno tabulky '%-.100s'"
dan "Ugyldigt tabel navn '%-.100s'"
nla "Niet toegestane tabelnaam '%-.100s'"
eng "Incorrect table name '%-.100s'"
- jps "指定した table 名 '%-.100s' はまちがっています",
est "Vigane tabeli nimi '%-.100s'"
fre "Nom de table illégal: '%-.100s'"
ger "Unerlaubter Tabellenname '%-.100s'"
greek "Λάθος όνομα πίνακα '%-.100s'"
hun "Hibas tablanev: '%-.100s'"
ita "Nome tabella errato '%-.100s'"
- jpn "指定した table 名 '%-.100s' はまちがっています"
+ jpn "表名 '%-.100s' は不正です。"
kor "'%-.100s' 테이블 이름이 부정확합니다."
nor "Ugyldig tabell navn '%-.100s'"
norwegian-ny "Ugyldig tabell namn '%-.100s'"
@@ -2491,7 +2431,7 @@ ER_WRONG_TABLE_NAME 42000
swe "Felaktigt tabellnamn '%-.100s'"
ukr "Невірне ім'я таблиці '%-.100s'"
ER_TOO_BIG_SELECT 42000
- cze "Zadan-Bý SELECT by procházel příliš mnoho záznamů a trval velmi dlouho. Zkontrolujte tvar WHERE a je-li SELECT v pořádku, použijte SET SQL_BIG_SELECTS=1"
+ cze "Zadaný SELECT by procházel příliš mnoho záznamů a trval velmi dlouho. Zkontrolujte tvar WHERE a je-li SELECT v pořádku, použijte SET SQL_BIG_SELECTS=1"
dan "SELECT ville undersøge for mange poster og ville sandsynligvis tage meget lang tid. Undersøg WHERE delen og brug SET SQL_BIG_SELECTS=1 hvis udtrykket er korrekt"
nla "Het SELECT-statement zou te veel records analyseren en dus veel tijd in beslagnemen. Kijk het WHERE-gedeelte van de query na en kies SET SQL_BIG_SELECTS=1 als het stament in orde is."
eng "The SELECT would examine more than MAX_JOIN_SIZE rows; check your WHERE and use SET SQL_BIG_SELECTS=1 or SET MAX_JOIN_SIZE=# if the SELECT is okay"
@@ -2501,6 +2441,7 @@ ER_TOO_BIG_SELECT 42000
greek "Το SELECT θα εξετάσει μεγάλο αριθμό εγγραφών και πιθανώς θα καθυστερήσει. Παρακαλώ εξετάστε τις παραμέτρους του WHERE και χρησιμοποιείστε SET SQL_BIG_SELECTS=1 αν το SELECT είναι σωστό"
hun "A SELECT tul sok rekordot fog megvizsgalni es nagyon sokaig fog tartani. Ellenorizze a WHERE-t es hasznalja a SET SQL_BIG_SELECTS=1 beallitast, ha a SELECT okay"
ita "La SELECT dovrebbe esaminare troppi record e usare troppo tempo. Controllare la WHERE e usa SET SQL_BIG_SELECTS=1 se e` tutto a posto."
+ jpn "SELECTがMAX_JOIN_SIZEを超える行数を処理しました。WHERE句を確認し、SELECT文に問題がなければ、 SET SQL_BIG_SELECTS=1 または SET MAX_JOIN_SIZE=# を使用して下さい。"
kor "SELECT 명령에서 너무 많은 레코드를 찾기 때문에 많은 시간이 소요됩니다. 따라서 WHERE 문을 점검하거나, 만약 SELECT가 ok되면 SET SQL_BIG_SELECTS=1 옵션을 사용하세요."
nor "SELECT ville undersøke for mange poster og ville sannsynligvis ta veldig lang tid. Undersøk WHERE klausulen og bruk SET SQL_BIG_SELECTS=1 om SELECTen er korrekt"
norwegian-ny "SELECT ville undersøkje for mange postar og ville sannsynligvis ta veldig lang tid. Undersøk WHERE klausulen og bruk SET SQL_BIG_SELECTS=1 om SELECTen er korrekt"
@@ -2514,7 +2455,7 @@ ER_TOO_BIG_SELECT 42000
swe "Den angivna frågan skulle läsa mer än MAX_JOIN_SIZE rader. Kontrollera din WHERE och använd SET SQL_BIG_SELECTS=1 eller SET MAX_JOIN_SIZE=# ifall du vill hantera stora joins"
ukr "Запиту SELECT потрібно обробити багато записів, що, певне, займе дуже багато часу. Перевірте ваше WHERE та використовуйте SET SQL_BIG_SELECTS=1, якщо цей запит SELECT є вірним"
ER_UNKNOWN_ERROR
- cze "Nezn-Bámá chyba"
+ cze "Neznámá chyba"
dan "Ukendt fejl"
nla "Onbekende Fout"
eng "Unknown error"
@@ -2524,6 +2465,7 @@ ER_UNKNOWN_ERROR
greek "Προέκυψε άγνωστο λάθος"
hun "Ismeretlen hiba"
ita "Errore sconosciuto"
+ jpn "不明なエラー"
kor "알수 없는 에러입니다."
nor "Ukjent feil"
norwegian-ny "Ukjend feil"
@@ -2533,10 +2475,10 @@ ER_UNKNOWN_ERROR
serbian "Nepoznata greška"
slo "Neznámá chyba"
spa "Error desconocido"
- swe "Oidentifierat fel"
+ swe "Okänt fel"
ukr "Невідома помилка"
ER_UNKNOWN_PROCEDURE 42000
- cze "Nezn-Bámá procedura %-.192s"
+ cze "Neznámá procedura %-.192s"
dan "Ukendt procedure %-.192s"
nla "Onbekende procedure %-.192s"
eng "Unknown procedure '%-.192s'"
@@ -2546,6 +2488,7 @@ ER_UNKNOWN_PROCEDURE 42000
greek "Αγνωστη διαδικασία '%-.192s'"
hun "Ismeretlen eljaras: '%-.192s'"
ita "Procedura '%-.192s' sconosciuta"
+ jpn "'%-.192s' は不明なプロシージャです。"
kor "알수 없는 수행문 : '%-.192s'"
nor "Ukjent prosedyre %-.192s"
norwegian-ny "Ukjend prosedyre %-.192s"
@@ -2559,7 +2502,7 @@ ER_UNKNOWN_PROCEDURE 42000
swe "Okänd procedur: %-.192s"
ukr "Невідома процедура '%-.192s'"
ER_WRONG_PARAMCOUNT_TO_PROCEDURE 42000
- cze "Chybn-Bý počet parametrů procedury %-.192s"
+ cze "Chybný počet parametrů procedury %-.192s"
dan "Forkert antal parametre til proceduren %-.192s"
nla "Foutief aantal parameters doorgegeven aan procedure %-.192s"
eng "Incorrect parameter count to procedure '%-.192s'"
@@ -2569,6 +2512,7 @@ ER_WRONG_PARAMCOUNT_TO_PROCEDURE 42000
greek "Λάθος αριθμός παραμέτρων στη διαδικασία '%-.192s'"
hun "Rossz parameter a(z) '%-.192s'eljaras szamitasanal"
ita "Numero di parametri errato per la procedura '%-.192s'"
+ jpn "プロシージャ '%-.192s' へのパラメータ数が不正です。"
kor "'%-.192s' 수행문에 대한 부정확한 파라메터"
nor "Feil parameter antall til prosedyren %-.192s"
norwegian-ny "Feil parameter tal til prosedyra %-.192s"
@@ -2582,7 +2526,7 @@ ER_WRONG_PARAMCOUNT_TO_PROCEDURE 42000
swe "Felaktigt antal parametrar till procedur %-.192s"
ukr "Хибна кількість параметрів процедури '%-.192s'"
ER_WRONG_PARAMETERS_TO_PROCEDURE
- cze "Chybn-Bé parametry procedury %-.192s"
+ cze "Chybné parametry procedury %-.192s"
dan "Forkert(e) parametre til proceduren %-.192s"
nla "Foutieve parameters voor procedure %-.192s"
eng "Incorrect parameters to procedure '%-.192s'"
@@ -2592,6 +2536,7 @@ ER_WRONG_PARAMETERS_TO_PROCEDURE
greek "Λάθος παράμετροι στην διαδικασία '%-.192s'"
hun "Rossz parameter a(z) '%-.192s' eljarasban"
ita "Parametri errati per la procedura '%-.192s'"
+ jpn "プロシージャ '%-.192s' へのパラメータが不正です。"
kor "'%-.192s' 수행문에 대한 부정확한 파라메터"
nor "Feil parametre til prosedyren %-.192s"
norwegian-ny "Feil parameter til prosedyra %-.192s"
@@ -2605,7 +2550,7 @@ ER_WRONG_PARAMETERS_TO_PROCEDURE
swe "Felaktiga parametrar till procedur %-.192s"
ukr "Хибний параметер процедури '%-.192s'"
ER_UNKNOWN_TABLE 42S02
- cze "Nezn-Bámá tabulka '%-.192s' v %-.32s"
+ cze "Neznámá tabulka '%-.192s' v %-.32s"
dan "Ukendt tabel '%-.192s' i %-.32s"
nla "Onbekende tabel '%-.192s' in %-.32s"
eng "Unknown table '%-.192s' in %-.32s"
@@ -2615,7 +2560,7 @@ ER_UNKNOWN_TABLE 42S02
greek "Αγνωστος πίνακας '%-.192s' σε %-.32s"
hun "Ismeretlen tabla: '%-.192s' %-.32s-ban"
ita "Tabella '%-.192s' sconosciuta in %-.32s"
- jpn "Unknown table '%-.192s' in %-.32s"
+ jpn "'%-.192s' は %-.32s では不明な表です。"
kor "알수 없는 테이블 '%-.192s' (데이타베이스 %-.32s)"
nor "Ukjent tabell '%-.192s' i %-.32s"
norwegian-ny "Ukjend tabell '%-.192s' i %-.32s"
@@ -2629,7 +2574,7 @@ ER_UNKNOWN_TABLE 42S02
swe "Okänd tabell '%-.192s' i '%-.32s'"
ukr "Невідома таблиця '%-.192s' у %-.32s"
ER_FIELD_SPECIFIED_TWICE 42000
- cze "Polo-Bžka '%-.192s' je zadána dvakrát"
+ cze "Položka '%-.192s' je zadána dvakrát"
dan "Feltet '%-.192s' er anvendt to gange"
nla "Veld '%-.192s' is dubbel gespecificeerd"
eng "Column '%-.192s' specified twice"
@@ -2639,6 +2584,7 @@ ER_FIELD_SPECIFIED_TWICE 42000
greek "Το πεδίο '%-.192s' έχει ορισθεί δύο φορές"
hun "A(z) '%-.192s' mezot ketszer definialta"
ita "Campo '%-.192s' specificato 2 volte"
+ jpn "列 '%-.192s' は2回指定されています。"
kor "칼럼 '%-.192s'는 두번 정의되어 있읍니다."
nor "Feltet '%-.192s' er spesifisert to ganger"
norwegian-ny "Feltet '%-.192s' er spesifisert to gangar"
@@ -2652,7 +2598,7 @@ ER_FIELD_SPECIFIED_TWICE 42000
swe "Fält '%-.192s' är redan använt"
ukr "Стовбець '%-.192s' зазначено двічі"
ER_INVALID_GROUP_FUNC_USE
- cze "Nespr-Bávné použití funkce group"
+ cze "Nesprávné použití funkce group"
dan "Forkert brug af grupperings-funktion"
nla "Ongeldig gebruik van GROUP-functie"
eng "Invalid use of group function"
@@ -2662,6 +2608,7 @@ ER_INVALID_GROUP_FUNC_USE
greek "Εσφαλμένη χρήση της group function"
hun "A group funkcio ervenytelen hasznalata"
ita "Uso non valido di una funzione di raggruppamento"
+ jpn "集計関数の使用方法が不正です。"
kor "잘못된 그룹 함수를 사용하였습니다."
por "Uso inválido de função de agrupamento (GROUP)"
rum "Folosire incorecta a functiei group"
@@ -2672,7 +2619,7 @@ ER_INVALID_GROUP_FUNC_USE
swe "Felaktig användning av SQL grupp function"
ukr "Хибне використання функції групування"
ER_UNSUPPORTED_EXTENSION 42000
- cze "Tabulka '%-.192s' pou-Bžívá rozšíření, které v této verzi MariaDB není"
+ cze "Tabulka '%-.192s' používá rozšíření, které v této verzi MariaDB není"
dan "Tabellen '%-.192s' bruger et filtypenavn som ikke findes i denne MariaDB version"
nla "Tabel '%-.192s' gebruikt een extensie, die niet in deze MariaDB-versie voorkomt."
eng "Table '%-.192s' uses an extension that doesn't exist in this MariaDB version"
@@ -2682,6 +2629,7 @@ ER_UNSUPPORTED_EXTENSION 42000
greek "Ο πίνακς '%-.192s' χρησιμοποιεί κάποιο extension που δεν υπάρχει στην έκδοση αυτή της MariaDB"
hun "A(z) '%-.192s' tabla olyan bovitest hasznal, amely nem letezik ebben a MariaDB versioban."
ita "La tabella '%-.192s' usa un'estensione che non esiste in questa versione di MariaDB"
+ jpn "表 '%-.192s' は、このMariaDBバージョンには無い機能を使用しています。"
kor "테이블 '%-.192s'는 확장명령을 이용하지만 현재의 MariaDB 버젼에서는 존재하지 않습니다."
nor "Table '%-.192s' uses a extension that doesn't exist in this MariaDB version"
norwegian-ny "Table '%-.192s' uses a extension that doesn't exist in this MariaDB version"
@@ -2695,18 +2643,17 @@ ER_UNSUPPORTED_EXTENSION 42000
swe "Tabell '%-.192s' har en extension som inte finns i denna version av MariaDB"
ukr "Таблиця '%-.192s' використовує розширення, що не існує у цій версії MariaDB"
ER_TABLE_MUST_HAVE_COLUMNS 42000
- cze "Tabulka mus-Bí mít alespoň jeden sloupec"
+ cze "Tabulka musí mít alespoň jeden sloupec"
dan "En tabel skal have mindst een kolonne"
nla "Een tabel moet minstens 1 kolom bevatten"
eng "A table must have at least 1 column"
- jps "テーブルは最低 1 個の column が必要です",
est "Tabelis peab olema vähemalt üks tulp"
fre "Une table doit comporter au moins une colonne"
ger "Eine Tabelle muss mindestens eine Spalte besitzen"
greek "Ενας πίνακας πρέπει να έχει τουλάχιστον ένα πεδίο"
hun "A tablanak legalabb egy oszlopot tartalmazni kell"
ita "Una tabella deve avere almeno 1 colonna"
- jpn "テーブルは最低 1 個の column が必要です"
+ jpn "表には最低でも1個の列が必要です。"
kor "하나의 테이블에서는 적어도 하나의 칼럼이 존재하여야 합니다."
por "Uma tabela tem que ter pelo menos uma (1) coluna"
rum "O tabela trebuie sa aiba cel putin o coloana"
@@ -2717,18 +2664,17 @@ ER_TABLE_MUST_HAVE_COLUMNS 42000
swe "Tabeller måste ha minst 1 kolumn"
ukr "Таблиця повинна мати хочаб один стовбець"
ER_RECORD_FILE_FULL
- cze "Tabulka '%-.192s' je pln-Bá"
+ cze "Tabulka '%-.192s' je plná"
dan "Tabellen '%-.192s' er fuld"
nla "De tabel '%-.192s' is vol"
eng "The table '%-.192s' is full"
- jps "table '%-.192s' はいっぱいです",
est "Tabel '%-.192s' on täis"
fre "La table '%-.192s' est pleine"
ger "Tabelle '%-.192s' ist voll"
greek "Ο πίνακας '%-.192s' είναι γεμάτος"
hun "A '%-.192s' tabla megtelt"
ita "La tabella '%-.192s' e` piena"
- jpn "table '%-.192s' はいっぱいです"
+ jpn "表 '%-.192s' は満杯です。"
kor "테이블 '%-.192s'가 full났습니다. "
por "Tabela '%-.192s' está cheia"
rum "Tabela '%-.192s' e plina"
@@ -2739,18 +2685,17 @@ ER_RECORD_FILE_FULL
swe "Tabellen '%-.192s' är full"
ukr "Таблиця '%-.192s' заповнена"
ER_UNKNOWN_CHARACTER_SET 42000
- cze "Nezn-Bámá znaková sada: '%-.64s'"
+ cze "Neznámá znaková sada: '%-.64s'"
dan "Ukendt tegnsæt: '%-.64s'"
nla "Onbekende character set: '%-.64s'"
eng "Unknown character set: '%-.64s'"
- jps "character set '%-.64s' はサポートしていません",
est "Vigane kooditabel '%-.64s'"
fre "Jeu de caractères inconnu: '%-.64s'"
ger "Unbekannter Zeichensatz: '%-.64s'"
greek "Αγνωστο character set: '%-.64s'"
hun "Ervenytelen karakterkeszlet: '%-.64s'"
ita "Set di caratteri '%-.64s' sconosciuto"
- jpn "character set '%-.64s' はサポートしていません"
+ jpn "不明な文字コードセット: '%-.64s'"
kor "알수없는 언어 Set: '%-.64s'"
por "Conjunto de caracteres '%-.64s' desconhecido"
rum "Set de caractere invalid: '%-.64s'"
@@ -2761,18 +2706,17 @@ ER_UNKNOWN_CHARACTER_SET 42000
swe "Okänd teckenuppsättning: '%-.64s'"
ukr "Невідома кодова таблиця: '%-.64s'"
ER_TOO_MANY_TABLES
- cze "P-Bříliš mnoho tabulek, MariaDB jich může mít v joinu jen %d"
+ cze "Příliš mnoho tabulek, MariaDB jich může mít v joinu jen %d"
dan "For mange tabeller. MariaDB kan kun bruge %d tabeller i et join"
nla "Teveel tabellen. MariaDB kan slechts %d tabellen in een join bevatten"
eng "Too many tables; MariaDB can only use %d tables in a join"
- jps "テーブルが多すぎます; MariaDB can only use %d tables in a join",
est "Liiga palju tabeleid. MariaDB suudab JOINiga ühendada kuni %d tabelit"
fre "Trop de tables. MariaDB ne peut utiliser que %d tables dans un JOIN"
ger "Zu viele Tabellen. MariaDB kann in einem Join maximal %d Tabellen verwenden"
greek "Πολύ μεγάλος αριθμός πινάκων. Η MariaDB μπορεί να χρησιμοποιήσει %d πίνακες σε διαδικασία join"
hun "Tul sok tabla. A MariaDB csak %d tablat tud kezelni osszefuzeskor"
ita "Troppe tabelle. MariaDB puo` usare solo %d tabelle in una join"
- jpn "テーブルが多すぎます; MariaDB can only use %d tables in a join"
+ jpn "表が多すぎます。MariaDBがJOINできる表は %d 個までです。"
kor "너무 많은 테이블이 Join되었습니다. MariaDB에서는 JOIN시 %d개의 테이블만 사용할 수 있습니다."
por "Tabelas demais. O MariaDB pode usar somente %d tabelas em uma junção (JOIN)"
rum "Prea multe tabele. MariaDB nu poate folosi mai mult de %d tabele intr-un join"
@@ -2783,18 +2727,17 @@ ER_TOO_MANY_TABLES
swe "För många tabeller. MariaDB can ha högst %d tabeller i en och samma join"
ukr "Забагато таблиць. MariaDB може використовувати лише %d таблиць у об'єднанні"
ER_TOO_MANY_FIELDS
- cze "P-Bříliš mnoho položek"
+ cze "Příliš mnoho položek"
dan "For mange felter"
nla "Te veel velden"
eng "Too many columns"
- jps "column が多すぎます",
est "Liiga palju tulpasid"
fre "Trop de champs"
ger "Zu viele Felder"
greek "Πολύ μεγάλος αριθμός πεδίων"
hun "Tul sok mezo"
ita "Troppi campi"
- jpn "column が多すぎます"
+ jpn "列が多すぎます。"
kor "칼럼이 너무 많습니다."
por "Colunas demais"
rum "Prea multe coloane"
@@ -2805,18 +2748,17 @@ ER_TOO_MANY_FIELDS
swe "För många fält"
ukr "Забагато стовбців"
ER_TOO_BIG_ROWSIZE 42000
- cze "-BŘádek je příliš velký. Maximální velikost řádku, nepočítaje položky blob, je %ld. Musíte změnit některé položky na blob"
+ cze "Řádek je příliš velký. Maximální velikost řádku, nepočítaje položky blob, je %ld. Musíte změnit některé položky na blob"
dan "For store poster. Max post størrelse, uden BLOB's, er %ld. Du må lave nogle felter til BLOB's"
nla "Rij-grootte is groter dan toegestaan. Maximale rij grootte, blobs niet meegeteld, is %ld. U dient sommige velden in blobs te veranderen."
eng "Row size too large. The maximum row size for the used table type, not counting BLOBs, is %ld. This includes storage overhead, check the manual. You have to change some columns to TEXT or BLOBs"
- jps "row size が大きすぎます. BLOB を含まない場合の row size の最大は %ld です. いくつかの field を BLOB に変えてください.",
est "Liiga pikk kirje. Kirje maksimumpikkus arvestamata BLOB-tüüpi välju on %ld. Muuda mõned väljad BLOB-tüüpi väljadeks"
fre "Ligne trop grande. Le taille maximale d'une ligne, sauf les BLOBs, est %ld. Changez le type de quelques colonnes en BLOB"
ger "Zeilenlänge zu groß. Die maximale Zeilenlänge für den verwendeten Tabellentyp (ohne BLOB-Felder) beträgt %ld. Einige Felder müssen in BLOB oder TEXT umgewandelt werden"
greek "Πολύ μεγάλο μέγεθος εγγραφής. Το μέγιστο μέγεθος εγγραφής, χωρίς να υπολογίζονται τα blobs, είναι %ld. Πρέπει να ορίσετε κάποια πεδία σαν blobs"
hun "Tul nagy sormeret. A maximalis sormeret (nem szamolva a blob objektumokat) %ld. Nehany mezot meg kell valtoztatnia"
ita "Riga troppo grande. La massima grandezza di una riga, non contando i BLOB, e` %ld. Devi cambiare alcuni campi in BLOB"
- jpn "row size が大きすぎます. BLOB を含まない場合の row size の最大は %ld です. いくつかの field を BLOB に変えてください."
+ jpn "行サイズが大きすぎます。この表の最大行サイズは BLOB を含まずに %ld です。格納時のオーバーヘッドも含まれます(マニュアルを確認してください)。列をTEXTまたはBLOBに変更する必要があります。"
kor "너무 큰 row 사이즈입니다. BLOB를 계산하지 않고 최대 row 사이즈는 %ld입니다. 얼마간의 필드들을 BLOB로 바꾸셔야 겠군요.."
por "Tamanho de linha grande demais. O máximo tamanho de linha, não contando BLOBs, é %ld. Você tem que mudar alguns campos para BLOBs"
rum "Marimea liniei (row) prea mare. Marimea maxima a liniei, excluzind BLOB-urile este de %ld. Trebuie sa schimbati unele cimpuri in BLOB-uri"
@@ -2827,17 +2769,16 @@ ER_TOO_BIG_ROWSIZE 42000
swe "För stor total radlängd. Den högst tillåtna radlängden, förutom BLOBs, är %ld. Ändra några av dina fält till BLOB"
ukr "Задовга строка. Найбільшою довжиною строки, не рахуючи BLOB, є %ld. Вам потрібно привести деякі стовбці до типу BLOB"
ER_STACK_OVERRUN
- cze "P-Břetečení zásobníku threadu: použito %ld z %ld. Použijte 'mysqld --thread_stack=#' k zadání většího zásobníku"
+ cze "Přetečení zásobníku threadu: použito %ld z %ld. Použijte 'mysqld --thread_stack=#' k zadání většího zásobníku"
dan "Thread stack brugt: Brugt: %ld af en %ld stak. Brug 'mysqld --thread_stack=#' for at allokere en større stak om nødvendigt"
nla "Thread stapel overrun: Gebruikte: %ld van een %ld stack. Gebruik 'mysqld --thread_stack=#' om een grotere stapel te definieren (indien noodzakelijk)."
eng "Thread stack overrun: Used: %ld of a %ld stack. Use 'mysqld --thread_stack=#' to specify a bigger stack if needed"
- jps "Thread stack overrun: Used: %ld of a %ld stack. スタック領域を多くとりたい場合、'mysqld --thread_stack=#' と指定してください",
fre "Débordement de la pile des tâches (Thread stack). Utilisées: %ld pour une pile de %ld. Essayez 'mysqld --thread_stack=#' pour indiquer une plus grande valeur"
ger "Thread-Stack-Überlauf. Benutzt: %ld von %ld Stack. 'mysqld --thread_stack=#' verwenden, um bei Bedarf einen größeren Stack anzulegen"
greek "Stack overrun στο thread: Used: %ld of a %ld stack. Παρακαλώ χρησιμοποιείστε 'mysqld --thread_stack=#' για να ορίσετε ένα μεγαλύτερο stack αν χρειάζεται"
hun "Thread verem tullepes: Used: %ld of a %ld stack. Hasznalja a 'mysqld --thread_stack=#' nagyobb verem definialasahoz"
ita "Thread stack overrun: Usati: %ld di uno stack di %ld. Usa 'mysqld --thread_stack=#' per specificare uno stack piu` grande."
- jpn "Thread stack overrun: Used: %ld of a %ld stack. スタック領域を多くとりたい場合、'mysqld --thread_stack=#' と指定してください"
+ jpn "スレッドスタック不足です(使用: %ld ; サイズ: %ld)。必要に応じて、より大きい値で 'mysqld --thread_stack=#' の指定をしてください。"
kor "쓰레드 스택이 넘쳤습니다. 사용: %ld개 스택: %ld개. 만약 필요시 더큰 스택을 원할때에는 'mysqld --thread_stack=#' 를 정의하세요"
por "Estouro da pilha do 'thread'. Usados %ld de uma pilha de %ld. Use 'mysqld --thread_stack=#' para especificar uma pilha maior, se necessário"
rum "Stack-ul thread-ului a fost depasit (prea mic): Folositi: %ld intr-un stack de %ld. Folositi 'mysqld --thread_stack=#' ca sa specifici un stack mai mare"
@@ -2848,7 +2789,7 @@ ER_STACK_OVERRUN
swe "Trådstacken tog slut: Har använt %ld av %ld bytes. Använd 'mysqld --thread_stack=#' ifall du behöver en större stack"
ukr "Стек гілок переповнено: Використано: %ld з %ld. Використовуйте 'mysqld --thread_stack=#' аби зазначити більший стек, якщо необхідно"
ER_WRONG_OUTER_JOIN 42000
- cze "V OUTER JOIN byl nalezen k-Břížový odkaz. Prověřte ON podmínky"
+ cze "V OUTER JOIN byl nalezen křížový odkaz. Prověřte ON podmínky"
dan "Krydsreferencer fundet i OUTER JOIN; check dine ON conditions"
nla "Gekruiste afhankelijkheid gevonden in OUTER JOIN. Controleer uw ON-conditions"
eng "Cross dependency found in OUTER JOIN; examine your ON conditions"
@@ -2858,6 +2799,7 @@ ER_WRONG_OUTER_JOIN 42000
greek "Cross dependency βρέθηκε σε OUTER JOIN. Παρακαλώ εξετάστε τις συνθήκες που θέσατε στο ON"
hun "Keresztfuggoseg van az OUTER JOIN-ban. Ellenorizze az ON felteteleket"
ita "Trovata una dipendenza incrociata nella OUTER JOIN. Controlla le condizioni ON"
+ jpn "OUTER JOINに相互依存が見つかりました。ON句の条件を確認して下さい。"
por "Dependência cruzada encontrada em junção externa (OUTER JOIN); examine as condições utilizadas nas cláusulas 'ON'"
rum "Dependinta incrucisata (cross dependency) gasita in OUTER JOIN. Examinati conditiile ON"
rus "В OUTER JOIN обнаружена перекрестная зависимость. Внимательно проанализируйте свои условия ON"
@@ -2870,18 +2812,17 @@ ER_NULL_COLUMN_IN_INDEX 42000
eng "Table handler doesn't support NULL in given index. Please change column '%-.192s' to be NOT NULL or use another handler"
swe "Tabell hanteraren kan inte indexera NULL kolumner för den givna index typen. Ändra '%-.192s' till NOT NULL eller använd en annan hanterare"
ER_CANT_FIND_UDF
- cze "Nemohu na-Bčíst funkci '%-.192s'"
+ cze "Nemohu načíst funkci '%-.192s'"
dan "Kan ikke læse funktionen '%-.192s'"
nla "Kan functie '%-.192s' niet laden"
eng "Can't load function '%-.192s'"
- jps "function '%-.192s' を ロードできません",
est "Ei suuda avada funktsiooni '%-.192s'"
fre "Imposible de charger la fonction '%-.192s'"
ger "Kann Funktion '%-.192s' nicht laden"
greek "Δεν είναι δυνατή η διαδικασία load για τη συνάρτηση '%-.192s'"
hun "A(z) '%-.192s' fuggveny nem toltheto be"
ita "Impossibile caricare la funzione '%-.192s'"
- jpn "function '%-.192s' を ロードできません"
+ jpn "関数 '%-.192s' をロードできません。"
kor "'%-.192s' 함수를 로드하지 못했습니다."
por "Não pode carregar a função '%-.192s'"
rum "Nu pot incarca functia '%-.192s'"
@@ -2896,14 +2837,13 @@ ER_CANT_INITIALIZE_UDF
dan "Kan ikke starte funktionen '%-.192s'; %-.80s"
nla "Kan functie '%-.192s' niet initialiseren; %-.80s"
eng "Can't initialize function '%-.192s'; %-.80s"
- jps "function '%-.192s' を初期化できません; %-.80s",
est "Ei suuda algväärtustada funktsiooni '%-.192s'; %-.80s"
fre "Impossible d'initialiser la fonction '%-.192s'; %-.80s"
ger "Kann Funktion '%-.192s' nicht initialisieren: %-.80s"
greek "Δεν είναι δυνατή η έναρξη της συνάρτησης '%-.192s'; %-.80s"
hun "A(z) '%-.192s' fuggveny nem inicializalhato; %-.80s"
ita "Impossibile inizializzare la funzione '%-.192s'; %-.80s"
- jpn "function '%-.192s' を初期化できません; %-.80s"
+ jpn "関数 '%-.192s' を初期化できません。; %-.80s"
kor "'%-.192s' 함수를 초기화 하지 못했습니다.; %-.80s"
por "Não pode inicializar a função '%-.192s' - '%-.80s'"
rum "Nu pot initializa functia '%-.192s'; %-.80s"
@@ -2914,18 +2854,17 @@ ER_CANT_INITIALIZE_UDF
swe "Kan inte initialisera funktionen '%-.192s'; '%-.80s'"
ukr "Не можу ініціалізувати функцію '%-.192s'; %-.80s"
ER_UDF_NO_PATHS
- cze "Pro sd-Bílenou knihovnu nejsou povoleny cesty"
+ cze "Pro sdílenou knihovnu nejsou povoleny cesty"
dan "Angivelse af sti ikke tilladt for delt bibliotek"
nla "Geen pad toegestaan voor shared library"
eng "No paths allowed for shared library"
- jps "shared library へのパスが通っていません",
est "Teegi nimes ei tohi olla kataloogi"
fre "Chemin interdit pour les bibliothèques partagées"
ger "Keine Pfade gestattet für Shared Library"
greek "Δεν βρέθηκαν paths για την shared library"
hun "Nincs ut a megosztott konyvtarakhoz (shared library)"
ita "Non sono ammessi path per le librerie condivisa"
- jpn "shared library へのパスが通っていません"
+ jpn "共有ライブラリにはパスを指定できません。"
kor "공유 라이버러리를 위한 패스가 정의되어 있지 않습니다."
por "Não há caminhos (paths) permitidos para biblioteca compartilhada"
rum "Nici un paths nu e permis pentru o librarie shared"
@@ -2936,18 +2875,17 @@ ER_UDF_NO_PATHS
swe "Man får inte ange sökväg för dynamiska bibliotek"
ukr "Не дозволено використовувати путі для розділюваних бібліотек"
ER_UDF_EXISTS
- cze "Funkce '%-.192s' ji-Bž existuje"
+ cze "Funkce '%-.192s' již existuje"
dan "Funktionen '%-.192s' findes allerede"
nla "Functie '%-.192s' bestaat reeds"
eng "Function '%-.192s' already exists"
- jps "Function '%-.192s' は既に定義されています",
est "Funktsioon '%-.192s' juba eksisteerib"
fre "La fonction '%-.192s' existe déjà"
ger "Funktion '%-.192s' existiert schon"
greek "Η συνάρτηση '%-.192s' υπάρχει ήδη"
hun "A '%-.192s' fuggveny mar letezik"
ita "La funzione '%-.192s' esiste gia`"
- jpn "Function '%-.192s' は既に定義されています"
+ jpn "関数 '%-.192s' はすでに定義されています。"
kor "'%-.192s' 함수는 이미 존재합니다."
por "Função '%-.192s' já existe"
rum "Functia '%-.192s' exista deja"
@@ -2958,18 +2896,17 @@ ER_UDF_EXISTS
swe "Funktionen '%-.192s' finns redan"
ukr "Функція '%-.192s' вже існує"
ER_CANT_OPEN_LIBRARY
- cze "Nemohu otev-Břít sdílenou knihovnu '%-.192s' (errno: %d %-.128s)"
+ cze "Nemohu otevřít sdílenou knihovnu '%-.192s' (errno: %d %-.128s)"
dan "Kan ikke åbne delt bibliotek '%-.192s' (errno: %d %-.128s)"
nla "Kan shared library '%-.192s' niet openen (Errcode: %d %-.128s)"
eng "Can't open shared library '%-.192s' (errno: %d %-.128s)"
- jps "shared library '%-.192s' を開く事ができません (errno: %d %-.128s)",
est "Ei suuda avada jagatud teeki '%-.192s' (veakood: %d %-.128s)"
fre "Impossible d'ouvrir la bibliothèque partagée '%-.192s' (errno: %d %-.128s)"
ger "Kann Shared Library '%-.192s' nicht öffnen (Fehler: %d %-.128s)"
greek "Δεν είναι δυνατή η ανάγνωση της shared library '%-.192s' (κωδικός λάθους: %d %-.128s)"
hun "A(z) '%-.192s' megosztott konyvtar nem hasznalhato (hibakod: %d %-.128s)"
ita "Impossibile aprire la libreria condivisa '%-.192s' (errno: %d %-.128s)"
- jpn "shared library '%-.192s' を開く事ができません (errno: %d %-.128s)"
+ jpn "共有ライブラリ '%-.192s' を開く事ができません。(エラー番号: %d %-.128s)"
kor "'%-.192s' 공유 라이버러리를 열수 없습니다.(에러번호: %d %-.128s)"
nor "Can't open shared library '%-.192s' (errno: %d %-.128s)"
norwegian-ny "Can't open shared library '%-.192s' (errno: %d %-.128s)"
@@ -2983,18 +2920,17 @@ ER_CANT_OPEN_LIBRARY
swe "Kan inte öppna det dynamiska biblioteket '%-.192s' (Felkod: %d %-.128s)"
ukr "Не можу відкрити розділювану бібліотеку '%-.192s' (помилка: %d %-.128s)"
ER_CANT_FIND_DL_ENTRY
- cze "Nemohu naj-Bít funkci '%-.128s' v knihovně"
+ cze "Nemohu najít funkci '%-.128s' v knihovně"
dan "Kan ikke finde funktionen '%-.128s' i bibliotek"
nla "Kan functie '%-.128s' niet in library vinden"
eng "Can't find symbol '%-.128s' in library"
- jps "function '%-.128s' をライブラリー中に見付ける事ができません",
est "Ei leia funktsiooni '%-.128s' antud teegis"
fre "Impossible de trouver la fonction '%-.128s' dans la bibliothèque"
ger "Kann Funktion '%-.128s' in der Library nicht finden"
greek "Δεν είναι δυνατή η ανεύρεση της συνάρτησης '%-.128s' στην βιβλιοθήκη"
hun "A(z) '%-.128s' fuggveny nem talalhato a konyvtarban"
ita "Impossibile trovare la funzione '%-.128s' nella libreria"
- jpn "function '%-.128s' をライブラリー中に見付ける事ができません"
+ jpn "関数 '%-.128s' は共有ライブラリー中にありません。"
kor "라이버러리에서 '%-.128s' 함수를 찾을 수 없습니다."
por "Não pode encontrar a função '%-.128s' na biblioteca"
rum "Nu pot gasi functia '%-.128s' in libraria"
@@ -3005,18 +2941,17 @@ ER_CANT_FIND_DL_ENTRY
swe "Hittar inte funktionen '%-.128s' in det dynamiska biblioteket"
ukr "Не можу знайти функцію '%-.128s' у бібліотеці"
ER_FUNCTION_NOT_DEFINED
- cze "Funkce '%-.192s' nen-Bí definována"
+ cze "Funkce '%-.192s' není definována"
dan "Funktionen '%-.192s' er ikke defineret"
nla "Functie '%-.192s' is niet gedefinieerd"
eng "Function '%-.192s' is not defined"
- jps "Function '%-.192s' は定義されていません",
est "Funktsioon '%-.192s' ei ole defineeritud"
fre "La fonction '%-.192s' n'est pas définie"
ger "Funktion '%-.192s' ist nicht definiert"
greek "Η συνάρτηση '%-.192s' δεν έχει ορισθεί"
hun "A '%-.192s' fuggveny nem definialt"
ita "La funzione '%-.192s' non e` definita"
- jpn "Function '%-.192s' は定義されていません"
+ jpn "関数 '%-.192s' は定義されていません。"
kor "'%-.192s' 함수가 정의되어 있지 않습니다."
por "Função '%-.192s' não está definida"
rum "Functia '%-.192s' nu e definita"
@@ -3027,18 +2962,17 @@ ER_FUNCTION_NOT_DEFINED
swe "Funktionen '%-.192s' är inte definierad"
ukr "Функцію '%-.192s' не визначено"
ER_HOST_IS_BLOCKED
- cze "Stroj '%-.64s' je zablokov-Bán kvůli mnoha chybám při připojování. Odblokujete použitím 'mysqladmin flush-hosts'"
+ cze "Stroj '%-.64s' je zablokován kvůli mnoha chybám při připojování. Odblokujete použitím 'mysqladmin flush-hosts'"
dan "Værten '%-.64s' er blokeret på grund af mange fejlforespørgsler. Lås op med 'mysqladmin flush-hosts'"
nla "Host '%-.64s' is geblokkeeerd vanwege te veel verbindings fouten. Deblokkeer met 'mysqladmin flush-hosts'"
eng "Host '%-.64s' is blocked because of many connection errors; unblock with 'mysqladmin flush-hosts'"
- jps "Host '%-.64s' は many connection error のため、拒否されました. 'mysqladmin flush-hosts' で解除してください",
est "Masin '%-.64s' on blokeeritud hulgaliste ühendusvigade tõttu. Blokeeringu saab tühistada 'mysqladmin flush-hosts' käsuga"
fre "L'hôte '%-.64s' est bloqué à cause d'un trop grand nombre d'erreur de connexion. Débloquer le par 'mysqladmin flush-hosts'"
ger "Host '%-.64s' blockiert wegen zu vieler Verbindungsfehler. Aufheben der Blockierung mit 'mysqladmin flush-hosts'"
greek "Ο υπολογιστής '%-.64s' έχει αποκλεισθεί λόγω πολλαπλών λαθών σύνδεσης. Προσπαθήστε να διορώσετε με 'mysqladmin flush-hosts'"
hun "A '%-.64s' host blokkolodott, tul sok kapcsolodasi hiba miatt. Hasznalja a 'mysqladmin flush-hosts' parancsot"
ita "Sistema '%-.64s' bloccato a causa di troppi errori di connessione. Per sbloccarlo: 'mysqladmin flush-hosts'"
- jpn "Host '%-.64s' は many connection error のため、拒否されました. 'mysqladmin flush-hosts' で解除してください"
+ jpn "接続エラーが多いため、ホスト '%-.64s' は拒否されました。'mysqladmin flush-hosts' で解除できます。"
kor "너무 많은 연결오류로 인하여 호스트 '%-.64s'는 블락되었습니다. 'mysqladmin flush-hosts'를 이용하여 블락을 해제하세요"
por "'Host' '%-.64s' está bloqueado devido a muitos erros de conexão. Desbloqueie com 'mysqladmin flush-hosts'"
rum "Host-ul '%-.64s' e blocat din cauza multelor erori de conectie. Poti deploca folosind 'mysqladmin flush-hosts'"
@@ -3048,18 +2982,17 @@ ER_HOST_IS_BLOCKED
swe "Denna dator, '%-.64s', är blockerad pga många felaktig paket. Gör 'mysqladmin flush-hosts' för att ta bort alla blockeringarna"
ukr "Хост '%-.64s' заблоковано з причини великої кількості помилок з'єднання. Для розблокування використовуйте 'mysqladmin flush-hosts'"
ER_HOST_NOT_PRIVILEGED
- cze "Stroj '%-.64s' nem-Bá povoleno se k tomuto MariaDB serveru připojit"
+ cze "Stroj '%-.64s' nemá povoleno se k tomuto MariaDB serveru připojit"
dan "Værten '%-.64s' kan ikke tilkoble denne MariaDB-server"
nla "Het is host '%-.64s' is niet toegestaan verbinding te maken met deze MariaDB server"
eng "Host '%-.64s' is not allowed to connect to this MariaDB server"
- jps "Host '%-.64s' は MariaDB server に接続を許可されていません",
est "Masinal '%-.64s' puudub ligipääs sellele MariaDB serverile"
fre "Le hôte '%-.64s' n'est pas authorisé à se connecter à ce serveur MariaDB"
ger "Host '%-.64s' hat keine Berechtigung, sich mit diesem MariaDB-Server zu verbinden"
greek "Ο υπολογιστής '%-.64s' δεν έχει δικαίωμα σύνδεσης με τον MariaDB server"
hun "A '%-.64s' host szamara nem engedelyezett a kapcsolodas ehhez a MariaDB szerverhez"
ita "Al sistema '%-.64s' non e` consentita la connessione a questo server MariaDB"
- jpn "Host '%-.64s' は MariaDB server に接続を許可されていません"
+ jpn "ホスト '%-.64s' からのこの MariaDB server への接続は許可されていません。"
kor "'%-.64s' 호스트는 이 MariaDB서버에 접속할 허가를 받지 못했습니다."
por "'Host' '%-.64s' não tem permissão para se conectar com este servidor MariaDB"
rum "Host-ul '%-.64s' nu este permis a se conecta la aceste server MariaDB"
@@ -3069,18 +3002,17 @@ ER_HOST_NOT_PRIVILEGED
swe "Denna dator, '%-.64s', har inte privileger att använda denna MariaDB server"
ukr "Хосту '%-.64s' не доволено зв'язуватись з цим сервером MariaDB"
ER_PASSWORD_ANONYMOUS_USER 42000
- cze "Pou-Bžíváte MariaDB jako anonymní uživatel a anonymní uživatelé nemají povoleno měnit hesla"
+ cze "Používáte MariaDB jako anonymní uživatel a anonymní uživatelé nemají povoleno měnit hesla"
dan "Du bruger MariaDB som anonym bruger. Anonyme brugere må ikke ændre adgangskoder"
nla "U gebruikt MariaDB als anonieme gebruiker en deze mogen geen wachtwoorden wijzigen"
eng "You are using MariaDB as an anonymous user and anonymous users are not allowed to change passwords"
- jps "MariaDB を anonymous users で使用している状態では、パスワードの変更はできません",
est "Te kasutate MariaDB-i anonüümse kasutajana, kelledel pole parooli muutmise õigust"
fre "Vous utilisez un utilisateur anonyme et les utilisateurs anonymes ne sont pas autorisés à changer les mots de passe"
ger "Sie benutzen MariaDB als anonymer Benutzer und dürfen daher keine Passwörter ändern"
greek "Χρησιμοποιείτε την MariaDB σαν anonymous user και έτσι δεν μπορείτε να αλλάξετε τα passwords άλλων χρηστών"
hun "Nevtelen (anonymous) felhasznalokent nem negedelyezett a jelszovaltoztatas"
ita "Impossibile cambiare la password usando MariaDB come utente anonimo"
- jpn "MariaDB を anonymous users で使用している状態では、パスワードの変更はできません"
+ jpn "MariaDB を匿名ユーザーで使用しているので、パスワードの変更はできません。"
kor "당신은 MariaDB서버에 익명의 사용자로 접속을 하셨습니다.익명의 사용자는 암호를 변경할 수 없습니다."
por "Você está usando o MariaDB como usuário anônimo e usuários anônimos não têm permissão para mudar senhas"
rum "Dumneavoastra folositi MariaDB ca un utilizator anonim si utilizatorii anonimi nu au voie sa schime parolele"
@@ -3090,18 +3022,17 @@ ER_PASSWORD_ANONYMOUS_USER 42000
swe "Du använder MariaDB som en anonym användare och som sådan får du inte ändra ditt lösenord"
ukr "Ви використовуєте MariaDB як анонімний користувач, тому вам не дозволено змінювати паролі"
ER_PASSWORD_NOT_ALLOWED 42000
- cze "Na zm-Běnu hesel ostatním musíte mít právo provést update tabulek v databázi mysql"
+ cze "Na změnu hesel ostatním musíte mít právo provést update tabulek v databázi mysql"
dan "Du skal have tilladelse til at opdatere tabeller i MariaDB databasen for at ændre andres adgangskoder"
nla "U moet tabel update priveleges hebben in de mysql database om wachtwoorden voor anderen te mogen wijzigen"
eng "You must have privileges to update tables in the mysql database to be able to change passwords for others"
- jps "他のユーザーのパスワードを変更するためには, mysql データベースに対して update の許可がなければなりません.",
est "Teiste paroolide muutmiseks on nõutav tabelite muutmisõigus 'mysql' andmebaasis"
fre "Vous devez avoir le privilège update sur les tables de la base de donnée mysql pour pouvoir changer les mots de passe des autres"
ger "Sie benötigen die Berechtigung zum Aktualisieren von Tabellen in der Datenbank 'mysql', um die Passwörter anderer Benutzer ändern zu können"
greek "Πρέπει να έχετε δικαίωμα διόρθωσης πινάκων (update) στη βάση δεδομένων mysql για να μπορείτε να αλλάξετε τα passwords άλλων χρηστών"
hun "Onnek tabla-update joggal kell rendelkeznie a mysql adatbazisban masok jelszavanak megvaltoztatasahoz"
ita "E` necessario il privilegio di update sulle tabelle del database mysql per cambiare le password per gli altri utenti"
- jpn "他のユーザーのパスワードを変更するためには, mysql データベースに対して update の許可がなければなりません."
+ jpn "他のユーザーのパスワードを変更するためには、mysqlデータベースの表を更新する権限が必要です。"
kor "당신은 다른사용자들의 암호를 변경할 수 있도록 데이타베이스 변경권한을 가져야 합니다."
por "Você deve ter privilégios para atualizar tabelas no banco de dados mysql para ser capaz de mudar a senha de outros"
rum "Trebuie sa aveti privilegii sa actualizati tabelele in bazele de date mysql ca sa puteti sa schimati parolele altora"
@@ -3111,7 +3042,7 @@ ER_PASSWORD_NOT_ALLOWED 42000
swe "För att ändra lösenord för andra måste du ha rättigheter att uppdatera mysql-databasen"
ukr "Ви повині мати право на оновлення таблиць у базі данних mysql, аби мати можливість змінювати пароль іншим"
ER_PASSWORD_NO_MATCH 42000
- cze "V tabulce user nen-Bí žádný odpovídající řádek"
+ cze "V tabulce user není žádný odpovídající řádek"
dan "Kan ikke finde nogen tilsvarende poster i bruger tabellen"
nla "Kan geen enkele passende rij vinden in de gebruikers tabel"
eng "Can't find any matching row in the user table"
@@ -3121,6 +3052,7 @@ ER_PASSWORD_NO_MATCH 42000
greek "Δεν είναι δυνατή η ανεύρεση της αντίστοιχης εγγραφής στον πίνακα των χρηστών"
hun "Nincs megegyezo sor a user tablaban"
ita "Impossibile trovare la riga corrispondente nella tabella user"
+ jpn "ユーザーテーブルに該当するレコードが見つかりません。"
kor "사용자 테이블에서 일치하는 것을 찾을 수 없읍니다."
por "Não pode encontrar nenhuma linha que combine na tabela usuário (user table)"
rum "Nu pot gasi nici o linie corespunzatoare in tabela utilizatorului"
@@ -3130,17 +3062,16 @@ ER_PASSWORD_NO_MATCH 42000
swe "Hittade inte användaren i 'user'-tabellen"
ukr "Не можу знайти відповідних записів у таблиці користувача"
ER_UPDATE_INFO
- cze "Nalezen-Bých řádků: %ld Změněno: %ld Varování: %ld"
+ cze "Nalezených řádků: %ld Změněno: %ld Varování: %ld"
dan "Poster fundet: %ld Ændret: %ld Advarsler: %ld"
nla "Passende rijen: %ld Gewijzigd: %ld Waarschuwingen: %ld"
eng "Rows matched: %ld Changed: %ld Warnings: %ld"
- jps "一致数(Rows matched): %ld 変更: %ld Warnings: %ld",
est "Sobinud kirjeid: %ld Muudetud: %ld Hoiatusi: %ld"
fre "Enregistrements correspondants: %ld Modifiés: %ld Warnings: %ld"
ger "Datensätze gefunden: %ld Geändert: %ld Warnungen: %ld"
hun "Megegyezo sorok szama: %ld Valtozott: %ld Warnings: %ld"
ita "Rows riconosciute: %ld Cambiate: %ld Warnings: %ld"
- jpn "一致数(Rows matched): %ld 変更: %ld Warnings: %ld"
+ jpn "該当した行: %ld 変更: %ld 警告: %ld"
kor "일치하는 Rows : %ld개 변경됨: %ld개 경고: %ld개"
por "Linhas que combinaram: %ld - Alteradas: %ld - Avisos: %ld"
rum "Linii identificate (matched): %ld Schimbate: %ld Atentionari (warnings): %ld"
@@ -3150,17 +3081,16 @@ ER_UPDATE_INFO
swe "Rader: %ld Uppdaterade: %ld Varningar: %ld"
ukr "Записів відповідає: %ld Змінено: %ld Застережень: %ld"
ER_CANT_CREATE_THREAD
- cze "Nemohu vytvo-Břit nový thread (errno %M). Pokud je ještě nějaká volná paměť, podívejte se do manuálu na část o chybách specifických pro jednotlivé operační systémy"
+ cze "Nemohu vytvořit nový thread (errno %M). Pokud je ještě nějaká volná paměť, podívejte se do manuálu na část o chybách specifických pro jednotlivé operační systémy"
dan "Kan ikke danne en ny tråd (fejl nr. %M). Hvis computeren ikke er løbet tør for hukommelse, kan du se i brugervejledningen for en mulig operativ-system - afhængig fejl"
nla "Kan geen nieuwe thread aanmaken (Errcode: %M). Indien er geen tekort aan geheugen is kunt u de handleiding consulteren over een mogelijke OS afhankelijke fout"
eng "Can't create a new thread (errno %M); if you are not out of available memory, you can consult the manual for a possible OS-dependent bug"
- jps "新規にスレッドが作れませんでした (errno %M). もし最大使用許可メモリー数を越えていないのにエラーが発生しているなら, マニュアルの中から 'possible OS-dependent bug' という文字を探してくみてださい.",
est "Ei suuda luua uut lõime (veakood %M). Kui mälu ei ole otsas, on tõenäoliselt tegemist operatsioonisüsteemispetsiifilise veaga"
fre "Impossible de créer une nouvelle tâche (errno %M). S'il reste de la mémoire libre, consultez le manual pour trouver un éventuel bug dépendant de l'OS"
ger "Kann keinen neuen Thread erzeugen (Fehler: %M). Sollte noch Speicher verfügbar sein, bitte im Handbuch wegen möglicher Fehler im Betriebssystem nachschlagen"
hun "Uj thread letrehozasa nem lehetseges (Hibakod: %M). Amenyiben van meg szabad memoria, olvassa el a kezikonyv operacios rendszerfuggo hibalehetosegekrol szolo reszet"
ita "Impossibile creare un nuovo thread (errno %M). Se non ci sono problemi di memoria disponibile puoi consultare il manuale per controllare possibili problemi dipendenti dal SO"
- jpn "新規にスレッドが作れませんでした (errno %M). もし最大使用許可メモリー数を越えていないのにエラーが発生しているなら, マニュアルの中から 'possible OS-dependent bug' という文字を探してくみてださい."
+ jpn "新規にスレッドを作成できません。(エラー番号 %M) もしも使用可能メモリーの不足でなければ、OS依存のバグである可能性があります。"
kor "새로운 쓰레드를 만들 수 없습니다.(에러번호 %M). 만약 여유메모리가 있다면 OS-dependent버그 의 메뉴얼 부분을 찾아보시오."
nor "Can't create a new thread (errno %M); if you are not out of available memory you can consult the manual for any possible OS dependent bug"
norwegian-ny "Can't create a new thread (errno %M); if you are not out of available memory you can consult the manual for any possible OS dependent bug"
@@ -3173,7 +3103,7 @@ ER_CANT_CREATE_THREAD
swe "Kan inte skapa en ny tråd (errno %M)"
ukr "Не можу створити нову гілку (помилка %M). Якщо ви не використали усю пам'ять, то прочитайте документацію до вашої ОС - можливо це помилка ОС"
ER_WRONG_VALUE_COUNT_ON_ROW 21S01
- cze "Po-Bčet sloupců neodpovídá počtu hodnot na řádku %lu"
+ cze "Počet sloupců neodpovídá počtu hodnot na řádku %lu"
dan "Kolonne antallet stemmer ikke overens med antallet af værdier i post %lu"
nla "Kolom aantal komt niet overeen met waarde aantal in rij %lu"
eng "Column count doesn't match value count at row %lu"
@@ -3181,6 +3111,7 @@ ER_WRONG_VALUE_COUNT_ON_ROW 21S01
ger "Anzahl der Felder stimmt nicht mit der Anzahl der Werte in Zeile %lu überein"
hun "Az oszlopban talalhato ertek nem egyezik meg a %lu sorban szamitott ertekkel"
ita "Il numero delle colonne non corrisponde al conteggio alla riga %lu"
+ jpn "%lu 行目で、列の数が値の数と一致しません。"
kor "Row %lu에서 칼럼 카운트와 value 카운터와 일치하지 않습니다."
por "Contagem de colunas não confere com a contagem de valores na linha %lu"
rum "Numarul de coloane nu corespunde cu numarul de valori la linia %lu"
@@ -3190,7 +3121,7 @@ ER_WRONG_VALUE_COUNT_ON_ROW 21S01
swe "Antalet kolumner motsvarar inte antalet värden på rad: %lu"
ukr "Кількість стовбців не співпадає з кількістю значень у строці %lu"
ER_CANT_REOPEN_TABLE
- cze "Nemohu znovuotev-Břít tabulku: '%-.192s"
+ cze "Nemohu znovuotevřít tabulku: '%-.192s"
dan "Kan ikke genåbne tabel '%-.192s"
nla "Kan tabel niet opnieuw openen: '%-.192s"
eng "Can't reopen table: '%-.192s'"
@@ -3199,6 +3130,7 @@ ER_CANT_REOPEN_TABLE
ger "Kann Tabelle'%-.192s' nicht erneut öffnen"
hun "Nem lehet ujra-megnyitni a tablat: '%-.192s"
ita "Impossibile riaprire la tabella: '%-.192s'"
+ jpn "表を再オープンできません。: '%-.192s'"
kor "테이블을 다시 열수 없군요: '%-.192s"
nor "Can't reopen table: '%-.192s"
norwegian-ny "Can't reopen table: '%-.192s"
@@ -3212,17 +3144,16 @@ ER_CANT_REOPEN_TABLE
swe "Kunde inte stänga och öppna tabell '%-.192s"
ukr "Не можу перевідкрити таблицю: '%-.192s'"
ER_INVALID_USE_OF_NULL 22004
- cze "Neplatn-Bé užití hodnoty NULL"
+ cze "Neplatné užití hodnoty NULL"
dan "Forkert brug af nulværdi (NULL)"
nla "Foutief gebruik van de NULL waarde"
eng "Invalid use of NULL value"
- jps "NULL 値の使用方法が不適切です",
est "NULL väärtuse väärkasutus"
fre "Utilisation incorrecte de la valeur NULL"
ger "Unerlaubte Verwendung eines NULL-Werts"
hun "A NULL ervenytelen hasznalata"
ita "Uso scorretto del valore NULL"
- jpn "NULL 値の使用方法が不適切です"
+ jpn "NULL 値の使用方法が不適切です。"
kor "NULL 값을 잘못 사용하셨군요..."
por "Uso inválido do valor NULL"
rum "Folosirea unei value NULL e invalida"
@@ -3232,7 +3163,7 @@ ER_INVALID_USE_OF_NULL 22004
swe "Felaktig använding av NULL"
ukr "Хибне використання значення NULL"
ER_REGEXP_ERROR 42000
- cze "Regul-Bární výraz vrátil chybu '%-.64s'"
+ cze "Regulární výraz vrátil chybu '%-.64s'"
dan "Fik fejl '%-.64s' fra regexp"
nla "Fout '%-.64s' ontvangen van regexp"
eng "Got error '%-.64s' from regexp"
@@ -3241,6 +3172,7 @@ ER_REGEXP_ERROR 42000
ger "regexp lieferte Fehler '%-.64s'"
hun "'%-.64s' hiba a regularis kifejezes hasznalata soran (regexp)"
ita "Errore '%-.64s' da regexp"
+ jpn "regexp がエラー '%-.64s' を返しました。"
kor "regexp에서 '%-.64s'가 났습니다."
por "Obteve erro '%-.64s' em regexp"
rum "Eroarea '%-.64s' obtinuta din expresia regulara (regexp)"
@@ -3250,7 +3182,7 @@ ER_REGEXP_ERROR 42000
swe "Fick fel '%-.64s' från REGEXP"
ukr "Отримано помилку '%-.64s' від регулярного виразу"
ER_MIX_OF_GROUP_FUNC_AND_FIELDS 42000
- cze "Pokud nen-Bí žádná GROUP BY klauzule, není dovoleno současné použití GROUP položek (MIN(),MAX(),COUNT()...) s ne GROUP položkami"
+ cze "Pokud není žádná GROUP BY klauzule, není dovoleno současné použití GROUP položek (MIN(),MAX(),COUNT()...) s ne GROUP položkami"
dan "Sammenblanding af GROUP kolonner (MIN(),MAX(),COUNT()...) uden GROUP kolonner er ikke tilladt, hvis der ikke er noget GROUP BY prædikat"
nla "Het mixen van GROUP kolommen (MIN(),MAX(),COUNT()...) met no-GROUP kolommen is foutief indien er geen GROUP BY clausule is"
eng "Mixing of GROUP columns (MIN(),MAX(),COUNT(),...) with no GROUP columns is illegal if there is no GROUP BY clause"
@@ -3259,6 +3191,7 @@ ER_MIX_OF_GROUP_FUNC_AND_FIELDS 42000
ger "Das Vermischen von GROUP-Feldern (MIN(),MAX(),COUNT()...) mit Nicht-GROUP-Feldern ist nicht zulässig, wenn keine GROUP-BY-Klausel vorhanden ist"
hun "A GROUP mezok (MIN(),MAX(),COUNT()...) kevert hasznalata nem lehetseges GROUP BY hivatkozas nelkul"
ita "Il mescolare funzioni di aggregazione (MIN(),MAX(),COUNT()...) e non e` illegale se non c'e` una clausula GROUP BY"
+ jpn "GROUP BY句が無い場合、集計関数(MIN(),MAX(),COUNT(),...)と通常の列を同時に使用できません。"
kor "Mixing of GROUP 칼럼s (MIN(),MAX(),COUNT(),...) with no GROUP 칼럼s is illegal if there is no GROUP BY clause"
por "Mistura de colunas agrupadas (com MIN(), MAX(), COUNT(), ...) com colunas não agrupadas é ilegal, se não existir uma cláusula de agrupamento (cláusula GROUP BY)"
rum "Amestecarea de coloane GROUP (MIN(),MAX(),COUNT()...) fara coloane GROUP este ilegala daca nu exista o clauza GROUP BY"
@@ -3268,17 +3201,16 @@ ER_MIX_OF_GROUP_FUNC_AND_FIELDS 42000
swe "Man får ha både GROUP-kolumner (MIN(),MAX(),COUNT()...) och fält i en fråga om man inte har en GROUP BY-del"
ukr "Змішування GROUP стовбців (MIN(),MAX(),COUNT()...) з не GROUP стовбцями є забороненим, якщо не має GROUP BY"
ER_NONEXISTING_GRANT 42000
- cze "Neexistuje odpov-Bídající grant pro uživatele '%-.48s' na stroji '%-.64s'"
+ cze "Neexistuje odpovídající grant pro uživatele '%-.48s' na stroji '%-.64s'"
dan "Denne tilladelse findes ikke for brugeren '%-.48s' på vært '%-.64s'"
nla "Deze toegang (GRANT) is niet toegekend voor gebruiker '%-.48s' op host '%-.64s'"
eng "There is no such grant defined for user '%-.48s' on host '%-.64s'"
- jps "ユーザー '%-.48s' (ホスト '%-.64s' のユーザー) は許可されていません",
est "Sellist õigust ei ole defineeritud kasutajale '%-.48s' masinast '%-.64s'"
fre "Un tel droit n'est pas défini pour l'utilisateur '%-.48s' sur l'hôte '%-.64s'"
ger "Für Benutzer '%-.48s' auf Host '%-.64s' gibt es keine solche Berechtigung"
hun "A '%-.48s' felhasznalonak nincs ilyen joga a '%-.64s' host-on"
ita "GRANT non definita per l'utente '%-.48s' dalla macchina '%-.64s'"
- jpn "ユーザー '%-.48s' (ホスト '%-.64s' のユーザー) は許可されていません"
+ jpn "ユーザー '%-.48s' (ホスト '%-.64s' 上) は許可されていません。"
kor "사용자 '%-.48s' (호스트 '%-.64s')를 위하여 정의된 그런 승인은 없습니다."
por "Não existe tal permissão (grant) definida para o usuário '%-.48s' no 'host' '%-.64s'"
rum "Nu exista un astfel de grant definit pentru utilzatorul '%-.48s' de pe host-ul '%-.64s'"
@@ -3288,17 +3220,16 @@ ER_NONEXISTING_GRANT 42000
swe "Det finns inget privilegium definierat för användare '%-.48s' på '%-.64s'"
ukr "Повноважень не визначено для користувача '%-.48s' з хосту '%-.64s'"
ER_TABLEACCESS_DENIED_ERROR 42000
- cze "%-.128s p-Bříkaz nepřístupný pro uživatele: '%-.48s'@'%-.64s' pro tabulku '%-.192s'"
+ cze "%-.128s příkaz nepřístupný pro uživatele: '%-.48s'@'%-.64s' pro tabulku '%-.192s'"
dan "%-.128s-kommandoen er ikke tilladt for brugeren '%-.48s'@'%-.64s' for tabellen '%-.192s'"
nla "%-.128s commando geweigerd voor gebruiker: '%-.48s'@'%-.64s' voor tabel '%-.192s'"
eng "%-.128s command denied to user '%-.48s'@'%-.64s' for table '%-.192s'"
- jps "コマンド %-.128s は ユーザー '%-.48s'@'%-.64s' ,テーブル '%-.192s' に対して許可されていません",
est "%-.128s käsk ei ole lubatud kasutajale '%-.48s'@'%-.64s' tabelis '%-.192s'"
fre "La commande '%-.128s' est interdite à l'utilisateur: '%-.48s'@'%-.64s' sur la table '%-.192s'"
ger "%-.128s Befehl nicht erlaubt für Benutzer '%-.48s'@'%-.64s' auf Tabelle '%-.192s'"
hun "%-.128s parancs a '%-.48s'@'%-.64s' felhasznalo szamara nem engedelyezett a '%-.192s' tablaban"
ita "Comando %-.128s negato per l'utente: '%-.48s'@'%-.64s' sulla tabella '%-.192s'"
- jpn "コマンド %-.128s は ユーザー '%-.48s'@'%-.64s' ,テーブル '%-.192s' に対して許可されていません"
+ jpn "コマンド %-.128s は ユーザー '%-.48s'@'%-.64s' の表 '%-.192s' の使用に関して許可されていません。"
kor "'%-.128s' 명령은 다음 사용자에게 거부되었습니다. : '%-.48s'@'%-.64s' for 테이블 '%-.192s'"
por "Comando '%-.128s' negado para o usuário '%-.48s'@'%-.64s' na tabela '%-.192s'"
rum "Comanda %-.128s interzisa utilizatorului: '%-.48s'@'%-.64s' pentru tabela '%-.192s'"
@@ -3308,17 +3239,16 @@ ER_TABLEACCESS_DENIED_ERROR 42000
swe "%-.128s ej tillåtet för '%-.48s'@'%-.64s' för tabell '%-.192s'"
ukr "%-.128s команда заборонена користувачу: '%-.48s'@'%-.64s' у таблиці '%-.192s'"
ER_COLUMNACCESS_DENIED_ERROR 42000
- cze "%-.128s p-Bříkaz nepřístupný pro uživatele: '%-.48s'@'%-.64s' pro sloupec '%-.192s' v tabulce '%-.192s'"
+ cze "%-.128s příkaz nepřístupný pro uživatele: '%-.48s'@'%-.64s' pro sloupec '%-.192s' v tabulce '%-.192s'"
dan "%-.128s-kommandoen er ikke tilladt for brugeren '%-.48s'@'%-.64s' for kolonne '%-.192s' in tabellen '%-.192s'"
nla "%-.128s commando geweigerd voor gebruiker: '%-.48s'@'%-.64s' voor kolom '%-.192s' in tabel '%-.192s'"
eng "%-.128s command denied to user '%-.48s'@'%-.64s' for column '%-.192s' in table '%-.192s'"
- jps "コマンド %-.128s は ユーザー '%-.48s'@'%-.64s'¥n カラム '%-.192s' テーブル '%-.192s' に対して許可されていません",
est "%-.128s käsk ei ole lubatud kasutajale '%-.48s'@'%-.64s' tulbale '%-.192s' tabelis '%-.192s'"
fre "La commande '%-.128s' est interdite à l'utilisateur: '%-.48s'@'%-.64s' sur la colonne '%-.192s' de la table '%-.192s'"
ger "%-.128s Befehl nicht erlaubt für Benutzer '%-.48s'@'%-.64s' und Feld '%-.192s' in Tabelle '%-.192s'"
hun "%-.128s parancs a '%-.48s'@'%-.64s' felhasznalo szamara nem engedelyezett a '%-.192s' mezo eseten a '%-.192s' tablaban"
ita "Comando %-.128s negato per l'utente: '%-.48s'@'%-.64s' sulla colonna '%-.192s' della tabella '%-.192s'"
- jpn "コマンド %-.128s は ユーザー '%-.48s'@'%-.64s'\n カラム '%-.192s' テーブル '%-.192s' に対して許可されていません"
+ jpn "コマンド %-.128s は ユーザー '%-.48s'@'%-.64s'\n の列 '%-.192s'(表 '%-.192s') の利用に関して許可されていません。"
kor "'%-.128s' 명령은 다음 사용자에게 거부되었습니다. : '%-.48s'@'%-.64s' for 칼럼 '%-.192s' in 테이블 '%-.192s'"
por "Comando '%-.128s' negado para o usuário '%-.48s'@'%-.64s' na coluna '%-.192s', na tabela '%-.192s'"
rum "Comanda %-.128s interzisa utilizatorului: '%-.48s'@'%-.64s' pentru coloana '%-.192s' in tabela '%-.192s'"
@@ -3328,7 +3258,7 @@ ER_COLUMNACCESS_DENIED_ERROR 42000
swe "%-.128s ej tillåtet för '%-.48s'@'%-.64s' för kolumn '%-.192s' i tabell '%-.192s'"
ukr "%-.128s команда заборонена користувачу: '%-.48s'@'%-.64s' для стовбця '%-.192s' у таблиці '%-.192s'"
ER_ILLEGAL_GRANT_FOR_TABLE 42000
- cze "Neplatn-Bý příkaz GRANT/REVOKE. Prosím, přečtěte si v manuálu, jaká privilegia je možné použít."
+ cze "Neplatný příkaz GRANT/REVOKE. Prosím, přečtěte si v manuálu, jaká privilegia je možné použít."
dan "Forkert GRANT/REVOKE kommando. Se i brugervejledningen hvilke privilegier der kan specificeres."
nla "Foutief GRANT/REVOKE commando. Raadpleeg de handleiding welke priveleges gebruikt kunnen worden."
eng "Illegal GRANT/REVOKE command; please consult the manual to see which privileges can be used"
@@ -3338,7 +3268,7 @@ ER_ILLEGAL_GRANT_FOR_TABLE 42000
greek "Illegal GRANT/REVOKE command; please consult the manual to see which privileges can be used."
hun "Ervenytelen GRANT/REVOKE parancs. Kerem, nezze meg a kezikonyvben, milyen jogok lehetsegesek"
ita "Comando GRANT/REVOKE illegale. Prego consultare il manuale per sapere quali privilegi possono essere usati."
- jpn "Illegal GRANT/REVOKE command; please consult the manual to see which privleges can be used."
+ jpn "不正な GRANT/REVOKE コマンドです。どの権限で利用可能かはマニュアルを参照して下さい。"
kor "잘못된 GRANT/REVOKE 명령. 어떤 권리와 승인이 사용되어 질 수 있는지 메뉴얼을 보시오."
nor "Illegal GRANT/REVOKE command; please consult the manual to see which privileges can be used."
norwegian-ny "Illegal GRANT/REVOKE command; please consult the manual to see which privileges can be used."
@@ -3352,7 +3282,7 @@ ER_ILLEGAL_GRANT_FOR_TABLE 42000
swe "Felaktigt GRANT-privilegium använt"
ukr "Хибна GRANT/REVOKE команда; прочитайте документацію стосовно того, які права можна використовувати"
ER_GRANT_WRONG_HOST_OR_USER 42000
- cze "Argument p-Bříkazu GRANT uživatel nebo stroj je příliš dlouhý"
+ cze "Argument příkazu GRANT uživatel nebo stroj je příliš dlouhý"
dan "Værts- eller brugernavn for langt til GRANT"
nla "De host of gebruiker parameter voor GRANT is te lang"
eng "The host or user argument to GRANT is too long"
@@ -3361,6 +3291,7 @@ ER_GRANT_WRONG_HOST_OR_USER 42000
ger "Das Host- oder User-Argument für GRANT ist zu lang"
hun "A host vagy felhasznalo argumentuma tul hosszu a GRANT parancsban"
ita "L'argomento host o utente per la GRANT e` troppo lungo"
+ jpn "GRANTコマンドへの、ホスト名やユーザー名が長すぎます。"
kor "승인(GRANT)을 위하여 사용한 사용자나 호스트의 값들이 너무 깁니다."
por "Argumento de 'host' ou de usuário para o GRANT é longo demais"
rum "Argumentul host-ului sau utilizatorului pentru GRANT e prea lung"
@@ -3379,7 +3310,7 @@ ER_NO_SUCH_TABLE 42S02
ger "Tabelle '%-.192s.%-.192s' existiert nicht"
hun "A '%-.192s.%-.192s' tabla nem letezik"
ita "La tabella '%-.192s.%-.192s' non esiste"
- jpn "Table '%-.192s.%-.192s' doesn't exist"
+ jpn "表 '%-.192s.%-.192s' は存在しません。"
kor "테이블 '%-.192s.%-.192s' 는 존재하지 않습니다."
nor "Table '%-.192s.%-.192s' doesn't exist"
norwegian-ny "Table '%-.192s.%-.192s' doesn't exist"
@@ -3393,7 +3324,7 @@ ER_NO_SUCH_TABLE 42S02
swe "Det finns ingen tabell som heter '%-.192s.%-.192s'"
ukr "Таблиця '%-.192s.%-.192s' не існує"
ER_NONEXISTING_TABLE_GRANT 42000
- cze "Neexistuje odpov-Bídající grant pro uživatele '%-.48s' na stroji '%-.64s' pro tabulku '%-.192s'"
+ cze "Neexistuje odpovídající grant pro uživatele '%-.48s' na stroji '%-.64s' pro tabulku '%-.192s'"
dan "Denne tilladelse eksisterer ikke for brugeren '%-.48s' på vært '%-.64s' for tabellen '%-.192s'"
nla "Deze toegang (GRANT) is niet toegekend voor gebruiker '%-.48s' op host '%-.64s' op tabel '%-.192s'"
eng "There is no such grant defined for user '%-.48s' on host '%-.64s' on table '%-.192s'"
@@ -3402,6 +3333,7 @@ ER_NONEXISTING_TABLE_GRANT 42000
ger "Eine solche Berechtigung ist für User '%-.48s' auf Host '%-.64s' an Tabelle '%-.192s' nicht definiert"
hun "A '%-.48s' felhasznalo szamara a '%-.64s' host '%-.192s' tablajaban ez a parancs nem engedelyezett"
ita "GRANT non definita per l'utente '%-.48s' dalla macchina '%-.64s' sulla tabella '%-.192s'"
+ jpn "ユーザー '%-.48s' (ホスト '%-.64s' 上) の表 '%-.192s' への権限は定義されていません。"
kor "사용자 '%-.48s'(호스트 '%-.64s')는 테이블 '%-.192s'를 사용하기 위하여 정의된 승인은 없습니다. "
por "Não existe tal permissão (grant) definido para o usuário '%-.48s' no 'host' '%-.64s', na tabela '%-.192s'"
rum "Nu exista un astfel de privilegiu (grant) definit pentru utilizatorul '%-.48s' de pe host-ul '%-.64s' pentru tabela '%-.192s'"
@@ -3411,7 +3343,7 @@ ER_NONEXISTING_TABLE_GRANT 42000
swe "Det finns inget privilegium definierat för användare '%-.48s' på '%-.64s' för tabell '%-.192s'"
ukr "Повноважень не визначено для користувача '%-.48s' з хосту '%-.64s' для таблиці '%-.192s'"
ER_NOT_ALLOWED_COMMAND 42000
- cze "Pou-Bžitý příkaz není v této verzi MariaDB povolen"
+ cze "Použitý příkaz není v této verzi MariaDB povolen"
dan "Den brugte kommando er ikke tilladt med denne udgave af MariaDB"
nla "Het used commando is niet toegestaan in deze MariaDB versie"
eng "The used command is not allowed with this MariaDB version"
@@ -3420,6 +3352,7 @@ ER_NOT_ALLOWED_COMMAND 42000
ger "Der verwendete Befehl ist in dieser MariaDB-Version nicht zulässig"
hun "A hasznalt parancs nem engedelyezett ebben a MariaDB verzioban"
ita "Il comando utilizzato non e` supportato in questa versione di MariaDB"
+ jpn "このMariaDBバージョンでは利用できないコマンドです。"
kor "사용된 명령은 현재의 MariaDB 버젼에서는 이용되지 않습니다."
por "Comando usado não é permitido para esta versão do MariaDB"
rum "Comanda folosita nu este permisa pentru aceasta versiune de MariaDB"
@@ -3429,7 +3362,7 @@ ER_NOT_ALLOWED_COMMAND 42000
swe "Du kan inte använda detta kommando med denna MariaDB version"
ukr "Використовувана команда не дозволена у цій версії MariaDB"
ER_SYNTAX_ERROR 42000
- cze "Va-Bše syntaxe je nějaká divná"
+ cze "Vaše syntaxe je nějaká divná"
dan "Der er en fejl i SQL syntaksen"
nla "Er is iets fout in de gebruikte syntax"
eng "You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use"
@@ -3439,7 +3372,7 @@ ER_SYNTAX_ERROR 42000
greek "You have an error in your SQL syntax"
hun "Szintaktikai hiba"
ita "Errore di sintassi nella query SQL"
- jpn "Something is wrong in your syntax"
+ jpn "SQL構文エラーです。バージョンに対応するマニュアルを参照して正しい構文を確認してください。"
kor "SQL 구문에 오류가 있습니다."
nor "Something is wrong in your syntax"
norwegian-ny "Something is wrong in your syntax"
@@ -3453,7 +3386,7 @@ ER_SYNTAX_ERROR 42000
swe "Du har något fel i din syntax"
ukr "У вас помилка у синтаксисі SQL"
ER_DELAYED_CANT_CHANGE_LOCK
- cze "Zpo-Bžděný insert threadu nebyl schopen získat požadovaný zámek pro tabulku %-.192s"
+ cze "Zpožděný insert threadu nebyl schopen získat požadovaný zámek pro tabulku %-.192s"
dan "Forsinket indsættelse tråden (delayed insert thread) kunne ikke opnå lås på tabellen %-.192s"
nla "'Delayed insert' thread kon de aangevraagde 'lock' niet krijgen voor tabel %-.192s"
eng "Delayed insert thread couldn't get requested lock for table %-.192s"
@@ -3462,6 +3395,7 @@ ER_DELAYED_CANT_CHANGE_LOCK
ger "Verzögerter (DELAYED) Einfüge-Thread konnte die angeforderte Sperre für Tabelle '%-.192s' nicht erhalten"
hun "A kesleltetett beillesztes (delayed insert) thread nem kapott zatolast a %-.192s tablahoz"
ita "Il thread di inserimento ritardato non riesce ad ottenere il lock per la tabella %-.192s"
+ jpn "'Delayed insert'スレッドが表 '%-.192s' のロックを取得できませんでした。"
kor "지연된 insert 쓰레드가 테이블 %-.192s의 요구된 락킹을 처리할 수 없었습니다."
por "'Thread' de inserção retardada (atrasada) pois não conseguiu obter a trava solicitada para tabela '%-.192s'"
rum "Thread-ul pentru inserarea aminata nu a putut obtine lacatul (lock) pentru tabela %-.192s"
@@ -3471,7 +3405,7 @@ ER_DELAYED_CANT_CHANGE_LOCK
swe "DELAYED INSERT-tråden kunde inte låsa tabell '%-.192s'"
ukr "Гілка для INSERT DELAYED не може отримати блокування для таблиці %-.192s"
ER_TOO_MANY_DELAYED_THREADS
- cze "P-Bříliš mnoho zpožděných threadů"
+ cze "Příliš mnoho zpožděných threadů"
dan "For mange slettede tråde (threads) i brug"
nla "Te veel 'delayed' threads in gebruik"
eng "Too many delayed threads in use"
@@ -3480,6 +3414,7 @@ ER_TOO_MANY_DELAYED_THREADS
ger "Zu viele verzögerte (DELAYED) Threads in Verwendung"
hun "Tul sok kesletetett thread (delayed)"
ita "Troppi threads ritardati in uso"
+ jpn "'Delayed insert'スレッドが多すぎます。"
kor "너무 많은 지연 쓰레드를 사용하고 있습니다."
por "Excesso de 'threads' retardadas (atrasadas) em uso"
rum "Prea multe threaduri aminate care sint in uz"
@@ -3489,7 +3424,7 @@ ER_TOO_MANY_DELAYED_THREADS
swe "Det finns redan 'max_delayed_threads' trådar i använding"
ukr "Забагато затриманих гілок використовується"
ER_ABORTING_CONNECTION 08S01
- cze "Zru-Bšeno spojení %ld do databáze: '%-.192s' uživatel: '%-.48s' (%-.64s)"
+ cze "Zrušeno spojení %ld do databáze: '%-.192s' uživatel: '%-.48s' (%-.64s)"
dan "Afbrudt forbindelse %ld til database: '%-.192s' bruger: '%-.48s' (%-.64s)"
nla "Afgebroken verbinding %ld naar db: '%-.192s' gebruiker: '%-.48s' (%-.64s)"
eng "Aborted connection %ld to db: '%-.192s' user: '%-.48s' (%-.64s)"
@@ -3498,7 +3433,7 @@ ER_ABORTING_CONNECTION 08S01
ger "Abbruch der Verbindung %ld zur Datenbank '%-.192s'. Benutzer: '%-.48s' (%-.64s)"
hun "Megszakitott kapcsolat %ld db: '%-.192s' adatbazishoz, felhasznalo: '%-.48s' (%-.64s)"
ita "Interrotta la connessione %ld al db: '%-.192s' utente: '%-.48s' (%-.64s)"
- jpn "Aborted connection %ld to db: '%-.192s' user: '%-.48s' (%-.64s)"
+ jpn "接続 %ld が中断されました。データベース: '%-.192s' ユーザー: '%-.48s' (%-.64s)"
kor "데이타베이스 접속을 위한 연결 %ld가 중단됨 : '%-.192s' 사용자: '%-.48s' (%-.64s)"
nor "Aborted connection %ld to db: '%-.192s' user: '%-.48s' (%-.64s)"
norwegian-ny "Aborted connection %ld to db: '%-.192s' user: '%-.48s' (%-.64s)"
@@ -3512,7 +3447,7 @@ ER_ABORTING_CONNECTION 08S01
swe "Avbröt länken för tråd %ld till db '%-.192s', användare '%-.48s' (%-.64s)"
ukr "Перервано з'єднання %ld до бази данних: '%-.192s' користувача: '%-.48s' (%-.64s)"
ER_NET_PACKET_TOO_LARGE 08S01
- cze "Zji-Bštěn příchozí packet delší než 'max_allowed_packet'"
+ cze "Zjištěn příchozí packet delší než 'max_allowed_packet'"
dan "Modtog en datapakke som var større end 'max_allowed_packet'"
nla "Groter pakket ontvangen dan 'max_allowed_packet'"
eng "Got a packet bigger than 'max_allowed_packet' bytes"
@@ -3521,6 +3456,7 @@ ER_NET_PACKET_TOO_LARGE 08S01
ger "Empfangenes Paket ist größer als 'max_allowed_packet' Bytes"
hun "A kapott csomag nagyobb, mint a maximalisan engedelyezett: 'max_allowed_packet'"
ita "Ricevuto un pacchetto piu` grande di 'max_allowed_packet'"
+ jpn "'max_allowed_packet'よりも大きなパケットを受信しました。"
kor "'max_allowed_packet'보다 더큰 패킷을 받았습니다."
por "Obteve um pacote maior do que a taxa máxima de pacotes definida (max_allowed_packet)"
rum "Un packet mai mare decit 'max_allowed_packet' a fost primit"
@@ -3530,7 +3466,7 @@ ER_NET_PACKET_TOO_LARGE 08S01
swe "Kommunkationspaketet är större än 'max_allowed_packet'"
ukr "Отримано пакет більший ніж max_allowed_packet"
ER_NET_READ_ERROR_FROM_PIPE 08S01
- cze "Zji-Bštěna chyba při čtení z roury spojení"
+ cze "Zjištěna chyba při čtení z roury spojení"
dan "Fik læsefejl fra forbindelse (connection pipe)"
nla "Kreeg leesfout van de verbindings pipe"
eng "Got a read error from the connection pipe"
@@ -3539,6 +3475,7 @@ ER_NET_READ_ERROR_FROM_PIPE 08S01
ger "Lese-Fehler bei einer Verbindungs-Pipe"
hun "Olvasasi hiba a kapcsolat soran"
ita "Rilevato un errore di lettura dalla pipe di connessione"
+ jpn "接続パイプの読み込みエラーです。"
kor "연결 파이프로부터 에러가 발생하였습니다."
por "Obteve um erro de leitura no 'pipe' da conexão"
rum "Eroare la citire din cauza lui 'connection pipe'"
@@ -3548,7 +3485,7 @@ ER_NET_READ_ERROR_FROM_PIPE 08S01
swe "Fick läsfel från klienten vid läsning från 'PIPE'"
ukr "Отримано помилку читання з комунікаційного каналу"
ER_NET_FCNTL_ERROR 08S01
- cze "Zji-Bštěna chyba fcntl()"
+ cze "Zjištěna chyba fcntl()"
dan "Fik fejlmeddelelse fra fcntl()"
nla "Kreeg fout van fcntl()"
eng "Got an error from fcntl()"
@@ -3557,6 +3494,7 @@ ER_NET_FCNTL_ERROR 08S01
ger "fcntl() lieferte einen Fehler"
hun "Hiba a fcntl() fuggvenyben"
ita "Rilevato un errore da fcntl()"
+ jpn "fcntl()がエラーを返しました。"
kor "fcntl() 함수로부터 에러가 발생하였습니다."
por "Obteve um erro em fcntl()"
rum "Eroare obtinuta de la fcntl()"
@@ -3566,7 +3504,7 @@ ER_NET_FCNTL_ERROR 08S01
swe "Fick fatalt fel från 'fcntl()'"
ukr "Отримано помилкку від fcntl()"
ER_NET_PACKETS_OUT_OF_ORDER 08S01
- cze "P-Bříchozí packety v chybném pořadí"
+ cze "Příchozí packety v chybném pořadí"
dan "Modtog ikke datapakker i korrekt rækkefølge"
nla "Pakketten in verkeerde volgorde ontvangen"
eng "Got packets out of order"
@@ -3575,6 +3513,7 @@ ER_NET_PACKETS_OUT_OF_ORDER 08S01
ger "Pakete nicht in der richtigen Reihenfolge empfangen"
hun "Helytelen sorrendben erkezett adatcsomagok"
ita "Ricevuti pacchetti non in ordine"
+ jpn "不正な順序のパケットを受信しました。"
kor "순서가 맞지않는 패킷을 받았습니다."
por "Obteve pacotes fora de ordem"
rum "Packets care nu sint ordonati au fost gasiti"
@@ -3584,7 +3523,7 @@ ER_NET_PACKETS_OUT_OF_ORDER 08S01
swe "Kommunikationspaketen kom i fel ordning"
ukr "Отримано пакети у неналежному порядку"
ER_NET_UNCOMPRESS_ERROR 08S01
- cze "Nemohu rozkomprimovat komunika-Bční packet"
+ cze "Nemohu rozkomprimovat komunikační packet"
dan "Kunne ikke dekomprimere kommunikations-pakke (communication packet)"
nla "Communicatiepakket kon niet worden gedecomprimeerd"
eng "Couldn't uncompress communication packet"
@@ -3593,6 +3532,7 @@ ER_NET_UNCOMPRESS_ERROR 08S01
ger "Kommunikationspaket lässt sich nicht entpacken"
hun "A kommunikacios adatcsomagok nem tomorithetok ki"
ita "Impossibile scompattare i pacchetti di comunicazione"
+ jpn "圧縮パケットの展開ができませんでした。"
kor "통신 패킷의 압축해제를 할 수 없었습니다."
por "Não conseguiu descomprimir pacote de comunicação"
rum "Nu s-a putut decompresa pachetul de comunicatie (communication packet)"
@@ -3602,7 +3542,7 @@ ER_NET_UNCOMPRESS_ERROR 08S01
swe "Kunde inte packa up kommunikationspaketet"
ukr "Не можу декомпресувати комунікаційний пакет"
ER_NET_READ_ERROR 08S01
- cze "Zji-Bštěna chyba při čtení komunikačního packetu"
+ cze "Zjištěna chyba při čtení komunikačního packetu"
dan "Fik fejlmeddelelse ved læsning af kommunikations-pakker (communication packets)"
nla "Fout bij het lezen van communicatiepakketten"
eng "Got an error reading communication packets"
@@ -3611,6 +3551,7 @@ ER_NET_READ_ERROR 08S01
ger "Fehler beim Lesen eines Kommunikationspakets"
hun "HIba a kommunikacios adatcsomagok olvasasa soran"
ita "Rilevato un errore ricevendo i pacchetti di comunicazione"
+ jpn "パケットの受信でエラーが発生しました。"
kor "통신 패킷을 읽는 중 오류가 발생하였습니다."
por "Obteve um erro na leitura de pacotes de comunicação"
rum "Eroare obtinuta citind pachetele de comunicatie (communication packets)"
@@ -3620,7 +3561,7 @@ ER_NET_READ_ERROR 08S01
swe "Fick ett fel vid läsning från klienten"
ukr "Отримано помилку читання комунікаційних пакетів"
ER_NET_READ_INTERRUPTED 08S01
- cze "Zji-Bštěn timeout při čtení komunikačního packetu"
+ cze "Zjištěn timeout při čtení komunikačního packetu"
dan "Timeout-fejl ved læsning af kommunukations-pakker (communication packets)"
nla "Timeout bij het lezen van communicatiepakketten"
eng "Got timeout reading communication packets"
@@ -3629,6 +3570,7 @@ ER_NET_READ_INTERRUPTED 08S01
ger "Zeitüberschreitung beim Lesen eines Kommunikationspakets"
hun "Idotullepes a kommunikacios adatcsomagok olvasasa soran"
ita "Rilevato un timeout ricevendo i pacchetti di comunicazione"
+ jpn "パケットの受信でタイムアウトが発生しました。"
kor "통신 패킷을 읽는 중 timeout이 발생하였습니다."
por "Obteve expiração de tempo (timeout) na leitura de pacotes de comunicação"
rum "Timeout obtinut citind pachetele de comunicatie (communication packets)"
@@ -3638,7 +3580,7 @@ ER_NET_READ_INTERRUPTED 08S01
swe "Fick 'timeout' vid läsning från klienten"
ukr "Отримано затримку читання комунікаційних пакетів"
ER_NET_ERROR_ON_WRITE 08S01
- cze "Zji-Bštěna chyba při zápisu komunikačního packetu"
+ cze "Zjištěna chyba při zápisu komunikačního packetu"
dan "Fik fejlmeddelelse ved skrivning af kommunukations-pakker (communication packets)"
nla "Fout bij het schrijven van communicatiepakketten"
eng "Got an error writing communication packets"
@@ -3647,6 +3589,7 @@ ER_NET_ERROR_ON_WRITE 08S01
ger "Fehler beim Schreiben eines Kommunikationspakets"
hun "Hiba a kommunikacios csomagok irasa soran"
ita "Rilevato un errore inviando i pacchetti di comunicazione"
+ jpn "パケットの送信でエラーが発生しました。"
kor "통신 패킷을 기록하는 중 오류가 발생하였습니다."
por "Obteve um erro na escrita de pacotes de comunicação"
rum "Eroare in scrierea pachetelor de comunicatie (communication packets)"
@@ -3656,7 +3599,7 @@ ER_NET_ERROR_ON_WRITE 08S01
swe "Fick ett fel vid skrivning till klienten"
ukr "Отримано помилку запису комунікаційних пакетів"
ER_NET_WRITE_INTERRUPTED 08S01
- cze "Zji-Bštěn timeout při zápisu komunikačního packetu"
+ cze "Zjištěn timeout při zápisu komunikačního packetu"
dan "Timeout-fejl ved skrivning af kommunukations-pakker (communication packets)"
nla "Timeout bij het schrijven van communicatiepakketten"
eng "Got timeout writing communication packets"
@@ -3665,6 +3608,7 @@ ER_NET_WRITE_INTERRUPTED 08S01
ger "Zeitüberschreitung beim Schreiben eines Kommunikationspakets"
hun "Idotullepes a kommunikacios csomagok irasa soran"
ita "Rilevato un timeout inviando i pacchetti di comunicazione"
+ jpn "パケットの送信でタイムアウトが発生しました。"
kor "통신 패팃을 기록하는 중 timeout이 발생하였습니다."
por "Obteve expiração de tempo ('timeout') na escrita de pacotes de comunicação"
rum "Timeout obtinut scriind pachetele de comunicatie (communication packets)"
@@ -3674,7 +3618,7 @@ ER_NET_WRITE_INTERRUPTED 08S01
swe "Fick 'timeout' vid skrivning till klienten"
ukr "Отримано затримку запису комунікаційних пакетів"
ER_TOO_LONG_STRING 42000
- cze "V-Býsledný řetězec je delší než 'max_allowed_packet'"
+ cze "Výsledný řetězec je delší než 'max_allowed_packet'"
dan "Strengen med resultater er større end 'max_allowed_packet'"
nla "Resultaat string is langer dan 'max_allowed_packet'"
eng "Result string is longer than 'max_allowed_packet' bytes"
@@ -3683,6 +3627,7 @@ ER_TOO_LONG_STRING 42000
ger "Ergebnis-String ist länger als 'max_allowed_packet' Bytes"
hun "Ez eredmeny sztring nagyobb, mint a lehetseges maximum: 'max_allowed_packet'"
ita "La stringa di risposta e` piu` lunga di 'max_allowed_packet'"
+ jpn "結果の文字列が 'max_allowed_packet' よりも大きいです。"
por "'String' resultante é mais longa do que 'max_allowed_packet'"
rum "Sirul rezultat este mai lung decit 'max_allowed_packet'"
rus "Результирующая строка больше, чем 'max_allowed_packet'"
@@ -3691,7 +3636,7 @@ ER_TOO_LONG_STRING 42000
swe "Resultatsträngen är längre än max_allowed_packet"
ukr "Строка результату довша ніж max_allowed_packet"
ER_TABLE_CANT_HANDLE_BLOB 42000
- cze "Typ pou-Bžité tabulky nepodporuje BLOB/TEXT sloupce"
+ cze "Typ použité tabulky nepodporuje BLOB/TEXT sloupce"
dan "Denne tabeltype understøtter ikke brug af BLOB og TEXT kolonner"
nla "Het gebruikte tabel type ondersteunt geen BLOB/TEXT kolommen"
eng "The used table type doesn't support BLOB/TEXT columns"
@@ -3700,6 +3645,7 @@ ER_TABLE_CANT_HANDLE_BLOB 42000
ger "Der verwendete Tabellentyp unterstützt keine BLOB- und TEXT-Felder"
hun "A hasznalt tabla tipus nem tamogatja a BLOB/TEXT mezoket"
ita "Il tipo di tabella usata non supporta colonne di tipo BLOB/TEXT"
+ jpn "指定されたストレージエンジンでは、BLOB/TEXT型の列を使用できません。"
por "Tipo de tabela usado não permite colunas BLOB/TEXT"
rum "Tipul de tabela folosit nu suporta coloane de tip BLOB/TEXT"
rus "Используемая таблица не поддерживает типы BLOB/TEXT"
@@ -3708,7 +3654,7 @@ ER_TABLE_CANT_HANDLE_BLOB 42000
swe "Den använda tabelltypen kan inte hantera BLOB/TEXT-kolumner"
ukr "Використаний тип таблиці не підтримує BLOB/TEXT стовбці"
ER_TABLE_CANT_HANDLE_AUTO_INCREMENT 42000
- cze "Typ pou-Bžité tabulky nepodporuje AUTO_INCREMENT sloupce"
+ cze "Typ použité tabulky nepodporuje AUTO_INCREMENT sloupce"
dan "Denne tabeltype understøtter ikke brug af AUTO_INCREMENT kolonner"
nla "Het gebruikte tabel type ondersteunt geen AUTO_INCREMENT kolommen"
eng "The used table type doesn't support AUTO_INCREMENT columns"
@@ -3716,6 +3662,7 @@ ER_TABLE_CANT_HANDLE_AUTO_INCREMENT 42000
fre "Ce type de table ne supporte pas les colonnes AUTO_INCREMENT"
ger "Der verwendete Tabellentyp unterstützt keine AUTO_INCREMENT-Felder"
hun "A hasznalt tabla tipus nem tamogatja az AUTO_INCREMENT tipusu mezoket"
+ jpn "指定されたストレージエンジンでは、AUTO_INCREMENT列を使用できません。"
ita "Il tipo di tabella usata non supporta colonne di tipo AUTO_INCREMENT"
por "Tipo de tabela usado não permite colunas AUTO_INCREMENT"
rum "Tipul de tabela folosit nu suporta coloane de tip AUTO_INCREMENT"
@@ -3725,7 +3672,7 @@ ER_TABLE_CANT_HANDLE_AUTO_INCREMENT 42000
swe "Den använda tabelltypen kan inte hantera AUTO_INCREMENT-kolumner"
ukr "Використаний тип таблиці не підтримує AUTO_INCREMENT стовбці"
ER_DELAYED_INSERT_TABLE_LOCKED
- cze "INSERT DELAYED nen-Bí možno s tabulkou '%-.192s' použít, protože je zamčená pomocí LOCK TABLES"
+ cze "INSERT DELAYED není možno s tabulkou '%-.192s' použít, protože je zamčená pomocí LOCK TABLES"
dan "INSERT DELAYED kan ikke bruges med tabellen '%-.192s', fordi tabellen er låst med LOCK TABLES"
nla "INSERT DELAYED kan niet worden gebruikt bij table '%-.192s', vanwege een 'lock met LOCK TABLES"
eng "INSERT DELAYED can't be used with table '%-.192s' because it is locked with LOCK TABLES"
@@ -3735,7 +3682,7 @@ ER_DELAYED_INSERT_TABLE_LOCKED
greek "INSERT DELAYED can't be used with table '%-.192s', because it is locked with LOCK TABLES"
hun "Az INSERT DELAYED nem hasznalhato a '%-.192s' tablahoz, mert a tabla zarolt (LOCK TABLES)"
ita "L'inserimento ritardato (INSERT DELAYED) non puo` essere usato con la tabella '%-.192s', perche` soggetta a lock da 'LOCK TABLES'"
- jpn "INSERT DELAYED can't be used with table '%-.192s', because it is locked with LOCK TABLES"
+ jpn "表 '%-.192s' はLOCK TABLESでロックされているため、INSERT DELAYEDを使用できません。"
kor "INSERT DELAYED can't be used with table '%-.192s', because it is locked with LOCK TABLES"
nor "INSERT DELAYED can't be used with table '%-.192s', because it is locked with LOCK TABLES"
norwegian-ny "INSERT DELAYED can't be used with table '%-.192s', because it is locked with LOCK TABLES"
@@ -3749,7 +3696,7 @@ ER_DELAYED_INSERT_TABLE_LOCKED
swe "INSERT DELAYED kan inte användas med tabell '%-.192s', emedan den är låst med LOCK TABLES"
ukr "INSERT DELAYED не може бути використано з таблицею '%-.192s', тому що її заблоковано з LOCK TABLES"
ER_WRONG_COLUMN_NAME 42000
- cze "Nespr-Bávné jméno sloupce '%-.100s'"
+ cze "Nesprávné jméno sloupce '%-.100s'"
dan "Forkert kolonnenavn '%-.100s'"
nla "Incorrecte kolom naam '%-.100s'"
eng "Incorrect column name '%-.100s'"
@@ -3758,6 +3705,7 @@ ER_WRONG_COLUMN_NAME 42000
ger "Falscher Spaltenname '%-.100s'"
hun "Ervenytelen mezonev: '%-.100s'"
ita "Nome colonna '%-.100s' non corretto"
+ jpn "列名 '%-.100s' は不正です。"
por "Nome de coluna '%-.100s' incorreto"
rum "Nume increct de coloana '%-.100s'"
rus "Неверное имя столбца '%-.100s'"
@@ -3766,7 +3714,7 @@ ER_WRONG_COLUMN_NAME 42000
swe "Felaktigt kolumnnamn '%-.100s'"
ukr "Невірне ім'я стовбця '%-.100s'"
ER_WRONG_KEY_COLUMN 42000
- cze "Handler pou-Bžité tabulky neumí indexovat sloupce '%-.192s'"
+ cze "Handler použité tabulky neumí indexovat sloupce '%-.192s'"
dan "Den brugte tabeltype kan ikke indeksere kolonnen '%-.192s'"
nla "De gebruikte tabel 'handler' kan kolom '%-.192s' niet indexeren"
eng "The used storage engine can't index column '%-.192s'"
@@ -3776,7 +3724,7 @@ ER_WRONG_KEY_COLUMN 42000
greek "The used table handler can't index column '%-.192s'"
hun "A hasznalt tablakezelo nem tudja a '%-.192s' mezot indexelni"
ita "Il gestore delle tabelle non puo` indicizzare la colonna '%-.192s'"
- jpn "The used table handler can't index column '%-.192s'"
+ jpn "使用のストレージエンジンは列 '%-.192s' の索引を作成できません。"
kor "The used table handler can't index column '%-.192s'"
nor "The used table handler can't index column '%-.192s'"
norwegian-ny "The used table handler can't index column '%-.192s'"
@@ -3790,7 +3738,7 @@ ER_WRONG_KEY_COLUMN 42000
swe "Den använda tabelltypen kan inte indexera kolumn '%-.192s'"
ukr "Використаний вказівник таблиці не може індексувати стовбець '%-.192s'"
ER_WRONG_MRG_TABLE
- cze "V-Bšechny tabulky v MERGE tabulce nejsou definovány stejně"
+ cze "Všechny tabulky v MERGE tabulce nejsou definovány stejně"
dan "Tabellerne i MERGE er ikke defineret ens"
nla "Niet alle tabellen in de MERGE tabel hebben identieke gedefinities"
eng "Unable to open underlying table which is differently defined or of non-MyISAM type or doesn't exist"
@@ -3799,7 +3747,7 @@ ER_WRONG_MRG_TABLE
ger "Nicht alle Tabellen in der MERGE-Tabelle sind gleich definiert"
hun "A MERGE tablaban talalhato tablak definicioja nem azonos"
ita "Non tutte le tabelle nella tabella di MERGE sono definite in maniera identica"
- jpn "All tables in the MERGE table are not defined identically"
+ jpn "MERGE表の構成表がオープンできません。列定義が異なるか、MyISAM表ではないか、存在しません。"
kor "All tables in the MERGE table are not defined identically"
nor "All tables in the MERGE table are not defined identically"
norwegian-ny "All tables in the MERGE table are not defined identically"
@@ -3813,7 +3761,7 @@ ER_WRONG_MRG_TABLE
swe "Tabellerna i MERGE-tabellen är inte identiskt definierade"
ukr "Таблиці у MERGE TABLE мають різну структуру"
ER_DUP_UNIQUE 23000
- cze "Kv-Bůli unique constraintu nemozu zapsat do tabulky '%-.192s'"
+ cze "Kvůli unique constraintu nemozu zapsat do tabulky '%-.192s'"
dan "Kan ikke skrive til tabellen '%-.192s' fordi det vil bryde CONSTRAINT regler"
nla "Kan niet opslaan naar table '%-.192s' vanwege 'unique' beperking"
eng "Can't write, because of unique constraint, to table '%-.192s'"
@@ -3821,6 +3769,7 @@ ER_DUP_UNIQUE 23000
fre "Écriture impossible à cause d'un index UNIQUE sur la table '%-.192s'"
ger "Schreiben in Tabelle '%-.192s' nicht möglich wegen einer Eindeutigkeitsbeschränkung (unique constraint)"
hun "A '%-.192s' nem irhato, az egyedi mezok miatt"
+ jpn "一意性制約違反のため、表 '%-.192s' に書き込めません。"
ita "Impossibile scrivere nella tabella '%-.192s' per limitazione di unicita`"
por "Não pode gravar, devido à restrição UNIQUE, na tabela '%-.192s'"
rum "Nu pot scrie pe hard-drive, din cauza constraintului unic (unique constraint) pentru tabela '%-.192s'"
@@ -3830,7 +3779,7 @@ ER_DUP_UNIQUE 23000
swe "Kan inte skriva till tabell '%-.192s'; UNIQUE-test"
ukr "Не можу записати до таблиці '%-.192s', з причини вимог унікальності"
ER_BLOB_KEY_WITHOUT_LENGTH 42000
- cze "BLOB sloupec '%-.192s' je pou-Bžit ve specifikaci klíče bez délky"
+ cze "BLOB sloupec '%-.192s' je použit ve specifikaci klíče bez délky"
dan "BLOB kolonnen '%-.192s' brugt i nøglespecifikation uden nøglelængde"
nla "BLOB kolom '%-.192s' gebruikt in zoeksleutel specificatie zonder zoeksleutel lengte"
eng "BLOB/TEXT column '%-.192s' used in key specification without a key length"
@@ -3840,7 +3789,7 @@ ER_BLOB_KEY_WITHOUT_LENGTH 42000
greek "BLOB column '%-.192s' used in key specification without a key length"
hun "BLOB mezo '%-.192s' hasznalt a mezo specifikacioban, a mezohossz megadasa nelkul"
ita "La colonna '%-.192s' di tipo BLOB e` usata in una chiave senza specificarne la lunghezza"
- jpn "BLOB column '%-.192s' used in key specification without a key length"
+ jpn "BLOB列 '%-.192s' をキーに使用するには長さ指定が必要です。"
kor "BLOB column '%-.192s' used in key specification without a key length"
nor "BLOB column '%-.192s' used in key specification without a key length"
norwegian-ny "BLOB column '%-.192s' used in key specification without a key length"
@@ -3854,7 +3803,7 @@ ER_BLOB_KEY_WITHOUT_LENGTH 42000
swe "Du har inte angett någon nyckellängd för BLOB '%-.192s'"
ukr "Стовбець BLOB '%-.192s' використано у визначенні ключа без вказання довжини ключа"
ER_PRIMARY_CANT_HAVE_NULL 42000
- cze "V-Bšechny části primárního klíče musejí být NOT NULL; pokud potřebujete NULL, použijte UNIQUE"
+ cze "Všechny části primárního klíče musejí být NOT NULL; pokud potřebujete NULL, použijte UNIQUE"
dan "Alle dele af en PRIMARY KEY skal være NOT NULL; Hvis du skal bruge NULL i nøglen, brug UNIQUE istedet"
nla "Alle delen van een PRIMARY KEY moeten NOT NULL zijn; Indien u NULL in een zoeksleutel nodig heeft kunt u UNIQUE gebruiken"
eng "All parts of a PRIMARY KEY must be NOT NULL; if you need NULL in a key, use UNIQUE instead"
@@ -3863,6 +3812,7 @@ ER_PRIMARY_CANT_HAVE_NULL 42000
ger "Alle Teile eines PRIMARY KEY müssen als NOT NULL definiert sein. Wenn NULL in einem Schlüssel benötigt wird, muss ein UNIQUE-Schlüssel verwendet werden"
hun "Az elsodleges kulcs teljes egeszeben csak NOT NULL tipusu lehet; Ha NULL mezot szeretne a kulcskent, hasznalja inkabb a UNIQUE-ot"
ita "Tutte le parti di una chiave primaria devono essere dichiarate NOT NULL; se necessitano valori NULL nelle chiavi utilizzare UNIQUE"
+ jpn "PRIMARY KEYの列は全てNOT NULLでなければいけません。UNIQUE索引であればNULLを含むことが可能です。"
por "Todas as partes de uma chave primária devem ser não-nulas. Se você precisou usar um valor nulo (NULL) em uma chave, use a cláusula UNIQUE em seu lugar"
rum "Toate partile unei chei primare (PRIMARY KEY) trebuie sa fie NOT NULL; Daca aveti nevoie de NULL in vreo cheie, folositi UNIQUE in schimb"
rus "Все части первичного ключа (PRIMARY KEY) должны быть определены как NOT NULL; Если вам нужна поддержка величин NULL в ключе, воспользуйтесь индексом UNIQUE"
@@ -3871,7 +3821,7 @@ ER_PRIMARY_CANT_HAVE_NULL 42000
swe "Alla delar av en PRIMARY KEY måste vara NOT NULL; Om du vill ha en nyckel med NULL, använd UNIQUE istället"
ukr "Усі частини PRIMARY KEY повинні бути NOT NULL; Якщо ви потребуєте NULL у ключі, скористайтеся UNIQUE"
ER_TOO_MANY_ROWS 42000
- cze "V-Býsledek obsahuje více než jeden řádek"
+ cze "Výsledek obsahuje více než jeden řádek"
dan "Resultatet bestod af mere end een række"
nla "Resultaat bevatte meer dan een rij"
eng "Result consisted of more than one row"
@@ -3880,6 +3830,7 @@ ER_TOO_MANY_ROWS 42000
ger "Ergebnis besteht aus mehr als einer Zeile"
hun "Az eredmeny tobb, mint egy sort tartalmaz"
ita "Il risultato consiste di piu` di una riga"
+ jpn "結果が2行以上です。"
por "O resultado consistiu em mais do que uma linha"
rum "Resultatul constista din mai multe linii"
rus "В результате возвращена более чем одна строка"
@@ -3888,7 +3839,7 @@ ER_TOO_MANY_ROWS 42000
swe "Resultet bestod av mera än en rad"
ukr "Результат знаходиться у більше ніж одній строці"
ER_REQUIRES_PRIMARY_KEY 42000
- cze "Tento typ tabulky vy-Bžaduje primární klíč"
+ cze "Tento typ tabulky vyžaduje primární klíč"
dan "Denne tabeltype kræver en primærnøgle"
nla "Dit tabel type heeft een primaire zoeksleutel nodig"
eng "This table type requires a primary key"
@@ -3897,6 +3848,7 @@ ER_REQUIRES_PRIMARY_KEY 42000
ger "Dieser Tabellentyp benötigt einen Primärschlüssel (PRIMARY KEY)"
hun "Az adott tablatipushoz elsodleges kulcs hasznalata kotelezo"
ita "Questo tipo di tabella richiede una chiave primaria"
+ jpn "使用のストレージエンジンでは、PRIMARY KEYが必要です。"
por "Este tipo de tabela requer uma chave primária"
rum "Aceast tip de tabela are nevoie de o cheie primara"
rus "Этот тип таблицы требует определения первичного ключа"
@@ -3905,7 +3857,7 @@ ER_REQUIRES_PRIMARY_KEY 42000
swe "Denna tabelltyp kräver en PRIMARY KEY"
ukr "Цей тип таблиці потребує первинного ключа"
ER_NO_RAID_COMPILED
- cze "Tato verze MariaDB nen-Bí zkompilována s podporou RAID"
+ cze "Tato verze MariaDB není zkompilována s podporou RAID"
dan "Denne udgave af MariaDB er ikke oversat med understøttelse af RAID"
nla "Deze versie van MariaDB is niet gecompileerd met RAID ondersteuning"
eng "This version of MariaDB is not compiled with RAID support"
@@ -3914,6 +3866,7 @@ ER_NO_RAID_COMPILED
ger "Diese MariaDB-Version ist nicht mit RAID-Unterstützung kompiliert"
hun "Ezen leforditott MariaDB verzio nem tartalmaz RAID support-ot"
ita "Questa versione di MYSQL non e` compilata con il supporto RAID"
+ jpn "このバージョンのMariaDBはRAIDサポートを含めてコンパイルされていません。"
por "Esta versão do MariaDB não foi compilada com suporte a RAID"
rum "Aceasta versiune de MariaDB, nu a fost compilata cu suport pentru RAID"
rus "Эта версия MariaDB скомпилирована без поддержки RAID"
@@ -3922,7 +3875,7 @@ ER_NO_RAID_COMPILED
swe "Denna version av MariaDB är inte kompilerad med RAID"
ukr "Ця версія MariaDB не зкомпільована з підтримкою RAID"
ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE
- cze "Update tabulky bez WHERE s kl-Bíčem není v módu bezpečných update dovoleno"
+ cze "Update tabulky bez WHERE s klíčem není v módu bezpečných update dovoleno"
dan "Du bruger sikker opdaterings modus ('safe update mode') og du forsøgte at opdatere en tabel uden en WHERE klausul, der gør brug af et KEY felt"
nla "U gebruikt 'safe update mode' en u probeerde een tabel te updaten zonder een WHERE met een KEY kolom"
eng "You are using safe update mode and you tried to update a table without a WHERE that uses a KEY column"
@@ -3931,6 +3884,7 @@ ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE
ger "MariaDB läuft im sicheren Aktualisierungsmodus (safe update mode). Sie haben versucht, eine Tabelle zu aktualisieren, ohne in der WHERE-Klausel ein KEY-Feld anzugeben"
hun "On a biztonsagos update modot hasznalja, es WHERE that uses a KEY column"
ita "In modalita` 'safe update' si e` cercato di aggiornare una tabella senza clausola WHERE su una chiave"
+ jpn "'safe update mode'で、索引を利用するWHERE句の無い更新処理を実行しようとしました。"
por "Você está usando modo de atualização seguro e tentou atualizar uma tabela sem uma cláusula WHERE que use uma coluna chave"
rus "Вы работаете в режиме безопасных обновлений (safe update mode) и попробовали изменить таблицу без использования ключевого столбца в части WHERE"
serbian "Vi koristite safe update mod servera, a probali ste da promenite podatke bez 'WHERE' komande koja koristi kolonu ključa"
@@ -3938,7 +3892,7 @@ ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE
swe "Du använder 'säker uppdateringsmod' och försökte uppdatera en tabell utan en WHERE-sats som använder sig av en nyckel"
ukr "Ви у режимі безпечного оновлення та намагаєтесь оновити таблицю без оператора WHERE, що використовує KEY стовбець"
ER_KEY_DOES_NOT_EXITS 42000 S1009
- cze "Kl-Bíč '%-.192s' v tabulce '%-.192s' neexistuje"
+ cze "Klíč '%-.192s' v tabulce '%-.192s' neexistuje"
dan "Nøglen '%-.192s' eksisterer ikke i tabellen '%-.192s'"
nla "Zoeksleutel '%-.192s' bestaat niet in tabel '%-.192s'"
eng "Key '%-.192s' doesn't exist in table '%-.192s'"
@@ -3947,6 +3901,7 @@ ER_KEY_DOES_NOT_EXITS 42000 S1009
ger "Schlüssel '%-.192s' existiert in der Tabelle '%-.192s' nicht"
hun "A '%-.192s' kulcs nem letezik a '%-.192s' tablaban"
ita "La chiave '%-.192s' non esiste nella tabella '%-.192s'"
+ jpn "索引 '%-.192s' は表 '%-.192s' には存在しません。"
por "Chave '%-.192s' não existe na tabela '%-.192s'"
rus "Ключ '%-.192s' не существует в таблице '%-.192s'"
serbian "Ključ '%-.192s' ne postoji u tabeli '%-.192s'"
@@ -3954,7 +3909,7 @@ ER_KEY_DOES_NOT_EXITS 42000 S1009
swe "Nyckel '%-.192s' finns inte in tabell '%-.192s'"
ukr "Ключ '%-.192s' не існує в таблиці '%-.192s'"
ER_CHECK_NO_SUCH_TABLE 42000
- cze "Nemohu otev-Břít tabulku"
+ cze "Nemohu otevřít tabulku"
dan "Kan ikke åbne tabellen"
nla "Kan tabel niet openen"
eng "Can't open table"
@@ -3963,6 +3918,7 @@ ER_CHECK_NO_SUCH_TABLE 42000
ger "Kann Tabelle nicht öffnen"
hun "Nem tudom megnyitni a tablat"
ita "Impossibile aprire la tabella"
+ jpn "表をオープンできません。"
por "Não pode abrir a tabela"
rus "Невозможно открыть таблицу"
serbian "Ne mogu da otvorim tabelu"
@@ -3980,7 +3936,7 @@ ER_CHECK_NOT_IMPLEMENTED 42000
greek "The handler for the table doesn't support %s"
hun "A tabla kezeloje (handler) nem tamogatja az %s"
ita "Il gestore per la tabella non supporta il %s"
- jpn "The handler for the table doesn't support %s"
+ jpn "この表のストレージエンジンは '%s' を利用できません。"
kor "The handler for the table doesn't support %s"
nor "The handler for the table doesn't support %s"
norwegian-ny "The handler for the table doesn't support %s"
@@ -3994,7 +3950,7 @@ ER_CHECK_NOT_IMPLEMENTED 42000
swe "Tabellhanteraren för denna tabell kan inte göra %s"
ukr "Вказівник таблиці не підтримуе %s"
ER_CANT_DO_THIS_DURING_AN_TRANSACTION 25000
- cze "Proveden-Bí tohoto příkazu není v transakci dovoleno"
+ cze "Provedení tohoto příkazu není v transakci dovoleno"
dan "Du må ikke bruge denne kommando i en transaktion"
nla "Het is u niet toegestaan dit commando uit te voeren binnen een transactie"
eng "You are not allowed to execute this command in a transaction"
@@ -4003,6 +3959,7 @@ ER_CANT_DO_THIS_DURING_AN_TRANSACTION 25000
ger "Sie dürfen diesen Befehl nicht in einer Transaktion ausführen"
hun "Az On szamara nem engedelyezett a parancs vegrehajtasa a tranzakcioban"
ita "Non puoi eseguire questo comando in una transazione"
+ jpn "このコマンドはトランザクション内で実行できません。"
por "Não lhe é permitido executar este comando em uma transação"
rus "Вам не разрешено выполнять эту команду в транзакции"
serbian "Nije Vam dozvoljeno da izvršite ovu komandu u transakciji"
@@ -4010,7 +3967,7 @@ ER_CANT_DO_THIS_DURING_AN_TRANSACTION 25000
swe "Du får inte utföra detta kommando i en transaktion"
ukr "Вам не дозволено виконувати цю команду в транзакції"
ER_ERROR_DURING_COMMIT
- cze "Chyba %M p-Bři COMMIT"
+ cze "Chyba %M při COMMIT"
dan "Modtog fejl %M mens kommandoen COMMIT blev udført"
nla "Kreeg fout %M tijdens COMMIT"
eng "Got error %M during COMMIT"
@@ -4019,6 +3976,7 @@ ER_ERROR_DURING_COMMIT
ger "Fehler %M beim COMMIT"
hun "%M hiba a COMMIT vegrehajtasa soran"
ita "Rilevato l'errore %M durante il COMMIT"
+ jpn "COMMIT中にエラー %M が発生しました。"
por "Obteve erro %M durante COMMIT"
rus "Получена ошибка %M в процессе COMMIT"
serbian "Greška %M za vreme izvršavanja komande 'COMMIT'"
@@ -4026,7 +3984,7 @@ ER_ERROR_DURING_COMMIT
swe "Fick fel %M vid COMMIT"
ukr "Отримано помилку %M під час COMMIT"
ER_ERROR_DURING_ROLLBACK
- cze "Chyba %M p-Bři ROLLBACK"
+ cze "Chyba %M při ROLLBACK"
dan "Modtog fejl %M mens kommandoen ROLLBACK blev udført"
nla "Kreeg fout %M tijdens ROLLBACK"
eng "Got error %M during ROLLBACK"
@@ -4035,6 +3993,7 @@ ER_ERROR_DURING_ROLLBACK
ger "Fehler %M beim ROLLBACK"
hun "%M hiba a ROLLBACK vegrehajtasa soran"
ita "Rilevato l'errore %M durante il ROLLBACK"
+ jpn "ROLLBACK中にエラー %M が発生しました。"
por "Obteve erro %M durante ROLLBACK"
rus "Получена ошибка %M в процессе ROLLBACK"
serbian "Greška %M za vreme izvršavanja komande 'ROLLBACK'"
@@ -4042,7 +4001,7 @@ ER_ERROR_DURING_ROLLBACK
swe "Fick fel %M vid ROLLBACK"
ukr "Отримано помилку %M під час ROLLBACK"
ER_ERROR_DURING_FLUSH_LOGS
- cze "Chyba %M p-Bři FLUSH_LOGS"
+ cze "Chyba %M při FLUSH_LOGS"
dan "Modtog fejl %M mens kommandoen FLUSH_LOGS blev udført"
nla "Kreeg fout %M tijdens FLUSH_LOGS"
eng "Got error %M during FLUSH_LOGS"
@@ -4051,6 +4010,7 @@ ER_ERROR_DURING_FLUSH_LOGS
ger "Fehler %M bei FLUSH_LOGS"
hun "%M hiba a FLUSH_LOGS vegrehajtasa soran"
ita "Rilevato l'errore %M durante il FLUSH_LOGS"
+ jpn "FLUSH_LOGS中にエラー %M が発生しました。"
por "Obteve erro %M durante FLUSH_LOGS"
rus "Получена ошибка %M в процессе FLUSH_LOGS"
serbian "Greška %M za vreme izvršavanja komande 'FLUSH_LOGS'"
@@ -4058,7 +4018,7 @@ ER_ERROR_DURING_FLUSH_LOGS
swe "Fick fel %M vid FLUSH_LOGS"
ukr "Отримано помилку %M під час FLUSH_LOGS"
ER_ERROR_DURING_CHECKPOINT
- cze "Chyba %M p-Bři CHECKPOINT"
+ cze "Chyba %M při CHECKPOINT"
dan "Modtog fejl %M mens kommandoen CHECKPOINT blev udført"
nla "Kreeg fout %M tijdens CHECKPOINT"
eng "Got error %M during CHECKPOINT"
@@ -4067,6 +4027,7 @@ ER_ERROR_DURING_CHECKPOINT
ger "Fehler %M bei CHECKPOINT"
hun "%M hiba a CHECKPOINT vegrehajtasa soran"
ita "Rilevato l'errore %M durante il CHECKPOINT"
+ jpn "CHECKPOINT中にエラー %M が発生しました。"
por "Obteve erro %M durante CHECKPOINT"
rus "Получена ошибка %M в процессе CHECKPOINT"
serbian "Greška %M za vreme izvršavanja komande 'CHECKPOINT'"
@@ -4074,7 +4035,7 @@ ER_ERROR_DURING_CHECKPOINT
swe "Fick fel %M vid CHECKPOINT"
ukr "Отримано помилку %M під час CHECKPOINT"
ER_NEW_ABORTING_CONNECTION 08S01
- cze "Spojen-Bí %ld do databáze: '%-.192s' uživatel: '%-.48s' stroj: '%-.64s' (%-.64s) bylo přerušeno"
+ cze "Spojení %ld do databáze: '%-.192s' uživatel: '%-.48s' stroj: '%-.64s' (%-.64s) bylo přerušeno"
dan "Afbrød forbindelsen %ld til databasen '%-.192s' bruger: '%-.48s' vært: '%-.64s' (%-.64s)"
nla "Afgebroken verbinding %ld naar db: '%-.192s' gebruiker: '%-.48s' host: '%-.64s' (%-.64s)"
eng "Aborted connection %ld to db: '%-.192s' user: '%-.48s' host: '%-.64s' (%-.64s)"
@@ -4082,6 +4043,7 @@ ER_NEW_ABORTING_CONNECTION 08S01
fre "Connection %ld avortée vers la bd: '%-.192s' utilisateur: '%-.48s' hôte: '%-.64s' (%-.64s)"
ger "Abbruch der Verbindung %ld zur Datenbank '%-.192s'. Benutzer: '%-.48s', Host: '%-.64s' (%-.64s)"
ita "Interrotta la connessione %ld al db: ''%-.192s' utente: '%-.48s' host: '%-.64s' (%-.64s)"
+ jpn "接続 %ld が中断されました。データベース: '%-.192s' ユーザー: '%-.48s' ホスト: '%-.64s' (%-.64s)"
por "Conexão %ld abortada para banco de dados '%-.192s' - usuário '%-.48s' - 'host' '%-.64s' ('%-.64s')"
rus "Прервано соединение %ld к базе данных '%-.192s' пользователя '%-.48s' с хоста '%-.64s' (%-.64s)"
serbian "Prekinuta konekcija broj %ld ka bazi: '%-.192s' korisnik je bio: '%-.48s' a host: '%-.64s' (%-.64s)"
@@ -4089,14 +4051,14 @@ ER_NEW_ABORTING_CONNECTION 08S01
swe "Avbröt länken för tråd %ld till db '%-.192s', användare '%-.48s', host '%-.64s' (%-.64s)"
ukr "Перервано з'єднання %ld до бази данних: '%-.192s' користувач: '%-.48s' хост: '%-.64s' (%-.64s)"
ER_DUMP_NOT_IMPLEMENTED
- cze "Handler tabulky nepodporuje bin-Bární dump"
+ cze "Handler tabulky nepodporuje binární dump"
        dan "Denne tabeltype understøtter ikke binært tabeldump"
nla "De 'handler' voor de tabel ondersteund geen binaire tabel dump"
eng "The storage engine for the table does not support binary table dump"
fre "Ce type de table ne supporte pas les copies binaires"
ger "Die Speicher-Engine für die Tabelle unterstützt keinen binären Tabellen-Dump"
ita "Il gestore per la tabella non supporta il dump binario"
- jpn "The handler for the table does not support binary table dump"
+ jpn "この表のストレージエンジンはバイナリ形式の表ダンプを利用できません。"
por "O manipulador de tabela não suporta 'dump' binário de tabela"
rum "The handler for the table does not support binary table dump"
rus "Обработчик этой таблицы не поддерживает двоичного сохранения образа таблицы (dump)"
@@ -4107,12 +4069,13 @@ ER_DUMP_NOT_IMPLEMENTED
ER_FLUSH_MASTER_BINLOG_CLOSED
eng "Binlog closed, cannot RESET MASTER"
ger "Binlog geschlossen. Kann RESET MASTER nicht ausführen"
+ jpn "バイナリログがクローズされています。RESET MASTER を実行できません。"
por "Binlog fechado. Não pode fazer RESET MASTER"
rus "Двоичный журнал обновления закрыт, невозможно выполнить RESET MASTER"
serbian "Binarni log file zatvoren, ne mogu da izvršim komandu 'RESET MASTER'"
ukr "Реплікаційний лог закрито, не можу виконати RESET MASTER"
ER_INDEX_REBUILD
- cze "P-Břebudování indexu dumpnuté tabulky '%-.192s' nebylo úspěšné"
+ cze "Přebudování indexu dumpnuté tabulky '%-.192s' nebylo úspěšné"
dan "Kunne ikke genopbygge indekset for den dumpede tabel '%-.192s'"
nla "Gefaald tijdens heropbouw index van gedumpte tabel '%-.192s'"
eng "Failed rebuilding the index of dumped table '%-.192s'"
@@ -4121,6 +4084,7 @@ ER_INDEX_REBUILD
greek "Failed rebuilding the index of dumped table '%-.192s'"
hun "Failed rebuilding the index of dumped table '%-.192s'"
ita "Fallita la ricostruzione dell'indice della tabella copiata '%-.192s'"
+ jpn "ダンプ表 '%-.192s' の索引再構築に失敗しました。"
por "Falhou na reconstrução do índice da tabela 'dumped' '%-.192s'"
rus "Ошибка перестройки индекса сохраненной таблицы '%-.192s'"
serbian "Izgradnja indeksa dump-ovane tabele '%-.192s' nije uspela"
@@ -4134,20 +4098,22 @@ ER_MASTER
fre "Erreur reçue du maître: '%-.64s'"
ger "Fehler vom Master: '%-.64s'"
        ita "Errore dal master: '%-.64s'"
+ jpn "マスターでエラーが発生: '%-.64s'"
por "Erro no 'master' '%-.64s'"
rus "Ошибка от головного сервера: '%-.64s'"
serbian "Greška iz glavnog servera '%-.64s' u klasteru"
spa "Error del master: '%-.64s'"
- swe "Fick en master: '%-.64s'"
+ swe "Fel från master: '%-.64s'"
ukr "Помилка від головного: '%-.64s'"
ER_MASTER_NET_READ 08S01
- cze "S-Bíťová chyba při čtení z masteru"
+ cze "Síťová chyba při čtení z masteru"
dan "Netværksfejl ved læsning fra master"
nla "Net fout tijdens lezen van master"
eng "Net error reading from master"
fre "Erreur de lecture réseau reçue du maître"
ger "Netzfehler beim Lesen vom Master"
ita "Errore di rete durante la ricezione dal master"
+ jpn "マスターからのデータ受信中のネットワークエラー"
por "Erro de rede lendo do 'master'"
rus "Возникла ошибка чтения в процессе коммуникации с головным сервером"
serbian "Greška u primanju mrežnih paketa sa glavnog servera u klasteru"
@@ -4155,13 +4121,14 @@ ER_MASTER_NET_READ 08S01
swe "Fick nätverksfel vid läsning från master"
ukr "Мережева помилка читання від головного"
ER_MASTER_NET_WRITE 08S01
- cze "S-Bíťová chyba při zápisu na master"
+ cze "Síťová chyba při zápisu na master"
dan "Netværksfejl ved skrivning til master"
nla "Net fout tijdens schrijven naar master"
eng "Net error writing to master"
fre "Erreur d'écriture réseau reçue du maître"
ger "Netzfehler beim Schreiben zum Master"
ita "Errore di rete durante l'invio al master"
+ jpn "マスターへのデータ送信中のネットワークエラー"
por "Erro de rede gravando no 'master'"
rus "Возникла ошибка записи в процессе коммуникации с головным сервером"
serbian "Greška u slanju mrežnih paketa na glavni server u klasteru"
@@ -4169,7 +4136,7 @@ ER_MASTER_NET_WRITE 08S01
swe "Fick nätverksfel vid skrivning till master"
ukr "Мережева помилка запису до головного"
ER_FT_MATCHING_KEY_NOT_FOUND
- cze "-BŽádný sloupec nemá vytvořen fulltextový index"
+ cze "Žádný sloupec nemá vytvořen fulltextový index"
dan "Kan ikke finde en FULLTEXT nøgle som svarer til kolonne listen"
nla "Kan geen FULLTEXT index vinden passend bij de kolom lijst"
eng "Can't find FULLTEXT index matching the column list"
@@ -4177,6 +4144,7 @@ ER_FT_MATCHING_KEY_NOT_FOUND
fre "Impossible de trouver un index FULLTEXT correspondant à cette liste de colonnes"
ger "Kann keinen FULLTEXT-Index finden, der der Feldliste entspricht"
ita "Impossibile trovare un indice FULLTEXT che corrisponda all'elenco delle colonne"
+ jpn "列リストに対応する全文索引(FULLTEXT)が見つかりません。"
por "Não pode encontrar um índice para o texto todo que combine com a lista de colunas"
rus "Невозможно отыскать полнотекстовый (FULLTEXT) индекс, соответствующий списку столбцов"
serbian "Ne mogu da pronađem 'FULLTEXT' indeks koli odgovara listi kolona"
@@ -4184,7 +4152,7 @@ ER_FT_MATCHING_KEY_NOT_FOUND
swe "Hittar inte ett FULLTEXT-index i kolumnlistan"
ukr "Не можу знайти FULLTEXT індекс, що відповідає переліку стовбців"
ER_LOCK_OR_ACTIVE_TRANSACTION
- cze "Nemohu prov-Bést zadaný příkaz, protože existují aktivní zamčené tabulky nebo aktivní transakce"
+ cze "Nemohu provést zadaný příkaz, protože existují aktivní zamčené tabulky nebo aktivní transakce"
dan "Kan ikke udføre den givne kommando fordi der findes aktive, låste tabeller eller fordi der udføres en transaktion"
nla "Kan het gegeven commando niet uitvoeren, want u heeft actieve gelockte tabellen of een actieve transactie"
eng "Can't execute the given command because you have active locked tables or an active transaction"
@@ -4192,6 +4160,7 @@ ER_LOCK_OR_ACTIVE_TRANSACTION
fre "Impossible d'exécuter la commande car vous avez des tables verrouillées ou une transaction active"
ger "Kann den angegebenen Befehl wegen einer aktiven Tabellensperre oder einer aktiven Transaktion nicht ausführen"
ita "Impossibile eseguire il comando richiesto: tabelle sotto lock o transazione in atto"
+ jpn "すでにアクティブな表ロックやトランザクションがあるため、コマンドを実行できません。"
por "Não pode executar o comando dado porque você tem tabelas ativas travadas ou uma transação ativa"
rus "Невозможно выполнить указанную команду, поскольку у вас присутствуют активно заблокированные таблица или открытая транзакция"
serbian "Ne mogu da izvršim datu komandu zbog toga što su tabele zaključane ili je transakcija u toku"
@@ -4199,7 +4168,7 @@ ER_LOCK_OR_ACTIVE_TRANSACTION
swe "Kan inte utföra kommandot emedan du har en låst tabell eller an aktiv transaktion"
ukr "Не можу виконати подану команду тому, що таблиця заблокована або виконується транзакція"
ER_UNKNOWN_SYSTEM_VARIABLE
- cze "Nezn-Bámá systémová proměnná '%-.64s'"
+ cze "Neznámá systémová proměnná '%-.64s'"
dan "Ukendt systemvariabel '%-.64s'"
nla "Onbekende systeem variabele '%-.64s'"
eng "Unknown system variable '%-.64s'"
@@ -4207,6 +4176,7 @@ ER_UNKNOWN_SYSTEM_VARIABLE
fre "Variable système '%-.64s' inconnue"
ger "Unbekannte Systemvariable '%-.64s'"
ita "Variabile di sistema '%-.64s' sconosciuta"
+ jpn "'%-.64s' は不明なシステム変数です。"
por "Variável de sistema '%-.64s' desconhecida"
rus "Неизвестная системная переменная '%-.64s'"
serbian "Nepoznata sistemska promenljiva '%-.64s'"
@@ -4214,7 +4184,7 @@ ER_UNKNOWN_SYSTEM_VARIABLE
swe "Okänd systemvariabel: '%-.64s'"
ukr "Невідома системна змінна '%-.64s'"
ER_CRASHED_ON_USAGE
- cze "Tabulka '%-.192s' je ozna-Bčena jako porušená a měla by být opravena"
+ cze "Tabulka '%-.192s' je označena jako porušená a měla by být opravena"
dan "Tabellen '%-.192s' er markeret med fejl og bør repareres"
nla "Tabel '%-.192s' staat als gecrashed gemarkeerd en dient te worden gerepareerd"
eng "Table '%-.192s' is marked as crashed and should be repaired"
@@ -4222,6 +4192,7 @@ ER_CRASHED_ON_USAGE
fre "La table '%-.192s' est marquée 'crashed' et devrait être réparée"
ger "Tabelle '%-.192s' ist als defekt markiert und sollte repariert werden"
ita "La tabella '%-.192s' e` segnalata come corrotta e deve essere riparata"
+ jpn "表 '%-.192s' は壊れています。修復が必要です。"
por "Tabela '%-.192s' está marcada como danificada e deve ser reparada"
rus "Таблица '%-.192s' помечена как испорченная и должна пройти проверку и ремонт"
serbian "Tabela '%-.192s' je markirana kao oštećena i trebala bi biti popravljena"
@@ -4229,7 +4200,7 @@ ER_CRASHED_ON_USAGE
swe "Tabell '%-.192s' är trasig och bör repareras med REPAIR TABLE"
ukr "Таблицю '%-.192s' марковано як зіпсовану та її потрібно відновити"
ER_CRASHED_ON_REPAIR
- cze "Tabulka '%-.192s' je ozna-Bčena jako porušená a poslední (automatická?) oprava se nezdařila"
+ cze "Tabulka '%-.192s' je označena jako porušená a poslední (automatická?) oprava se nezdařila"
dan "Tabellen '%-.192s' er markeret med fejl og sidste (automatiske?) REPAIR fejlede"
nla "Tabel '%-.192s' staat als gecrashed gemarkeerd en de laatste (automatische?) reparatie poging mislukte"
eng "Table '%-.192s' is marked as crashed and last (automatic?) repair failed"
@@ -4237,6 +4208,7 @@ ER_CRASHED_ON_REPAIR
fre "La table '%-.192s' est marquée 'crashed' et le dernier 'repair' a échoué"
ger "Tabelle '%-.192s' ist als defekt markiert und der letzte (automatische?) Reparaturversuch schlug fehl"
ita "La tabella '%-.192s' e` segnalata come corrotta e l'ultima ricostruzione (automatica?) e` fallita"
+ jpn "表 '%-.192s' は壊れています。修復(自動?)にも失敗しています。"
por "Tabela '%-.192s' está marcada como danificada e a última reparação (automática?) falhou"
rus "Таблица '%-.192s' помечена как испорченная и последний (автоматический?) ремонт не был успешным"
serbian "Tabela '%-.192s' je markirana kao oštećena, a zadnja (automatska?) popravka je bila neuspela"
@@ -4251,6 +4223,7 @@ ER_WARNING_NOT_COMPLETE_ROLLBACK
fre "Attention: certaines tables ne supportant pas les transactions ont été changées et elles ne pourront pas être restituées"
ger "Änderungen an einigen nicht transaktionalen Tabellen konnten nicht zurückgerollt werden"
ita "Attenzione: Alcune delle modifiche alle tabelle non transazionali non possono essere ripristinate (roll back impossibile)"
+ jpn "トランザクション対応ではない表への変更はロールバックされません。"
por "Aviso: Algumas tabelas não-transacionais alteradas não puderam ser reconstituídas (rolled back)"
rus "Внимание: по некоторым измененным нетранзакционным таблицам невозможно будет произвести откат транзакции"
serbian "Upozorenje: Neke izmenjene tabele ne podržavaju komandu 'ROLLBACK'"
@@ -4265,6 +4238,7 @@ ER_TRANS_CACHE_FULL
fre "Cette transaction à commandes multiples nécessite plus de 'max_binlog_cache_size' octets de stockage, augmentez cette variable de mysqld et réessayez"
        ger "Transaktionen, die aus mehreren Befehlen bestehen, benötigten mehr als 'max_binlog_cache_size' Bytes an Speicher. Bitte vergrössern Sie diese Server-Variable und versuchen Sie es noch einmal"
ita "La transazione a comandi multipli (multi-statement) ha richiesto piu` di 'max_binlog_cache_size' bytes di disco: aumentare questa variabile di mysqld e riprovare"
+ jpn "複数ステートメントから成るトランザクションが 'max_binlog_cache_size' 以上の容量を必要としました。このシステム変数を増加して、再試行してください。"
por "Transações multi-declaradas (multi-statement transactions) requeriram mais do que o valor limite (max_binlog_cache_size) de bytes para armazenagem. Aumente o valor desta variável do mysqld e tente novamente"
rus "Транзакции, включающей большое количество команд, потребовалось более чем 'max_binlog_cache_size' байт. Увеличьте эту переменную сервера mysqld и попробуйте еще раз"
spa "Multipla transición necesita mas que 'max_binlog_cache_size' bytes de almacenamiento. Aumente esta variable mysqld y tente de nuevo"
@@ -4290,6 +4264,7 @@ ER_SLAVE_NOT_RUNNING
fre "Cette opération nécessite un esclave actif, configurez les esclaves et faites START SLAVE"
ger "Diese Operation benötigt einen aktiven Slave. Bitte Slave konfigurieren und mittels START SLAVE aktivieren"
        ita "Questa operazione richiede un database 'slave', configurarlo ed eseguire START SLAVE"
+ jpn "この処理は、稼働中のスレーブでなければ実行できません。スレーブの設定をしてSTART SLAVEコマンドを実行してください。"
por "Esta operação requer um 'slave' em execução. Configure o 'slave' e execute START SLAVE"
rus "Для этой операции требуется работающий подчиненный сервер. Сначала выполните START SLAVE"
serbian "Ova operacija zahteva da je aktivan podređeni server. Konfigurišite prvo podređeni server i onda izvršite komandu 'START SLAVE'"
@@ -4303,6 +4278,7 @@ ER_BAD_SLAVE
fre "Le server n'est pas configuré comme un esclave, changez le fichier de configuration ou utilisez CHANGE MASTER TO"
ger "Der Server ist nicht als Slave konfiguriert. Bitte in der Konfigurationsdatei oder mittels CHANGE MASTER TO beheben"
ita "Il server non e' configurato come 'slave', correggere il file di configurazione cambiando CHANGE MASTER TO"
+ jpn "このサーバーはスレーブとして設定されていません。コンフィグファイルかCHANGE MASTER TOコマンドで設定して下さい。"
por "O servidor não está configurado como 'slave'. Acerte o arquivo de configuração ou use CHANGE MASTER TO"
rus "Этот сервер не настроен как подчиненный. Внесите исправления в конфигурационном файле или с помощью CHANGE MASTER TO"
serbian "Server nije konfigurisan kao podređeni server, ispravite konfiguracioni file ili na njemu izvršite komandu 'CHANGE MASTER TO'"
@@ -4313,15 +4289,17 @@ ER_MASTER_INFO
eng "Could not initialize master info structure for '%.*s'; more error messages can be found in the MariaDB error log"
fre "Impossible d'initialiser les structures d'information de maître '%.*s', vous trouverez des messages d'erreur supplémentaires dans le journal des erreurs de MariaDB"
ger "Konnte Master-Info-Struktur '%.*s' nicht initialisieren. Weitere Fehlermeldungen können im MariaDB-Error-Log eingesehen werden"
+ jpn "'master info '%.*s''構造体の初期化ができませんでした。MariaDBエラーログでエラーメッセージを確認してください。"
serbian "Nisam mogao da inicijalizujem informacionu strukturu glavnog servera, proverite da li imam privilegije potrebne za pristup file-u 'master.info' '%.*s'"
swe "Kunde inte initialisera replikationsstrukturerna för '%.*s'. See MariaDB fel fil för mera information"
-ER_SLAVE_THREAD
+ER_SLAVE_THREAD
dan "Kunne ikke danne en slave-tråd; check systemressourcerne"
nla "Kon slave thread niet aanmaken, controleer systeem resources"
eng "Could not create slave thread; check system resources"
fre "Impossible de créer une tâche esclave, vérifiez les ressources système"
ger "Konnte Slave-Thread nicht starten. Bitte System-Ressourcen überprüfen"
ita "Impossibile creare il thread 'slave', controllare le risorse di sistema"
+ jpn "スレーブスレッドを作成できません。システムリソースを確認してください。"
por "Não conseguiu criar 'thread' de 'slave'. Verifique os recursos do sistema"
rus "Невозможно создать поток подчиненного сервера. Проверьте системные ресурсы"
serbian "Nisam mogao da startujem thread za podređeni server, proverite sistemske resurse"
@@ -4336,6 +4314,7 @@ ER_TOO_MANY_USER_CONNECTIONS 42000
fre "L'utilisateur %-.64s possède déjà plus de 'max_user_connections' connexions actives"
ger "Benutzer '%-.64s' hat mehr als 'max_user_connections' aktive Verbindungen"
ita "L'utente %-.64s ha gia' piu' di 'max_user_connections' connessioni attive"
+ jpn "ユーザー '%-.64s' はすでに 'max_user_connections' 以上のアクティブな接続を行っています。"
por "Usuário '%-.64s' já possui mais que o valor máximo de conexões (max_user_connections) ativas"
rus "У пользователя %-.64s уже больше чем 'max_user_connections' активных соединений"
serbian "Korisnik %-.64s već ima više aktivnih konekcija nego što je to određeno 'max_user_connections' promenljivom"
@@ -4350,6 +4329,7 @@ ER_SET_CONSTANTS_ONLY
fre "Seules les expressions constantes sont autorisées avec SET"
ger "Bei diesem Befehl dürfen nur konstante Ausdrücke verwendet werden"
ita "Si possono usare solo espressioni costanti con SET"
+ jpn "SET処理が失敗しました。"
por "Você pode usar apenas expressões constantes com SET"
rus "С этой командой вы можете использовать только константные выражения"
serbian "Možete upotrebiti samo konstantan iskaz sa komandom 'SET'"
@@ -4364,6 +4344,7 @@ ER_LOCK_WAIT_TIMEOUT
fre "Timeout sur l'obtention du verrou"
ger "Beim Warten auf eine Sperre wurde die zulässige Wartezeit überschritten. Bitte versuchen Sie, die Transaktion neu zu starten"
ita "E' scaduto il timeout per l'attesa del lock"
+ jpn "ロック待ちがタイムアウトしました。トランザクションを再試行してください。"
por "Tempo de espera (timeout) de travamento excedido. Tente reiniciar a transação."
rus "Таймаут ожидания блокировки истек; попробуйте перезапустить транзакцию"
serbian "Vremenski limit za zaključavanje tabele je istekao; Probajte da ponovo startujete transakciju"
@@ -4378,6 +4359,7 @@ ER_LOCK_TABLE_FULL
fre "Le nombre total de verrou dépasse la taille de la table des verrous"
ger "Die Gesamtzahl der Sperren überschreitet die Größe der Sperrtabelle"
ita "Il numero totale di lock e' maggiore della grandezza della tabella di lock"
+ jpn "ロックの数が多すぎます。"
por "O número total de travamentos excede o tamanho da tabela de travamentos"
rus "Общее количество блокировок превысило размеры таблицы блокировок"
serbian "Broj totalnih zaključavanja tabele premašuje veličinu tabele zaključavanja"
@@ -4392,6 +4374,7 @@ ER_READ_ONLY_TRANSACTION 25000
fre "Un verrou en update ne peut être acquit pendant une transaction READ UNCOMMITTED"
ger "Während einer READ-UNCOMMITTED-Transaktion können keine UPDATE-Sperren angefordert werden"
ita "I lock di aggiornamento non possono essere acquisiti durante una transazione 'READ UNCOMMITTED'"
+ jpn "読み込み専用トランザクションです。"
por "Travamentos de atualização não podem ser obtidos durante uma transação de tipo READ UNCOMMITTED"
rus "Блокировки обновлений нельзя получить в процессе чтения не принятой (в режиме READ UNCOMMITTED) транзакции"
serbian "Zaključavanja izmena ne mogu biti realizovana sve dok traje 'READ UNCOMMITTED' transakcija"
@@ -4406,6 +4389,7 @@ ER_DROP_DB_WITH_READ_LOCK
fre "DROP DATABASE n'est pas autorisée pendant qu'une tâche possède un verrou global en lecture"
ger "DROP DATABASE ist nicht erlaubt, solange der Thread eine globale Lesesperre hält"
ita "DROP DATABASE non e' permesso mentre il thread ha un lock globale di lettura"
+ jpn "グローバルリードロックを保持している間は、DROP DATABASE を実行できません。"
por "DROP DATABASE não permitido enquanto uma 'thread' está mantendo um travamento global de leitura"
rus "Не допускается DROP DATABASE, пока поток держит глобальную блокировку чтения"
serbian "Komanda 'DROP DATABASE' nije dozvoljena dok thread globalno zaključava čitanje podataka"
@@ -4420,6 +4404,7 @@ ER_CREATE_DB_WITH_READ_LOCK
fre "CREATE DATABASE n'est pas autorisée pendant qu'une tâche possède un verrou global en lecture"
ger "CREATE DATABASE ist nicht erlaubt, solange der Thread eine globale Lesesperre hält"
ita "CREATE DATABASE non e' permesso mentre il thread ha un lock globale di lettura"
+ jpn "グローバルリードロックを保持している間は、CREATE DATABASE を実行できません。"
por "CREATE DATABASE não permitido enquanto uma 'thread' está mantendo um travamento global de leitura"
rus "Не допускается CREATE DATABASE, пока поток держит глобальную блокировку чтения"
serbian "Komanda 'CREATE DATABASE' nije dozvoljena dok thread globalno zaključava čitanje podataka"
@@ -4433,6 +4418,7 @@ ER_WRONG_ARGUMENTS
fre "Mauvais arguments à %s"
ger "Falsche Argumente für %s"
ita "Argomenti errati a %s"
+       jpn "%s の引数が不正です。"
por "Argumentos errados para %s"
rus "Неверные параметры для %s"
serbian "Pogrešni argumenti prosleđeni na %s"
@@ -4446,6 +4432,7 @@ ER_NO_PERMISSION_TO_CREATE_USER 42000
fre "'%-.48s'@'%-.64s' n'est pas autorisé à créer de nouveaux utilisateurs"
ger "'%-.48s'@'%-.64s' ist nicht berechtigt, neue Benutzer hinzuzufügen"
ita "A '%-.48s'@'%-.64s' non e' permesso creare nuovi utenti"
+ jpn "'%-.48s'@'%-.64s' は新しいユーザーを作成できません。"
por "Não é permitido a '%-.48s'@'%-.64s' criar novos usuários"
rus "'%-.48s'@'%-.64s' не разрешается создавать новых пользователей"
serbian "Korisniku '%-.48s'@'%-.64s' nije dozvoljeno da kreira nove korisnike"
@@ -4459,6 +4446,7 @@ ER_UNION_TABLES_IN_DIFFERENT_DIR
fre "Définition de table incorrecte; toutes les tables MERGE doivent être dans la même base de donnée"
ger "Falsche Tabellendefinition. Alle MERGE-Tabellen müssen sich in derselben Datenbank befinden"
ita "Definizione della tabella errata; tutte le tabelle di tipo MERGE devono essere nello stesso database"
+ jpn "不正な表定義です。MERGE表の構成表はすべて同じデータベース内になければなりません。"
por "Definição incorreta da tabela. Todas as tabelas contidas na junção devem estar no mesmo banco de dados."
rus "Неверное определение таблицы; Все таблицы в MERGE должны принадлежать одной и той же базе данных"
serbian "Pogrešna definicija tabele; sve 'MERGE' tabele moraju biti u istoj bazi podataka"
@@ -4471,6 +4459,7 @@ ER_LOCK_DEADLOCK 40001
fre "Deadlock découvert en essayant d'obtenir les verrous : essayez de redémarrer la transaction"
ger "Beim Versuch, eine Sperre anzufordern, ist ein Deadlock aufgetreten. Versuchen Sie, die Transaktion neu zu starten"
ita "Trovato deadlock durante il lock; Provare a far ripartire la transazione"
+ jpn "ロック取得中にデッドロックが検出されました。トランザクションを再試行してください。"
por "Encontrado um travamento fatal (deadlock) quando tentava obter uma trava. Tente reiniciar a transação."
rus "Возникла тупиковая ситуация в процессе получения блокировки; Попробуйте перезапустить транзакцию"
serbian "Unakrsno zaključavanje pronađeno kada sam pokušao da dobijem pravo na zaključavanje; Probajte da restartujete transakciju"
@@ -4483,6 +4472,7 @@ ER_TABLE_CANT_HANDLE_FT
fre "Le type de table utilisé ne supporte pas les index FULLTEXT"
ger "Der verwendete Tabellentyp unterstützt keine FULLTEXT-Indizes"
ita "La tabella usata non supporta gli indici FULLTEXT"
+ jpn "使用の表は全文索引を利用できません。"
por "O tipo de tabela utilizado não suporta índices de texto completo (fulltext indexes)"
rus "Используемый тип таблиц не поддерживает полнотекстовых индексов"
serbian "Upotrebljeni tip tabele ne podržava 'FULLTEXT' indekse"
@@ -4495,6 +4485,7 @@ ER_CANNOT_ADD_FOREIGN
fre "Impossible d'ajouter des contraintes d'index externe"
ger "Fremdschlüssel-Beschränkung kann nicht hinzugefügt werden"
ita "Impossibile aggiungere il vincolo di integrita' referenziale (foreign key constraint)"
+ jpn "外部キー制約を追加できません。"
por "Não pode acrescentar uma restrição de chave estrangeira"
rus "Невозможно добавить ограничения внешнего ключа"
serbian "Ne mogu da dodam proveru spoljnog ključa"
@@ -4508,6 +4499,7 @@ ER_NO_REFERENCED_ROW 23000
greek "Cannot add a child row: a foreign key constraint fails"
hun "Cannot add a child row: a foreign key constraint fails"
ita "Impossibile aggiungere la riga: un vincolo d'integrita' referenziale non e' soddisfatto"
+ jpn "親キーがありません。外部キー制約違反です。"
norwegian-ny "Cannot add a child row: a foreign key constraint fails"
por "Não pode acrescentar uma linha filha: uma restrição de chave estrangeira falhou"
rus "Невозможно добавить или обновить дочернюю строку: проверка ограничений внешнего ключа не выполняется"
@@ -4520,6 +4512,7 @@ ER_ROW_IS_REFERENCED 23000
greek "Cannot delete a parent row: a foreign key constraint fails"
hun "Cannot delete a parent row: a foreign key constraint fails"
ita "Impossibile cancellare la riga: un vincolo d'integrita' referenziale non e' soddisfatto"
+ jpn "子レコードがあります。外部キー制約違反です。"
por "Não pode apagar uma linha pai: uma restrição de chave estrangeira falhou"
rus "Невозможно удалить или обновить родительскую строку: проверка ограничений внешнего ключа не выполняется"
serbian "Ne mogu da izbrišem roditeljski slog: provera spoljnog ključa je neuspela"
@@ -4530,6 +4523,7 @@ ER_CONNECT_TO_MASTER 08S01
eng "Error connecting to master: %-.128s"
ger "Fehler bei der Verbindung zum Master: %-.128s"
ita "Errore durante la connessione al master: %-.128s"
+ jpn "マスターへの接続エラー: %-.128s"
por "Erro conectando com o master: %-.128s"
rus "Ошибка соединения с головным сервером: %-.128s"
spa "Error de coneccion a master: %-.128s"
@@ -4539,6 +4533,7 @@ ER_QUERY_ON_MASTER
eng "Error running query on master: %-.128s"
ger "Beim Ausführen einer Abfrage auf dem Master trat ein Fehler auf: %-.128s"
ita "Errore eseguendo una query sul master: %-.128s"
+ jpn "マスターでのクエリ実行エラー: %-.128s"
por "Erro rodando consulta no master: %-.128s"
rus "Ошибка выполнения запроса на головном сервере: %-.128s"
spa "Error executando el query en master: %-.128s"
@@ -4549,6 +4544,7 @@ ER_ERROR_WHEN_EXECUTING_COMMAND
est "Viga käsu %s täitmisel: %-.128s"
ger "Fehler beim Ausführen des Befehls %s: %-.128s"
ita "Errore durante l'esecuzione del comando %s: %-.128s"
+ jpn "%s コマンドの実行エラー: %-.128s"
por "Erro quando executando comando %s: %-.128s"
rus "Ошибка при выполнении команды %s: %-.128s"
serbian "Greška pri izvršavanju komande %s: %-.128s"
@@ -4560,6 +4556,7 @@ ER_WRONG_USAGE
est "Vigane %s ja %s kasutus"
ger "Falsche Verwendung von %s und %s"
ita "Uso errato di %s e %s"
+ jpn "%s の %s に関する不正な使用法です。"
por "Uso errado de %s e %s"
rus "Неверное использование %s и %s"
serbian "Pogrešna upotreba %s i %s"
@@ -4572,6 +4569,7 @@ ER_WRONG_NUMBER_OF_COLUMNS_IN_SELECT 21000
est "Tulpade arv kasutatud SELECT lausetes ei kattu"
ger "Die verwendeten SELECT-Befehle liefern unterschiedliche Anzahlen von Feldern zurück"
ita "La SELECT utilizzata ha un numero di colonne differente"
+ jpn "使用のSELECT文が返す列数が違います。"
por "Os comandos SELECT usados têm diferente número de colunas"
rus "Использованные операторы выборки (SELECT) дают разное количество столбцов"
serbian "Upotrebljene 'SELECT' komande adresiraju različit broj kolona"
@@ -4583,6 +4581,7 @@ ER_CANT_UPDATE_WITH_READLOCK
est "Ei suuda täita päringut konfliktse luku tõttu"
ger "Augrund eines READ-LOCK-Konflikts kann die Abfrage nicht ausgeführt werden"
        ita "Impossibile eseguire la query perche' c'e' un conflitto con un lock di lettura"
+ jpn "競合するリードロックを保持しているので、クエリを実行できません。"
por "Não posso executar a consulta porque você tem um conflito de travamento de leitura"
rus "Невозможно исполнить запрос, поскольку у вас установлены конфликтующие блокировки чтения"
serbian "Ne mogu da izvršim upit zbog toga što imate zaključavanja čitanja podataka u konfliktu"
@@ -4594,6 +4593,7 @@ ER_MIXING_NOT_ALLOWED
est "Transaktsioone toetavate ning mittetoetavate tabelite kooskasutamine ei ole lubatud"
ger "Die gleichzeitige Verwendung von Tabellen mit und ohne Transaktionsunterstützung ist deaktiviert"
ita "E' disabilitata la possibilita' di mischiare tabelle transazionali e non-transazionali"
+ jpn "トランザクション対応の表と非対応の表の同時使用は無効化されています。"
por "Mistura de tabelas transacional e não-transacional está desabilitada"
rus "Использование транзакционных таблиц наряду с нетранзакционными запрещено"
serbian "Mešanje tabela koje podržavaju transakcije i onih koje ne podržavaju transakcije je isključeno"
@@ -4605,6 +4605,7 @@ ER_DUP_ARGUMENT
est "Määrangut '%s' on lauses kasutatud topelt"
ger "Option '%s' wird im Befehl zweimal verwendet"
ita "L'opzione '%s' e' stata usata due volte nel comando"
+ jpn "オプション '%s' が2度使用されています。"
por "Opção '%s' usada duas vezes no comando"
rus "Опция '%s' дважды использована в выражении"
spa "Opción '%s' usada dos veces en el comando"
@@ -4614,6 +4615,7 @@ ER_USER_LIMIT_REACHED 42000
eng "User '%-.64s' has exceeded the '%s' resource (current value: %ld)"
ger "Benutzer '%-.64s' hat die Ressourcenbeschränkung '%s' überschritten (aktueller Wert: %ld)"
ita "L'utente '%-.64s' ha ecceduto la risorsa '%s' (valore corrente: %ld)"
+ jpn "ユーザー '%-.64s' はリソースの上限 '%s' に達しました。(現在値: %ld)"
por "Usuário '%-.64s' tem excedido o '%s' recurso (atual valor: %ld)"
rus "Пользователь '%-.64s' превысил использование ресурса '%s' (текущее значение: %ld)"
spa "Usuario '%-.64s' ha excedido el recurso '%s' (actual valor: %ld)"
@@ -4623,6 +4625,7 @@ ER_SPECIFIC_ACCESS_DENIED_ERROR 42000
eng "Access denied; you need (at least one of) the %-.128s privilege(s) for this operation"
ger "Kein Zugriff. Hierfür wird die Berechtigung %-.128s benötigt"
ita "Accesso non consentito. Serve il privilegio %-.128s per questa operazione"
+ jpn "アクセスは拒否されました。この操作には %-.128s 権限が(複数の場合はどれか1つ)必要です。"
por "Acesso negado. Você precisa o privilégio %-.128s para essa operação"
rus "В доступе отказано. Вам нужны привилегии %-.128s для этой операции"
spa "Acceso negado. Usted necesita el privilegio %-.128s para esta operación"
@@ -4633,6 +4636,7 @@ ER_LOCAL_VARIABLE
eng "Variable '%-.64s' is a SESSION variable and can't be used with SET GLOBAL"
ger "Variable '%-.64s' ist eine lokale Variable und kann nicht mit SET GLOBAL verändert werden"
ita "La variabile '%-.64s' e' una variabile locale ( SESSION ) e non puo' essere cambiata usando SET GLOBAL"
+ jpn "変数 '%-.64s' はセッション変数です。SET GLOBALでは使用できません。"
por "Variável '%-.64s' é uma SESSION variável e não pode ser usada com SET GLOBAL"
rus "Переменная '%-.64s' является потоковой (SESSION) переменной и не может быть изменена с помощью SET GLOBAL"
spa "Variable '%-.64s' es una SESSION variable y no puede ser usada con SET GLOBAL"
@@ -4642,6 +4646,7 @@ ER_GLOBAL_VARIABLE
eng "Variable '%-.64s' is a GLOBAL variable and should be set with SET GLOBAL"
ger "Variable '%-.64s' ist eine globale Variable und muss mit SET GLOBAL verändert werden"
ita "La variabile '%-.64s' e' una variabile globale ( GLOBAL ) e deve essere cambiata usando SET GLOBAL"
+ jpn "変数 '%-.64s' はグローバル変数です。SET GLOBALを使用してください。"
por "Variável '%-.64s' é uma GLOBAL variável e deve ser configurada com SET GLOBAL"
rus "Переменная '%-.64s' является глобальной (GLOBAL) переменной, и ее следует изменять с помощью SET GLOBAL"
spa "Variable '%-.64s' es una GLOBAL variable y no puede ser configurada con SET GLOBAL"
@@ -4651,6 +4656,7 @@ ER_NO_DEFAULT 42000
eng "Variable '%-.64s' doesn't have a default value"
ger "Variable '%-.64s' hat keinen Vorgabewert"
ita "La variabile '%-.64s' non ha un valore di default"
+ jpn "変数 '%-.64s' にはデフォルト値がありません。"
por "Variável '%-.64s' não tem um valor padrão"
rus "Переменная '%-.64s' не имеет значения по умолчанию"
spa "Variable '%-.64s' no tiene un valor patrón"
@@ -4660,6 +4666,7 @@ ER_WRONG_VALUE_FOR_VAR 42000
eng "Variable '%-.64s' can't be set to the value of '%-.200s'"
ger "Variable '%-.64s' kann nicht auf '%-.200s' gesetzt werden"
        ita "Alla variabile '%-.64s' non puo' essere assegnato il valore '%-.200s'"
+ jpn "変数 '%-.64s' に値 '%-.200s' を設定できません。"
por "Variável '%-.64s' não pode ser configurada para o valor de '%-.200s'"
rus "Переменная '%-.64s' не может быть установлена в значение '%-.200s'"
spa "Variable '%-.64s' no puede ser configurada para el valor de '%-.200s'"
@@ -4669,6 +4676,7 @@ ER_WRONG_TYPE_FOR_VAR 42000
eng "Incorrect argument type to variable '%-.64s'"
ger "Falscher Argumenttyp für Variable '%-.64s'"
ita "Tipo di valore errato per la variabile '%-.64s'"
+ jpn "変数 '%-.64s' への値の型が不正です。"
por "Tipo errado de argumento para variável '%-.64s'"
rus "Неверный тип аргумента для переменной '%-.64s'"
spa "Tipo de argumento equivocado para variable '%-.64s'"
@@ -4678,6 +4686,7 @@ ER_VAR_CANT_BE_READ
eng "Variable '%-.64s' can only be set, not read"
ger "Variable '%-.64s' kann nur verändert, nicht gelesen werden"
ita "Alla variabile '%-.64s' e' di sola scrittura quindi puo' essere solo assegnato un valore, non letto"
+ jpn "変数 '%-.64s' は書き込み専用です。読み込みはできません。"
por "Variável '%-.64s' somente pode ser configurada, não lida"
rus "Переменная '%-.64s' может быть только установлена, но не считана"
spa "Variable '%-.64s' solamente puede ser configurada, no leída"
@@ -4687,6 +4696,7 @@ ER_CANT_USE_OPTION_HERE 42000
eng "Incorrect usage/placement of '%s'"
ger "Falsche Verwendung oder Platzierung von '%s'"
ita "Uso/posizione di '%s' sbagliato"
+ jpn "'%s' の使用法または場所が不正です。"
por "Errado uso/colocação de '%s'"
rus "Неверное использование или в неверном месте указан '%s'"
spa "Equivocado uso/colocación de '%s'"
@@ -4696,6 +4706,7 @@ ER_NOT_SUPPORTED_YET 42000
eng "This version of MariaDB doesn't yet support '%s'"
ger "Diese MariaDB-Version unterstützt '%s' nicht"
ita "Questa versione di MariaDB non supporta ancora '%s'"
+ jpn "このバージョンのMariaDBでは、まだ '%s' を利用できません。"
por "Esta versão de MariaDB não suporta ainda '%s'"
rus "Эта версия MariaDB пока еще не поддерживает '%s'"
spa "Esta versión de MariaDB no soporta todavia '%s'"
@@ -4705,6 +4716,7 @@ ER_MASTER_FATAL_ERROR_READING_BINLOG
eng "Got fatal error %d from master when reading data from binary log: '%-.320s'"
ger "Schwerer Fehler %d: '%-.320s vom Master beim Lesen des binären Logs"
ita "Errore fatale %d: '%-.320s' dal master leggendo i dati dal log binario"
+ jpn "致命的なエラー %d: '%-.320s' がマスターでバイナリログ読み込み中に発生しました。"
por "Obteve fatal erro %d: '%-.320s' do master quando lendo dados do binary log"
rus "Получена неисправимая ошибка %d: '%-.320s' от головного сервера в процессе выборки данных из двоичного журнала"
spa "Recibió fatal error %d: '%-.320s' del master cuando leyendo datos del binary log"
@@ -4712,6 +4724,7 @@ ER_MASTER_FATAL_ERROR_READING_BINLOG
ER_SLAVE_IGNORED_TABLE
eng "Slave SQL thread ignored the query because of replicate-*-table rules"
ger "Slave-SQL-Thread hat die Abfrage aufgrund von replicate-*-table-Regeln ignoriert"
+ jpn "replicate-*-table ルールに従って、スレーブSQLスレッドはクエリを無視しました。"
nla "Slave SQL thread negeerde de query vanwege replicate-*-table opties"
por "Slave SQL thread ignorado a consulta devido às normas de replicação-*-tabela"
spa "Slave SQL thread ignorado el query debido a las reglas de replicación-*-tabla"
@@ -4720,12 +4733,14 @@ ER_INCORRECT_GLOBAL_LOCAL_VAR
eng "Variable '%-.192s' is a %s variable"
serbian "Promenljiva '%-.192s' je %s promenljiva"
ger "Variable '%-.192s' ist eine %s-Variable"
+ jpn "変数 '%-.192s' は %s 変数です。"
nla "Variabele '%-.192s' is geen %s variabele"
spa "Variable '%-.192s' es una %s variable"
swe "Variabel '%-.192s' är av typ %s"
ER_WRONG_FK_DEF 42000
eng "Incorrect foreign key definition for '%-.192s': %s"
ger "Falsche Fremdschlüssel-Definition für '%-.192s': %s"
+ jpn "外部キー '%-.192s' の定義の不正: %s"
nla "Incorrecte foreign key definitie voor '%-.192s': %s"
por "Definição errada da chave estrangeira para '%-.192s': %s"
spa "Equivocada definición de llave extranjera para '%-.192s': %s"
@@ -4733,6 +4748,7 @@ ER_WRONG_FK_DEF 42000
ER_KEY_REF_DO_NOT_MATCH_TABLE_REF
eng "Key reference and table reference don't match"
ger "Schlüssel- und Tabellenverweis passen nicht zusammen"
+ jpn "外部キーの参照表と定義が一致しません。"
nla "Sleutel- en tabelreferentie komen niet overeen"
por "Referência da chave e referência da tabela não coincidem"
spa "Referencia de llave y referencia de tabla no coinciden"
@@ -4740,6 +4756,7 @@ ER_KEY_REF_DO_NOT_MATCH_TABLE_REF
ER_OPERAND_COLUMNS 21000
eng "Operand should contain %d column(s)"
ger "Operand sollte %d Spalte(n) enthalten"
+ jpn "オペランドに %d 個の列が必要です。"
nla "Operand behoort %d kolommen te bevatten"
rus "Операнд должен содержать %d колонок"
spa "Operando debe tener %d columna(s)"
@@ -4747,6 +4764,7 @@ ER_OPERAND_COLUMNS 21000
ER_SUBQUERY_NO_1_ROW 21000
eng "Subquery returns more than 1 row"
ger "Unterabfrage lieferte mehr als einen Datensatz zurück"
+ jpn "サブクエリが2行以上の結果を返します。"
nla "Subquery retourneert meer dan 1 rij"
por "Subconsulta retorna mais que 1 registro"
rus "Подзапрос возвращает более одной записи"
@@ -4757,6 +4775,7 @@ ER_UNKNOWN_STMT_HANDLER
dan "Unknown prepared statement handler (%.*s) given to %s"
eng "Unknown prepared statement handler (%.*s) given to %s"
ger "Unbekannter Prepared-Statement-Handler (%.*s) für %s angegeben"
+ jpn "'%.*s' はプリペアードステートメントの不明なハンドルです。(%s で指定されました)"
nla "Onebekende prepared statement handler (%.*s) voor %s aangegeven"
por "Desconhecido manipulador de declaração preparado (%.*s) determinado para %s"
spa "Desconocido preparado comando handler (%.*s) dado para %s"
@@ -4765,6 +4784,7 @@ ER_UNKNOWN_STMT_HANDLER
ER_CORRUPT_HELP_DB
eng "Help database is corrupt or does not exist"
ger "Die Hilfe-Datenbank ist beschädigt oder existiert nicht"
+ jpn "ヘルプデータベースは壊れているか存在しません。"
nla "Help database is beschadigd of bestaat niet"
por "Banco de dado de ajuda corrupto ou não existente"
spa "Base de datos Help está corrupto o no existe"
@@ -4772,6 +4792,7 @@ ER_CORRUPT_HELP_DB
ER_CYCLIC_REFERENCE
eng "Cyclic reference on subqueries"
ger "Zyklischer Verweis in Unterabfragen"
+ jpn "サブクエリの参照がループしています。"
nla "Cyclische verwijzing in subqueries"
por "Referência cíclica em subconsultas"
rus "Циклическая ссылка на подзапрос"
@@ -4781,6 +4802,7 @@ ER_CYCLIC_REFERENCE
ER_AUTO_CONVERT
eng "Converting column '%s' from %s to %s"
ger "Feld '%s' wird von %s nach %s umgewandelt"
+ jpn "列 '%s' を %s から %s へ変換します。"
nla "Veld '%s' wordt van %s naar %s geconverteerd"
por "Convertendo coluna '%s' de %s para %s"
rus "Преобразование поля '%s' из %s в %s"
@@ -4790,6 +4812,7 @@ ER_AUTO_CONVERT
ER_ILLEGAL_REFERENCE 42S22
eng "Reference '%-.64s' not supported (%s)"
ger "Verweis '%-.64s' wird nicht unterstützt (%s)"
+ jpn "'%-.64s' の参照はできません。(%s)"
nla "Verwijzing '%-.64s' niet ondersteund (%s)"
por "Referência '%-.64s' não suportada (%s)"
rus "Ссылка '%-.64s' не поддерживается (%s)"
@@ -4799,6 +4822,7 @@ ER_ILLEGAL_REFERENCE 42S22
ER_DERIVED_MUST_HAVE_ALIAS 42000
eng "Every derived table must have its own alias"
ger "Für jede abgeleitete Tabelle muss ein eigener Alias angegeben werden"
+ jpn "導出表には別名が必須です。"
nla "Voor elke afgeleide tabel moet een unieke alias worden gebruikt"
por "Cada tabela derivada deve ter seu próprio alias"
spa "Cada tabla derivada debe tener su propio alias"
@@ -4806,6 +4830,7 @@ ER_DERIVED_MUST_HAVE_ALIAS 42000
ER_SELECT_REDUCED 01000
eng "Select %u was reduced during optimization"
ger "Select %u wurde während der Optimierung reduziert"
+ jpn "Select %u は最適化によって減らされました。"
nla "Select %u werd geredureerd tijdens optimtalisatie"
por "Select %u foi reduzido durante otimização"
rus "Select %u был упразднен в процессе оптимизации"
@@ -4815,6 +4840,7 @@ ER_SELECT_REDUCED 01000
ER_TABLENAME_NOT_ALLOWED_HERE 42000
eng "Table '%-.192s' from one of the SELECTs cannot be used in %-.32s"
ger "Tabelle '%-.192s', die in einem der SELECT-Befehle verwendet wurde, kann nicht in %-.32s verwendet werden"
+ jpn "特定のSELECTのみで使用の表 '%-.192s' は %-.32s では使用できません。"
nla "Tabel '%-.192s' uit een van de SELECTS kan niet in %-.32s gebruikt worden"
por "Tabela '%-.192s' de um dos SELECTs não pode ser usada em %-.32s"
spa "Tabla '%-.192s' de uno de los SELECT no puede ser usada en %-.32s"
@@ -4822,6 +4848,7 @@ ER_TABLENAME_NOT_ALLOWED_HERE 42000
ER_NOT_SUPPORTED_AUTH_MODE 08004
eng "Client does not support authentication protocol requested by server; consider upgrading MariaDB client"
ger "Client unterstützt das vom Server erwartete Authentifizierungsprotokoll nicht. Bitte aktualisieren Sie Ihren MariaDB-Client"
+ jpn "クライアントはサーバーが要求する認証プロトコルに対応できません。MariaDBクライアントのアップグレードを検討してください。"
nla "Client ondersteunt het door de server verwachtte authenticatieprotocol niet. Overweeg een nieuwere MariaDB client te gebruiken"
por "Cliente não suporta o protocolo de autenticação exigido pelo servidor; considere a atualização do cliente MariaDB"
spa "Cliente no soporta protocolo de autenticación solicitado por el servidor; considere actualizar el cliente MariaDB"
@@ -4829,6 +4856,7 @@ ER_NOT_SUPPORTED_AUTH_MODE 08004
ER_SPATIAL_CANT_HAVE_NULL 42000
eng "All parts of a SPATIAL index must be NOT NULL"
ger "Alle Teile eines SPATIAL-Index müssen als NOT NULL deklariert sein"
+ jpn "空間索引のキー列は NOT NULL でなければいけません。"
nla "Alle delete van een SPATIAL index dienen als NOT NULL gedeclareerd te worden"
por "Todas as partes de uma SPATIAL index devem ser NOT NULL"
spa "Todas las partes de una SPATIAL index deben ser NOT NULL"
@@ -4836,6 +4864,7 @@ ER_SPATIAL_CANT_HAVE_NULL 42000
ER_COLLATION_CHARSET_MISMATCH 42000
eng "COLLATION '%s' is not valid for CHARACTER SET '%s'"
ger "COLLATION '%s' ist für CHARACTER SET '%s' ungültig"
+ jpn "COLLATION '%s' は CHARACTER SET '%s' に適用できません。"
nla "COLLATION '%s' is niet geldig voor CHARACTER SET '%s'"
por "COLLATION '%s' não é válida para CHARACTER SET '%s'"
spa "COLLATION '%s' no es válido para CHARACTER SET '%s'"
@@ -4843,6 +4872,7 @@ ER_COLLATION_CHARSET_MISMATCH 42000
ER_SLAVE_WAS_RUNNING
eng "Slave is already running"
ger "Slave läuft bereits"
+ jpn "スレーブはすでに稼働中です。"
nla "Slave is reeds actief"
por "O slave já está rodando"
spa "Slave ya está funcionando"
@@ -4850,6 +4880,7 @@ ER_SLAVE_WAS_RUNNING
ER_SLAVE_WAS_NOT_RUNNING
eng "Slave already has been stopped"
ger "Slave wurde bereits angehalten"
+ jpn "スレーブはすでに停止しています。"
nla "Slave is reeds gestopt"
por "O slave já está parado"
spa "Slave ya fué parado"
@@ -4857,24 +4888,28 @@ ER_SLAVE_WAS_NOT_RUNNING
ER_TOO_BIG_FOR_UNCOMPRESS
eng "Uncompressed data size too large; the maximum size is %d (probably, length of uncompressed data was corrupted)"
ger "Unkomprimierte Daten sind zu groß. Die maximale Größe beträgt %d (wahrscheinlich wurde die Länge der unkomprimierten Daten beschädigt)"
+ jpn "展開後のデータが大きすぎます。最大サイズは %d です。(展開後データの長さ情報が壊れている可能性もあります。)"
nla "Ongecomprimeerder data is te groot; de maximum lengte is %d (waarschijnlijk, de lengte van de gecomprimeerde data was beschadigd)"
por "Tamanho muito grande dos dados des comprimidos. O máximo tamanho é %d. (provavelmente, o comprimento dos dados descomprimidos está corrupto)"
spa "Tamaño demasiado grande para datos descomprimidos. El máximo tamaño es %d. (probablemente, extensión de datos descomprimidos fué corrompida)"
ER_ZLIB_Z_MEM_ERROR
eng "ZLIB: Not enough memory"
ger "ZLIB: Nicht genug Speicher"
+ jpn "ZLIB: メモリ不足です。"
nla "ZLIB: Onvoldoende geheugen"
por "ZLIB: Não suficiente memória disponível"
spa "Z_MEM_ERROR: No suficiente memoria para zlib"
ER_ZLIB_Z_BUF_ERROR
eng "ZLIB: Not enough room in the output buffer (probably, length of uncompressed data was corrupted)"
ger "ZLIB: Im Ausgabepuffer ist nicht genug Platz vorhanden (wahrscheinlich wurde die Länge der unkomprimierten Daten beschädigt)"
+ jpn "ZLIB: 出力バッファに十分な空きがありません。(展開後データの長さ情報が壊れている可能性もあります。)"
nla "ZLIB: Onvoldoende ruimte in uitgaande buffer (waarschijnlijk, de lengte van de ongecomprimeerde data was beschadigd)"
por "ZLIB: Não suficiente espaço no buffer emissor (provavelmente, o comprimento dos dados descomprimidos está corrupto)"
spa "Z_BUF_ERROR: No suficiente espacio en el búfer de salida para zlib (probablemente, extensión de datos descomprimidos fué corrompida)"
ER_ZLIB_Z_DATA_ERROR
eng "ZLIB: Input data corrupted"
ger "ZLIB: Eingabedaten beschädigt"
+ jpn "ZLIB: 入力データが壊れています。"
nla "ZLIB: Invoer data beschadigd"
por "ZLIB: Dados de entrada está corrupto"
spa "ZLIB: Dato de entrada fué corrompido para zlib"
@@ -4883,18 +4918,21 @@ ER_CUT_VALUE_GROUP_CONCAT
ER_WARN_TOO_FEW_RECORDS 01000
eng "Row %lu doesn't contain data for all columns"
ger "Zeile %lu enthält nicht für alle Felder Daten"
+ jpn "行 %lu はすべての列へのデータを含んでいません。"
nla "Rij %lu bevat niet de data voor alle kolommen"
por "Conta de registro é menor que a conta de coluna na linha %lu"
spa "Línea %lu no contiene datos para todas las columnas"
ER_WARN_TOO_MANY_RECORDS 01000
eng "Row %lu was truncated; it contained more data than there were input columns"
ger "Zeile %lu gekürzt, die Zeile enthielt mehr Daten, als es Eingabefelder gibt"
+ jpn "行 %lu はデータを切り捨てられました。列よりも多いデータを含んでいました。"
nla "Regel %lu ingekort, bevatte meer data dan invoer kolommen"
por "Conta de registro é maior que a conta de coluna na linha %lu"
spa "Línea %lu fué truncada; La misma contine mas datos que las que existen en las columnas de entrada"
ER_WARN_NULL_TO_NOTNULL 22004
eng "Column set to default value; NULL supplied to NOT NULL column '%s' at row %lu"
ger "Feld auf Vorgabewert gesetzt, da NULL für NOT-NULL-Feld '%s' in Zeile %lu angegeben"
+ jpn "列にデフォルト値が設定されました。NOT NULLの列 '%s' に 行 %lu で NULL が与えられました。"
por "Dado truncado, NULL fornecido para NOT NULL coluna '%s' na linha %lu"
spa "Datos truncado, NULL suministrado para NOT NULL columna '%s' en la línea %lu"
ER_WARN_DATA_OUT_OF_RANGE 22003
@@ -4902,17 +4940,20 @@ ER_WARN_DATA_OUT_OF_RANGE 22003
WARN_DATA_TRUNCATED 01000
eng "Data truncated for column '%s' at row %lu"
ger "Daten abgeschnitten für Feld '%s' in Zeile %lu"
+ jpn "列 '%s' の 行 %lu でデータが切り捨てられました。"
por "Dado truncado para coluna '%s' na linha %lu"
spa "Datos truncados para columna '%s' en la línea %lu"
ER_WARN_USING_OTHER_HANDLER
eng "Using storage engine %s for table '%s'"
ger "Für Tabelle '%s' wird Speicher-Engine %s benutzt"
+ jpn "ストレージエンジン %s が表 '%s' に利用されています。"
por "Usando engine de armazenamento %s para tabela '%s'"
spa "Usando motor de almacenamiento %s para tabla '%s'"
swe "Använder handler %s för tabell '%s'"
ER_CANT_AGGREGATE_2COLLATIONS
eng "Illegal mix of collations (%s,%s) and (%s,%s) for operation '%s'"
ger "Unerlaubte Mischung von Sortierreihenfolgen (%s, %s) und (%s, %s) für Operation '%s'"
+ jpn "照合順序 (%s,%s) と (%s,%s) の混在は操作 '%s' では不正です。"
por "Combinação ilegal de collations (%s,%s) e (%s,%s) para operação '%s'"
spa "Ilegal mezcla de collations (%s,%s) y (%s,%s) para operación '%s'"
ER_DROP_USER
@@ -4921,42 +4962,50 @@ ER_DROP_USER
ER_REVOKE_GRANTS
eng "Can't revoke all privileges for one or more of the requested users"
ger "Kann nicht alle Berechtigungen widerrufen, die für einen oder mehrere Benutzer gewährt wurden"
+ jpn "指定されたユーザーから指定された全ての権限を剥奪することができませんでした。"
por "Não pode revocar todos os privilégios, grant para um ou mais dos usuários pedidos"
spa "No puede revocar todos los privilegios, derecho para uno o mas de los usuarios solicitados"
ER_CANT_AGGREGATE_3COLLATIONS
eng "Illegal mix of collations (%s,%s), (%s,%s), (%s,%s) for operation '%s'"
ger "Unerlaubte Mischung von Sortierreihenfolgen (%s, %s), (%s, %s), (%s, %s) für Operation '%s'"
+ jpn "照合順序 (%s,%s), (%s,%s), (%s,%s) の混在は操作 '%s' では不正です。"
por "Ilegal combinação de collations (%s,%s), (%s,%s), (%s,%s) para operação '%s'"
spa "Ilegal mezcla de collations (%s,%s), (%s,%s), (%s,%s) para operación '%s'"
ER_CANT_AGGREGATE_NCOLLATIONS
eng "Illegal mix of collations for operation '%s'"
ger "Unerlaubte Mischung von Sortierreihenfolgen für Operation '%s'"
+ jpn "操作 '%s' では不正な照合順序の混在です。"
por "Ilegal combinação de collations para operação '%s'"
spa "Ilegal mezcla de collations para operación '%s'"
ER_VARIABLE_IS_NOT_STRUCT
eng "Variable '%-.64s' is not a variable component (can't be used as XXXX.variable_name)"
ger "Variable '%-.64s' ist keine Variablen-Komponente (kann nicht als XXXX.variablen_name verwendet werden)"
+ jpn "変数 '%-.64s' は構造変数の構成要素ではありません。(XXXX.変数名 という指定はできません。)"
por "Variável '%-.64s' não é uma variável componente (Não pode ser usada como XXXX.variável_nome)"
spa "Variable '%-.64s' no es una variable componente (No puede ser usada como XXXX.variable_name)"
ER_UNKNOWN_COLLATION
eng "Unknown collation: '%-.64s'"
ger "Unbekannte Sortierreihenfolge: '%-.64s'"
+ jpn "不明な照合順序: '%-.64s'"
por "Collation desconhecida: '%-.64s'"
spa "Collation desconocida: '%-.64s'"
ER_SLAVE_IGNORED_SSL_PARAMS
eng "SSL parameters in CHANGE MASTER are ignored because this MariaDB slave was compiled without SSL support; they can be used later if MariaDB slave with SSL is started"
ger "SSL-Parameter in CHANGE MASTER werden ignoriert, weil dieser MariaDB-Slave ohne SSL-Unterstützung kompiliert wurde. Sie können aber später verwendet werden, wenn ein MariaDB-Slave mit SSL gestartet wird"
+ jpn "このMariaDBスレーブはSSLサポートを含めてコンパイルされていないので、CHANGE MASTER のSSLパラメータは無視されました。今後SSLサポートを持つMariaDBスレーブを起動する際に利用されます。"
por "SSL parâmetros em CHANGE MASTER são ignorados porque este escravo MariaDB foi compilado sem o SSL suporte. Os mesmos podem ser usados mais tarde quando o escravo MariaDB com SSL seja iniciado."
spa "Parametros SSL en CHANGE MASTER son ignorados porque este slave MariaDB fue compilado sin soporte SSL; pueden ser usados despues cuando el slave MariaDB con SSL sea inicializado"
ER_SERVER_IS_IN_SECURE_AUTH_MODE
eng "Server is running in --secure-auth mode, but '%s'@'%s' has a password in the old format; please change the password to the new format"
ger "Server läuft im Modus --secure-auth, aber '%s'@'%s' hat ein Passwort im alten Format. Bitte Passwort ins neue Format ändern"
+ jpn "サーバーは --secure-auth モードで稼働しています。しかし '%s'@'%s' は古い形式のパスワードを使用しています。新しい形式のパスワードに変更してください。"
por "Servidor está rodando em --secure-auth modo, porêm '%s'@'%s' tem senha no formato antigo; por favor troque a senha para o novo formato"
rus "Сервер запущен в режиме --secure-auth (безопасной авторизации), но для пользователя '%s'@'%s' пароль сохранён в старом формате; необходимо обновить формат пароля"
spa "Servidor está rodando en modo --secure-auth, pero '%s'@'%s' tiene clave en el antiguo formato; por favor cambie la clave para el nuevo formato"
ER_WARN_FIELD_RESOLVED
eng "Field or reference '%-.192s%s%-.192s%s%-.192s' of SELECT #%d was resolved in SELECT #%d"
ger "Feld oder Verweis '%-.192s%s%-.192s%s%-.192s' im SELECT-Befehl Nr. %d wurde im SELECT-Befehl Nr. %d aufgelöst"
+ jpn "フィールドまたは参照 '%-.192s%s%-.192s%s%-.192s' は SELECT #%d ではなく、SELECT #%d で解決されました。"
por "Campo ou referência '%-.192s%s%-.192s%s%-.192s' de SELECT #%d foi resolvido em SELECT #%d"
rus "Поле или ссылка '%-.192s%s%-.192s%s%-.192s' из SELECTа #%d была найдена в SELECTе #%d"
spa "Campo o referencia '%-.192s%s%-.192s%s%-.192s' de SELECT #%d fue resolvido en SELECT #%d"
@@ -4964,68 +5013,80 @@ ER_WARN_FIELD_RESOLVED
ER_BAD_SLAVE_UNTIL_COND
eng "Incorrect parameter or combination of parameters for START SLAVE UNTIL"
ger "Falscher Parameter oder falsche Kombination von Parametern für START SLAVE UNTIL"
+ jpn "START SLAVE UNTIL へのパラメータまたはその組み合わせが不正です。"
por "Parâmetro ou combinação de parâmetros errado para START SLAVE UNTIL"
spa "Parametro equivocado o combinación de parametros para START SLAVE UNTIL"
ER_MISSING_SKIP_SLAVE
eng "It is recommended to use --skip-slave-start when doing step-by-step replication with START SLAVE UNTIL; otherwise, you will get problems if you get an unexpected slave's mysqld restart"
ger "Es wird empfohlen, mit --skip-slave-start zu starten, wenn mit START SLAVE UNTIL eine Schritt-für-Schritt-Replikation ausgeführt wird. Ansonsten gibt es Probleme, wenn ein Slave-Server unerwartet neu startet"
+ jpn "START SLAVE UNTIL で段階的にレプリケーションを行う際には、--skip-slave-start オプションを使うことを推奨します。使わない場合、スレーブのmysqldが不慮の再起動をすると問題が発生します。"
        por "É recomendado para rodar com --skip-slave-start quando fazendo replicação passo-por-passo com START SLAVE UNTIL, de outra forma você não está seguro em caso de inesperada reinicialização do mysqld escravo"
spa "Es recomendado rodar con --skip-slave-start cuando haciendo replicación step-by-step con START SLAVE UNTIL, a menos que usted no esté seguro en caso de inesperada reinicialización del mysqld slave"
ER_UNTIL_COND_IGNORED
eng "SQL thread is not to be started so UNTIL options are ignored"
ger "SQL-Thread soll nicht gestartet werden. Daher werden UNTIL-Optionen ignoriert"
+ jpn "スレーブSQLスレッドが開始されないため、UNTILオプションは無視されました。"
por "Thread SQL não pode ser inicializado tal que opções UNTIL são ignoradas"
spa "SQL thread no es inicializado tal que opciones UNTIL son ignoradas"
ER_WRONG_NAME_FOR_INDEX 42000
eng "Incorrect index name '%-.100s'"
ger "Falscher Indexname '%-.100s'"
+ jpn "索引名 '%-.100s' は不正です。"
por "Incorreto nome de índice '%-.100s'"
spa "Nombre de índice incorrecto '%-.100s'"
swe "Felaktigt index namn '%-.100s'"
ER_WRONG_NAME_FOR_CATALOG 42000
eng "Incorrect catalog name '%-.100s'"
ger "Falscher Katalogname '%-.100s'"
+ jpn "カタログ名 '%-.100s' は不正です。"
por "Incorreto nome de catálogo '%-.100s'"
spa "Nombre de catalog incorrecto '%-.100s'"
swe "Felaktigt katalog namn '%-.100s'"
ER_WARN_QC_RESIZE
eng "Query cache failed to set size %lu; new query cache size is %lu"
ger "Änderung der Query-Cache-Größe auf %lu fehlgeschlagen; neue Query-Cache-Größe ist %lu"
+ jpn "クエリキャッシュのサイズを %lu にできませんでした。サイズは %lu になりました。"
por "Falha em Query cache para configurar tamanho %lu, novo tamanho de query cache é %lu"
rus "Кеш запросов не может установить размер %lu, новый размер кеша зпросов - %lu"
spa "Query cache fallada para configurar tamaño %lu, nuevo tamaño de query cache es %lu"
- swe "Storleken av "Query cache" kunde inte sättas till %lu, ny storlek är %lu"
+ swe "Storleken av 'Query cache' kunde inte sättas till %lu, ny storlek är %lu"
ukr "Кеш запитів неспроможен встановити розмір %lu, новий розмір кеша запитів - %lu"
ER_BAD_FT_COLUMN
eng "Column '%-.192s' cannot be part of FULLTEXT index"
ger "Feld '%-.192s' kann nicht Teil eines FULLTEXT-Index sein"
+ jpn "列 '%-.192s' は全文索引のキーにはできません。"
por "Coluna '%-.192s' não pode ser parte de índice FULLTEXT"
spa "Columna '%-.192s' no puede ser parte de FULLTEXT index"
swe "Kolumn '%-.192s' kan inte vara del av ett FULLTEXT index"
ER_UNKNOWN_KEY_CACHE
eng "Unknown key cache '%-.100s'"
ger "Unbekannter Schlüssel-Cache '%-.100s'"
+ jpn "'%-.100s' は不明なキーキャッシュです。"
por "Key cache desconhecida '%-.100s'"
spa "Desconocida key cache '%-.100s'"
swe "Okänd nyckel cache '%-.100s'"
ER_WARN_HOSTNAME_WONT_WORK
eng "MariaDB is started in --skip-name-resolve mode; you must restart it without this switch for this grant to work"
ger "MariaDB wurde mit --skip-name-resolve gestartet. Diese Option darf nicht verwendet werden, damit diese Rechtevergabe möglich ist"
+ jpn "MariaDBは --skip-name-resolve モードで起動しています。このオプションを外して再起動しなければ、この権限操作は機能しません。"
por "MariaDB foi inicializado em modo --skip-name-resolve. Você necesita reincializá-lo sem esta opção para este grant funcionar"
spa "MariaDB esta inicializado en modo --skip-name-resolve. Usted necesita reinicializarlo sin esta opción para este derecho funcionar"
ER_UNKNOWN_STORAGE_ENGINE 42000
eng "Unknown storage engine '%s'"
ger "Unbekannte Speicher-Engine '%s'"
+ jpn "'%s' は不明なストレージエンジンです。"
por "Motor de tabela desconhecido '%s'"
spa "Desconocido motor de tabla '%s'"
ER_WARN_DEPRECATED_SYNTAX
eng "'%s' is deprecated and will be removed in a future release. Please use %s instead"
ger "'%s' ist veraltet. Bitte benutzen Sie '%s'"
+ jpn "'%s' は将来のリリースで廃止予定です。代わりに %s を使用してください。"
por "'%s' é desatualizado. Use '%s' em seu lugar"
spa "'%s' está desaprobado, use '%s' en su lugar"
ER_NON_UPDATABLE_TABLE
eng "The target table %-.100s of the %s is not updatable"
ger "Die Zieltabelle %-.100s von %s ist nicht aktualisierbar"
+ jpn "対象表 %-.100s は更新可能ではないので、%s を行えません。"
por "A tabela destino %-.100s do %s não é atualizável"
rus "Таблица %-.100s в %s не может изменятся"
spa "La tabla destino %-.100s del %s no es actualizable"
@@ -5034,33 +5095,39 @@ ER_NON_UPDATABLE_TABLE
ER_FEATURE_DISABLED
eng "The '%s' feature is disabled; you need MariaDB built with '%s' to have it working"
ger "Das Feature '%s' ist ausgeschaltet, Sie müssen MariaDB mit '%s' übersetzen, damit es verfügbar ist"
+ jpn "機能 '%s' は無効です。利用するためには '%s' を含めてビルドしたMariaDBが必要です。"
por "O recurso '%s' foi desativado; você necessita MariaDB construído com '%s' para ter isto funcionando"
spa "El recurso '%s' fue deshabilitado; usted necesita construir MariaDB con '%s' para tener eso funcionando"
swe "'%s' är inte aktiverad; För att aktivera detta måste du bygga om MariaDB med '%s' definierad"
ER_OPTION_PREVENTS_STATEMENT
eng "The MariaDB server is running with the %s option so it cannot execute this statement"
ger "Der MariaDB-Server läuft mit der Option %s und kann diese Anweisung deswegen nicht ausführen"
+ jpn "MariaDBサーバーが %s オプションで実行されているので、このステートメントは実行できません。"
por "O servidor MariaDB está rodando com a opção %s razão pela qual não pode executar esse commando"
spa "El servidor MariaDB está rodando con la opción %s tal que no puede ejecutar este comando"
swe "MariaDB är startad med %s. Pga av detta kan du inte använda detta kommando"
ER_DUPLICATED_VALUE_IN_TYPE
eng "Column '%-.100s' has duplicated value '%-.64s' in %s"
ger "Feld '%-.100s' hat doppelten Wert '%-.64s' in %s"
+ jpn "列 '%-.100s' で、重複する値 '%-.64s' が %s に指定されています。"
por "Coluna '%-.100s' tem valor duplicado '%-.64s' em %s"
spa "Columna '%-.100s' tiene valor doblado '%-.64s' en %s"
ER_TRUNCATED_WRONG_VALUE 22007
eng "Truncated incorrect %-.32s value: '%-.128s'"
ger "Falscher %-.32s-Wert gekürzt: '%-.128s'"
+ jpn "不正な %-.32s の値が切り捨てられました。: '%-.128s'"
por "Truncado errado %-.32s valor: '%-.128s'"
spa "Equivocado truncado %-.32s valor: '%-.128s'"
ER_TOO_MUCH_AUTO_TIMESTAMP_COLS
eng "Incorrect table definition; there can be only one TIMESTAMP column with CURRENT_TIMESTAMP in DEFAULT or ON UPDATE clause"
ger "Fehlerhafte Tabellendefinition. Es kann nur eine einzige TIMESTAMP-Spalte mit CURRENT_TIMESTAMP als DEFAULT oder in einer ON-UPDATE-Klausel geben"
+ jpn "不正な表定義です。DEFAULT句またはON UPDATE句に CURRENT_TIMESTAMP をともなうTIMESTAMP型の列は1つまでです。"
por "Incorreta definição de tabela; Pode ter somente uma coluna TIMESTAMP com CURRENT_TIMESTAMP em DEFAULT ou ON UPDATE cláusula"
spa "Incorrecta definición de tabla; Solamente debe haber una columna TIMESTAMP con CURRENT_TIMESTAMP en DEFAULT o ON UPDATE cláusula"
ER_INVALID_ON_UPDATE
eng "Invalid ON UPDATE clause for '%-.192s' column"
ger "Ungültige ON-UPDATE-Klausel für Spalte '%-.192s'"
+ jpn "列 '%-.192s' に ON UPDATE句は無効です。"
por "Inválida cláusula ON UPDATE para campo '%-.192s'"
spa "Inválido ON UPDATE cláusula para campo '%-.192s'"
ER_UNSUPPORTED_PS
@@ -5070,11 +5137,13 @@ ER_GET_ERRMSG
dan "Modtog fejl %d '%-.100s' fra %s"
eng "Got error %d '%-.100s' from %s"
ger "Fehler %d '%-.100s' von %s"
+ jpn "エラー %d '%-.100s' が %s から返されました。"
        nor "Mottok feil %d '%-.100s' fra %s"
norwegian-ny "Mottok feil %d '%-.100s' fra %s"
ER_GET_TEMPORARY_ERRMSG
dan "Modtog temporary fejl %d '%-.100s' fra %s"
eng "Got temporary error %d '%-.100s' from %s"
+ jpn "一時エラー %d '%-.100s' が %s から返されました。"
ger "Temporärer Fehler %d '%-.100s' von %s"
nor "Mottok temporary feil %d '%-.100s' fra %s"
norwegian-ny "Mottok temporary feil %d '%-.100s' fra %s"
@@ -5540,6 +5609,7 @@ ER_TRG_IN_WRONG_SCHEMA
ER_STACK_OVERRUN_NEED_MORE
eng "Thread stack overrun: %ld bytes used of a %ld byte stack, and %ld bytes needed. Use 'mysqld --thread_stack=#' to specify a bigger stack."
ger "Thread-Stack-Überlauf: %ld Bytes eines %ld-Byte-Stacks in Verwendung, und %ld Bytes benötigt. Verwenden Sie 'mysqld --thread_stack=#', um einen größeren Stack anzugeben"
+ jpn "スレッドスタック不足です(使用: %ld ; サイズ: %ld ; 要求: %ld)。より大きい値で 'mysqld --thread_stack=#' の指定をしてください。"
ER_TOO_LONG_BODY 42000 S1009
eng "Routine body for '%-.100s' is too long"
ger "Routinen-Body für '%-.100s' ist zu lang"
@@ -5645,6 +5715,7 @@ ER_WRONG_STRING_LENGTH
ER_NON_INSERTABLE_TABLE
eng "The target table %-.100s of the %s is not insertable-into"
ger "Die Zieltabelle %-.100s von %s ist nicht einfügbar"
+ jpn "対象表 %-.100s は挿入可能ではないので、%s を行えません。"
ER_ADMIN_WRONG_MRG_TABLE
eng "Table '%-.64s' is differently defined or of non-MyISAM type or doesn't exist"
ger "Tabelle '%-.64s' ist unterschiedlich definiert, nicht vom Typ MyISAM oder existiert nicht"
@@ -6041,29 +6112,28 @@ ER_NATIVE_FCT_NAME_COLLISION
# When using this error message, use the ER_DUP_ENTRY error code. See, for
# example, code in handler.cc.
ER_DUP_ENTRY_WITH_KEY_NAME 23000 S1009
- cze "Zvojen-Bý klíč '%-.64s' (číslo klíče '%-.192s')"
+ cze "Zvojený klíč '%-.64s' (číslo klíče '%-.192s')"
dan "Ens værdier '%-.64s' for indeks '%-.192s'"
nla "Dubbele ingang '%-.64s' voor zoeksleutel '%-.192s'"
eng "Duplicate entry '%-.64s' for key '%-.192s'"
- jps "'%-.64s' は key '%-.192s' において重複しています",
est "Kattuv väärtus '%-.64s' võtmele '%-.192s'"
fre "Duplicata du champ '%-.64s' pour la clef '%-.192s'"
ger "Doppelter Eintrag '%-.64s' für Schlüssel '%-.192s'"
greek "Διπλή εγγραφή '%-.64s' για το κλειδί '%-.192s'"
hun "Duplikalt bejegyzes '%-.64s' a '%-.192s' kulcs szerint."
ita "Valore duplicato '%-.64s' per la chiave '%-.192s'"
- jpn "'%-.64s' は key '%-.192s' において重複しています"
+ jpn "'%-.64s' は索引 '%-.192s' で重複しています。"
kor "중복된 입력 값 '%-.64s': key '%-.192s'"
nor "Like verdier '%-.64s' for nøkkel '%-.192s'"
norwegian-ny "Like verdiar '%-.64s' for nykkel '%-.192s'"
- pol "Powtórzone wyst?pienie '%-.64s' dla klucza '%-.192s'"
+ pol "Powtórzone wystąpienie '%-.64s' dla klucza '%-.192s'"
por "Entrada '%-.64s' duplicada para a chave '%-.192s'"
rum "Cimpul '%-.64s' e duplicat pentru cheia '%-.192s'"
rus "Дублирующаяся запись '%-.64s' по ключу '%-.192s'"
serbian "Dupliran unos '%-.64s' za ključ '%-.192s'"
slo "Opakovaný kľúč '%-.64s' (číslo kľúča '%-.192s')"
spa "Entrada duplicada '%-.64s' para la clave '%-.192s'"
- swe "Dubbel nyckel '%-.64s' för nyckel '%-.192s'"
+ swe "Dublett '%-.64s' för nyckel '%-.192s'"
ukr "Дублюючий запис '%-.64s' для ключа '%-.192s'"
ER_BINLOG_PURGE_EMFILE
eng "Too many files opened, please execute the command again"
@@ -6394,7 +6464,7 @@ ER_VALUES_IS_NOT_INT_TYPE_ERROR
swe "Värden i VALUES för partition '%-.64s' måste ha typen INT"
ER_ACCESS_DENIED_NO_PASSWORD_ERROR 28000
- cze "P-Břístup pro uživatele '%-.48s'@'%-.64s'"
+ cze "Přístup pro uživatele '%-.48s'@'%-.64s'"
dan "Adgang nægtet bruger: '%-.48s'@'%-.64s'"
nla "Toegang geweigerd voor gebruiker: '%-.48s'@'%-.64s'"
eng "Access denied for user '%-.48s'@'%-.64s'"
@@ -6404,6 +6474,7 @@ ER_ACCESS_DENIED_NO_PASSWORD_ERROR 28000
greek "Δεν επιτέρεται η πρόσβαση στο χρήστη: '%-.48s'@'%-.64s'"
hun "A(z) '%-.48s'@'%-.64s' felhasznalo szamara tiltott eleres."
ita "Accesso non consentito per l'utente: '%-.48s'@'%-.64s'"
+ jpn "ユーザー '%-.48s'@'%-.64s' のアクセスは拒否されました。"
kor "'%-.48s'@'%-.64s' 사용자는 접근이 거부 되었습니다."
nor "Tilgang nektet for bruker: '%-.48s'@'%-.64s'"
norwegian-ny "Tilgang ikke tillate for brukar: '%-.48s'@'%-.64s'"
@@ -6487,7 +6558,6 @@ ER_PLUGIN_NO_UNINSTALL
ER_PLUGIN_NO_INSTALL
eng "Plugin '%s' is marked as not dynamically installable. You have to stop the server to install it."
-
ER_BINLOG_UNSAFE_WRITE_AUTOINC_SELECT
eng "Statements writing to a table with an auto-increment column after selecting from another table are unsafe because the order in which rows are retrieved determines what (if any) rows will be written. This order cannot be predicted and may differ on master and the slave."
@@ -6500,8 +6570,8 @@ ER_BINLOG_UNSAFE_INSERT_TWO_KEYS
ER_TABLE_IN_FK_CHECK
eng "Table is being used in foreign key check."
-ER_UNUSED_1
- eng "You should never see it"
+ER_UNSUPPORTED_ENGINE
+ eng "Storage engine '%s' does not support system tables. [%s.%s]"
ER_BINLOG_UNSAFE_AUTOINC_NOT_FIRST
eng "INSERT into autoincrement field which is not the first part in the composed primary key is unsafe."
@@ -6510,15 +6580,10 @@ ER_BINLOG_UNSAFE_AUTOINC_NOT_FIRST
# End of 5.5 error messages.
#
-ER_COL_COUNT_DOESNT_MATCH_CORRUPTED_V2
- eng "Column count of %s.%s is wrong. Expected %d, found %d. The table is probably corrupted"
- ger "Spaltenanzahl von %s.%s falsch. %d erwartet, aber %d gefunden. Tabelle ist wahrscheinlich beschädigt"
-
ER_CANNOT_LOAD_FROM_TABLE_V2
eng "Cannot load from %s.%s. The table is probably corrupted"
ger "Kann %s.%s nicht einlesen. Tabelle ist wahrscheinlich beschädigt"
-
ER_MASTER_DELAY_VALUE_OUT_OF_RANGE
eng "The requested value %u for the master delay exceeds the maximum %u"
ER_ONLY_FD_AND_RBR_EVENTS_ALLOWED_IN_BINLOG_STATEMENT
@@ -6573,7 +6638,10 @@ ER_PARTITION_CLAUSE_ON_NONPARTITIONED
ER_ROW_DOES_NOT_MATCH_GIVEN_PARTITION_SET
eng "Found a row not matching the given partition set"
swe "Hittade en rad som inte passar i någon given partition"
-ER_NO_SUCH_PARTITION
+
+# Not used any longer, use ER_UNKNOWN_PARTITION which includes the tablename.
+# was ER_NO_SUCH_PARTITION
+ER_NO_SUCH_PARTITION__UNUSED
        cze "partition '%-.64s' neexistuje"
dan "partition '%-.64s' eksisterer ikke"
nla "partition '%-.64s' bestaat niet"
@@ -6583,6 +6651,7 @@ ER_NO_SUCH_PARTITION
ger "Die partition '%-.64s' existiert nicht"
hun "A '%-.64s' partition nem letezik"
ita "La tabella particione '%-.64s' non esiste"
+ jpn "パーティション '%-.64s' は存在しません。"
nor "Partition '%-.64s' doesn't exist"
norwegian-ny "Partition '%-.64s' doesn't exist"
pol "Partition '%-.64s' doesn't exist"
@@ -6633,18 +6702,13 @@ ER_FOREIGN_DUPLICATE_KEY_WITHOUT_CHILD_INFO 23000 S1009
eng "Foreign key constraint for table '%.192s', record '%-.192s' would lead to a duplicate entry in a child table"
ger "Fremdschlüssel-Beschränkung für Tabelle '%.192s', Datensatz '%-.192s' würde zu einem doppelten Eintrag in einer Kind-Tabelle führen"
swe "FOREIGN KEY constraint för tabell '%.192s', posten '%-.192s' kan inte uppdatera en barntabell på grund av UNIQUE-test"
+
ER_SQLTHREAD_WITH_SECURE_SLAVE
eng "Setting authentication options is not possible when only the Slave SQL Thread is being started."
ER_TABLE_HAS_NO_FT
eng "The table does not have FULLTEXT index to support this query"
-ER_INNODB_FT_LIMIT
- eng "InnoDB presently supports one FULLTEXT index per table"
-
-ER_INNODB_NO_FT_TEMP_TABLE
- eng "Cannot create FULLTEXT index on temporary InnoDB table"
-
ER_VARIABLE_NOT_SETTABLE_IN_SF_OR_TRIGGER
eng "The system variable %.200s cannot be set in stored functions or triggers."
@@ -6682,13 +6746,13 @@ ER_BAD_SLAVE_AUTO_POSITION
eng "Parameters MASTER_LOG_FILE, MASTER_LOG_POS, RELAY_LOG_FILE and RELAY_LOG_POS cannot be set when MASTER_AUTO_POSITION is active."
ER_AUTO_POSITION_REQUIRES_GTID_MODE_ON
- eng "CHANGE MASTER TO AUTO_POSITION = 1 can only be executed when GTID_MODE = ON."
+ eng "CHANGE MASTER TO MASTER_AUTO_POSITION = 1 can only be executed when GTID_MODE = ON."
ER_CANT_DO_IMPLICIT_COMMIT_IN_TRX_WHEN_GTID_NEXT_IS_SET
eng "Cannot execute statements with implicit commit inside a transaction when GTID_NEXT != AUTOMATIC or GTID_NEXT_LIST != NULL."
-ER_GTID_MODE_2_OR_3_REQUIRES_DISABLE_GTID_UNSAFE_STATEMENTS_ON
- eng "GTID_MODE = ON or GTID_MODE = UPGRADE_STEP_2 requires DISABLE_GTID_UNSAFE_STATEMENTS = 1."
+ER_GTID_MODE_2_OR_3_REQUIRES_ENFORCE_GTID_CONSISTENCY_ON
+ eng "GTID_MODE = ON or GTID_MODE = UPGRADE_STEP_2 requires ENFORCE_GTID_CONSISTENCY = 1."
ER_GTID_MODE_REQUIRES_BINLOG
eng "GTID_MODE = ON or UPGRADE_STEP_1 or UPGRADE_STEP_2 requires --log-bin and --log-slave-updates."
@@ -6706,13 +6770,13 @@ ER_FOUND_GTID_EVENT_WHEN_GTID_MODE_IS_OFF
eng "Found a Gtid_log_event or Previous_gtids_log_event when GTID_MODE = OFF."
ER_GTID_UNSAFE_NON_TRANSACTIONAL_TABLE
- eng "Updates to non-transactional tables are forbidden when DISABLE_GTID_UNSAFE_STATEMENTS = 1."
+ eng "When ENFORCE_GTID_CONSISTENCY = 1, updates to non-transactional tables can only be done in either autocommitted statements or single-statement transactions, and never in the same statement as updates to transactional tables."
ER_GTID_UNSAFE_CREATE_SELECT
- eng "CREATE TABLE ... SELECT is forbidden when DISABLE_GTID_UNSAFE_STATEMENTS = 1."
+ eng "CREATE TABLE ... SELECT is forbidden when ENFORCE_GTID_CONSISTENCY = 1."
ER_GTID_UNSAFE_CREATE_DROP_TEMPORARY_TABLE_IN_TRANSACTION
- eng "When DISABLE_GTID_UNSAFE_STATEMENTS = 1, the statements CREATE TEMPORARY TABLE and DROP TEMPORARY TABLE can be executed in a non-transactional context only, and require that AUTOCOMMIT = 1."
+ eng "When ENFORCE_GTID_CONSISTENCY = 1, the statements CREATE TEMPORARY TABLE and DROP TEMPORARY TABLE can be executed in a non-transactional context only, and require that AUTOCOMMIT = 1."
ER_GTID_MODE_CAN_ONLY_CHANGE_ONE_STEP_AT_A_TIME
eng "The value of GTID_MODE can only change one step at a time: OFF <-> UPGRADE_STEP_1 <-> UPGRADE_STEP_2 <-> ON. Also note that this value must be stepped up or down simultaneously on all servers; see the Manual for instructions."
@@ -6730,6 +6794,241 @@ ER_UNKNOWN_EXPLAIN_FORMAT
ER_CANT_EXECUTE_IN_READ_ONLY_TRANSACTION 25006
eng "Cannot execute statement in a READ ONLY transaction."
+ER_TOO_LONG_TABLE_PARTITION_COMMENT
+ eng "Comment for table partition '%-.64s' is too long (max = %lu)"
+
+ER_SLAVE_CONFIGURATION
+ eng "Slave is not configured or failed to initialize properly. You must at least set --server-id to enable either a master or a slave. Additional error messages can be found in the MySQL error log."
+
+ER_INNODB_FT_LIMIT
+ eng "InnoDB presently supports one FULLTEXT index creation at a time"
+
+ER_INNODB_NO_FT_TEMP_TABLE
+ eng "Cannot create FULLTEXT index on temporary InnoDB table"
+
+ER_INNODB_FT_WRONG_DOCID_COLUMN
+ eng "Column '%-.192s' is of wrong type for an InnoDB FULLTEXT index"
+
+ER_INNODB_FT_WRONG_DOCID_INDEX
+ eng "Index '%-.192s' is of wrong type for an InnoDB FULLTEXT index"
+
+ER_INNODB_ONLINE_LOG_TOO_BIG
+ eng "Creating index '%-.192s' required more than 'innodb_online_alter_log_max_size' bytes of modification log. Please try again."
+
+ER_UNKNOWN_ALTER_ALGORITHM
+ eng "Unknown ALGORITHM '%s'"
+
+ER_UNKNOWN_ALTER_LOCK
+ eng "Unknown LOCK type '%s'"
+
+ER_MTS_CHANGE_MASTER_CANT_RUN_WITH_GAPS
+ eng "CHANGE MASTER cannot be executed when the slave was stopped with an error or killed in MTS mode. Consider using RESET SLAVE or START SLAVE UNTIL."
+
+ER_MTS_RECOVERY_FAILURE
+ eng "Cannot recover after SLAVE errored out in parallel execution mode. Additional error messages can be found in the MySQL error log."
+
+ER_MTS_RESET_WORKERS
+ eng "Cannot clean up worker info tables. Additional error messages can be found in the MySQL error log."
+
+ER_COL_COUNT_DOESNT_MATCH_CORRUPTED_V2
+ eng "Column count of %s.%s is wrong. Expected %d, found %d. The table is probably corrupted"
+ ger "Spaltenanzahl von %s.%s falsch. %d erwartet, aber %d gefunden. Tabelle ist wahrscheinlich beschädigt"
+
+ER_SLAVE_SILENT_RETRY_TRANSACTION
+ eng "Slave must silently retry current transaction"
+
+ER_DISCARD_FK_CHECKS_RUNNING
+ eng "There is a foreign key check running on table '%-.192s'. Cannot discard the table."
+
+ER_TABLE_SCHEMA_MISMATCH
+ eng "Schema mismatch (%s)"
+
+ER_TABLE_IN_SYSTEM_TABLESPACE
+ eng "Table '%-.192s' in system tablespace"
+
+ER_IO_READ_ERROR
+ eng "IO Read error: (%lu, %s) %s"
+
+ER_IO_WRITE_ERROR
+ eng "IO Write error: (%lu, %s) %s"
+
+ER_TABLESPACE_MISSING
+ eng "Tablespace is missing for table '%-.192s'"
+
+ER_TABLESPACE_EXISTS
+ eng "Tablespace for table '%-.192s' exists. Please DISCARD the tablespace before IMPORT."
+
+ER_TABLESPACE_DISCARDED
+ eng "Tablespace has been discarded for table '%-.192s'"
+
+ER_INTERNAL_ERROR
+ eng "Internal error: '%-.192s'"
+
+ER_INNODB_IMPORT_ERROR
+ eng "ALTER TABLE '%-.192s' IMPORT TABLESPACE failed with error %lu : '%s'"
+
+ER_INNODB_INDEX_CORRUPT
+ eng "Index corrupt: %s"
+
+ER_INVALID_YEAR_COLUMN_LENGTH
+ eng "YEAR(%lu) column type is deprecated. Creating YEAR(4) column instead."
+ rus "Тип YEAR(%lu) более не поддерживается, вместо него будет создана колонка с типом YEAR(4)."
+
+ER_NOT_VALID_PASSWORD
+ eng "Your password does not satisfy the current policy requirements"
+
+ER_MUST_CHANGE_PASSWORD
+ eng "You must SET PASSWORD before executing this statement"
+ bgn "Трябва първо да си смените паролата със SET PASSWORD за да можете да изпълните тази команда"
+
+ER_FK_NO_INDEX_CHILD
+ eng "Failed to add the foreign key constaint. Missing index for constraint '%s' in the foreign table '%s'"
+
+ER_FK_NO_INDEX_PARENT
+ eng "Failed to add the foreign key constaint. Missing index for constraint '%s' in the referenced table '%s'"
+
+ER_FK_FAIL_ADD_SYSTEM
+ eng "Failed to add the foreign key constraint '%s' to system tables"
+
+ER_FK_CANNOT_OPEN_PARENT
+ eng "Failed to open the referenced table '%s'"
+
+ER_FK_INCORRECT_OPTION
+ eng "Failed to add the foreign key constraint on table '%s'. Incorrect options in FOREIGN KEY constraint '%s'"
+
+ER_FK_DUP_NAME
+ eng "Duplicate foreign key constraint name '%s'"
+
+ER_PASSWORD_FORMAT
+ eng "The password hash doesn't have the expected format. Check if the correct password algorithm is being used with the PASSWORD() function."
+
+ER_FK_COLUMN_CANNOT_DROP
+ eng "Cannot drop column '%-.192s': needed in a foreign key constraint '%-.192s'"
+ ger "Kann Spalte '%-.192s' nicht löschen: wird für eine Fremdschlüsselbeschränkung '%-.192s' benötigt"
+
+ER_FK_COLUMN_CANNOT_DROP_CHILD
+ eng "Cannot drop column '%-.192s': needed in a foreign key constraint '%-.192s' of table '%-.192s'"
+ ger "Kann Spalte '%-.192s' nicht löschen: wird für eine Fremdschlüsselbeschränkung '%-.192s' der Tabelle '%-.192s' benötigt"
+
+ER_FK_COLUMN_NOT_NULL
+ eng "Column '%-.192s' cannot be NOT NULL: needed in a foreign key constraint '%-.192s' SET NULL"
+ ger "Spalte '%-.192s' kann nicht NOT NULL sein: wird für eine Fremdschlüsselbeschränkung '%-.192s' SET NULL benötigt"
+
+ER_DUP_INDEX
+ eng "Duplicate index '%-.64s' defined on the table '%-.64s.%-.64s'. This is deprecated and will be disallowed in a future release."
+
+ER_FK_COLUMN_CANNOT_CHANGE
+ eng "Cannot change column '%-.192s': used in a foreign key constraint '%-.192s'"
+
+ER_FK_COLUMN_CANNOT_CHANGE_CHILD
+ eng "Cannot change column '%-.192s': used in a foreign key constraint '%-.192s' of table '%-.192s'"
+
+ER_FK_CANNOT_DELETE_PARENT
+ eng "Cannot delete rows from table which is parent in a foreign key constraint '%-.192s' of table '%-.192s'"
+
+ER_MALFORMED_PACKET
+ eng "Malformed communication packet."
+
+ER_READ_ONLY_MODE
+ eng "Running in read-only mode"
+
+ER_GTID_NEXT_TYPE_UNDEFINED_GROUP
+ eng "When GTID_NEXT is set to a GTID, you must explicitly set it again after a COMMIT or ROLLBACK. If you see this error message in the slave SQL thread, it means that a table in the current transaction is transactional on the master and non-transactional on the slave. In a client connection, it means that you executed SET GTID_NEXT before a transaction and forgot to set GTID_NEXT to a different identifier or to 'AUTOMATIC' after COMMIT or ROLLBACK. Current GTID_NEXT is '%s'."
+
+ER_VARIABLE_NOT_SETTABLE_IN_SP
+ eng "The system variable %.200s cannot be set in stored procedures."
+
+ER_CANT_SET_GTID_PURGED_WHEN_GTID_MODE_IS_OFF
+ eng "GTID_PURGED can only be set when GTID_MODE = ON."
+
+ER_CANT_SET_GTID_PURGED_WHEN_GTID_EXECUTED_IS_NOT_EMPTY
+ eng "GTID_PURGED can only be set when GTID_EXECUTED is empty."
+
+ER_CANT_SET_GTID_PURGED_WHEN_OWNED_GTIDS_IS_NOT_EMPTY
+ eng "GTID_PURGED can only be set when there are no ongoing transactions (not even in other clients)."
+
+ER_GTID_PURGED_WAS_CHANGED
+ eng "GTID_PURGED was changed from '%s' to '%s'."
+
+ER_GTID_EXECUTED_WAS_CHANGED
+ eng "GTID_EXECUTED was changed from '%s' to '%s'."
+
+ER_BINLOG_STMT_MODE_AND_NO_REPL_TABLES
+ eng "Cannot execute statement: impossible to write to binary log since BINLOG_FORMAT = STATEMENT, and both replicated and non replicated tables are written to."
+
+ER_ALTER_OPERATION_NOT_SUPPORTED 0A000
+ eng "%s is not supported for this operation. Try %s."
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON 0A000
+ eng "%s is not supported. Reason: %s. Try %s."
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_COPY
+ eng "COPY algorithm requires a lock"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_PARTITION
+ eng "Partition specific operations do not yet support LOCK/ALGORITHM"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FK_RENAME
+ eng "Columns participating in a foreign key are renamed"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_COLUMN_TYPE
+ eng "Cannot change column type INPLACE"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FK_CHECK
+ eng "Adding foreign keys needs foreign_key_checks=OFF"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_IGNORE
+ eng "Creating unique indexes with IGNORE requires COPY algorithm to remove duplicate rows"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_NOPK
+ eng "Dropping a primary key is not allowed without also adding a new primary key"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_AUTOINC
+ eng "Adding an auto-increment column requires a lock"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_HIDDEN_FTS
+ eng "Cannot replace hidden FTS_DOC_ID with a user-visible one"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_CHANGE_FTS
+ eng "Cannot drop or rename FTS_DOC_ID"
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FTS
+ eng "Fulltext index creation requires a lock"
+
+ER_SQL_SLAVE_SKIP_COUNTER_NOT_SETTABLE_IN_GTID_MODE
+ eng "sql_slave_skip_counter can not be set when the server is running with GTID_MODE = ON. Instead, for each transaction that you want to skip, generate an empty transaction with the same GTID as the transaction"
+
+ER_DUP_UNKNOWN_IN_INDEX 23000
+ cze "Zdvojený klíč (číslo klíče '%-.192s')"
+ dan "Flere ens nøgler for indeks '%-.192s'"
+ nla "Dubbele ingang voor zoeksleutel '%-.192s'"
+ eng "Duplicate entry for key '%-.192s'"
+ est "Kattuv väärtus võtmele '%-.192s'"
+ fre "Duplicata du champ pour la clef '%-.192s'"
+ ger "Doppelter Eintrag für Schlüssel '%-.192s'"
+ greek "Διπλή εγγραφή για το κλειδί '%-.192s'"
+ hun "Duplikalt bejegyzes a '%-.192s' kulcs szerint."
+ ita "Valore duplicato per la chiave '%-.192s'"
+ jpn "は索引 '%-.192s' で重複しています。"
+ kor "중복된 입력 값: key '%-.192s'"
+ nor "Like verdier for nøkkel '%-.192s'"
+ norwegian-ny "Like verdiar for nykkel '%-.192s'"
+ pol "Powtórzone wystąpienie dla klucza '%-.192s'"
+ por "Entrada duplicada para a chave '%-.192s'"
+ rum "Cimpul e duplicat pentru cheia '%-.192s'"
+ rus "Дублирующаяся запись по ключу '%-.192s'"
+ serbian "Dupliran unos za ključ '%-.192s'"
+ slo "Opakovaný kľúč (číslo kľúča '%-.192s')"
+ spa "Entrada duplicada para la clave '%-.192s'"
+ swe "Dublett för nyckel '%-.192s'"
+ ukr "Дублюючий запис для ключа '%-.192s'"
+
+ER_IDENT_CAUSES_TOO_LONG_PATH
+ eng "Long database name and identifier for object resulted in path length exceeding %d characters. Path: '%s'."
+
+ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_NOT_NULL
+ eng "cannot silently convert NULL values, as required in this SQL_MODE"
+
#
# MariaDB error messages section starts here
#
@@ -6799,8 +7098,6 @@ ER_VIEW_ORDERBY_IGNORED
eng "View '%-.192s'.'%-.192s' ORDER BY clause ignored because there is other ORDER BY clause already."
ER_CONNECTION_KILLED 70100
eng "Connection was killed"
-ER_INTERNAL_ERROR
- eng "Internal error: '%-.192s'"
ER_INSIDE_TRANSACTION_PREVENTS_SWITCH_SKIP_REPLICATION
eng "Cannot modify @@session.skip_replication inside a transaction"
ER_STORED_FUNCTION_PREVENTS_SWITCH_SKIP_REPLICATION
@@ -6821,4 +7118,4 @@ ER_CANT_START_STOP_SLAVE
ER_SLAVE_STARTED
eng "SLAVE '%.*s' started"
ER_SLAVE_STOPPED
- eng "SLAVE '%.*s' stopped" \ No newline at end of file
+ eng "SLAVE '%.*s' stopped"
diff --git a/sql/signal_handler.cc b/sql/signal_handler.cc
index edc33c4d63b..9437db6c318 100644
--- a/sql/signal_handler.cc
+++ b/sql/signal_handler.cc
@@ -190,7 +190,7 @@ extern "C" sig_handler handle_fatal_signal(int sig)
"Some pointers may be invalid and cause the dump to abort.\n");
my_safe_printf_stderr("Query (%p): ", thd->query());
- my_safe_print_str(thd->query(), min(65536U, thd->query_length()));
+ my_safe_print_str(thd->query(), MY_MIN(65536U, thd->query_length()));
my_safe_printf_stderr("\nConnection ID (thread ID): %lu\n",
(ulong) thd->thread_id);
my_safe_printf_stderr("Status: %s\n\n", kreason);
diff --git a/sql/slave.cc b/sql/slave.cc
index 78fa7998012..a79514a0d8b 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -377,7 +377,7 @@ int init_recovery(Master_info* mi, const char** errmsg)
Relay_log_info *rli= &mi->rli;
if (rli->group_master_log_name[0])
{
- mi->master_log_pos= max(BIN_LOG_HEADER_SIZE,
+ mi->master_log_pos= MY_MAX(BIN_LOG_HEADER_SIZE,
rli->group_master_log_pos);
strmake(mi->master_log_name, rli->group_master_log_name,
sizeof(mi->master_log_name)-1);
@@ -2259,13 +2259,13 @@ static bool send_show_master_info_data(THD *thd, Master_info *mi, bool full)
slave is 2. At SHOW SLAVE STATUS time, assume that the difference
between timestamp of slave and rli->last_master_timestamp is 0
(i.e. they are in the same second), then we get 0-(2-1)=-1 as a result.
- This confuses users, so we don't go below 0: hence the max().
+ This confuses users, so we don't go below 0: hence the MY_MAX().
last_master_timestamp == 0 (an "impossible" timestamp 1970) is a
special marker to say "consider we have caught up".
*/
protocol->store((longlong)(mi->rli.last_master_timestamp ?
- max(0, time_diff) : 0));
+ MY_MAX(0, time_diff) : 0));
}
else
{
@@ -3023,7 +3023,7 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli)
exec_res= 0;
rli->cleanup_context(thd, 1);
/* chance for concurrent connection to get more locks */
- slave_sleep(thd, min(rli->trans_retries, MAX_SLAVE_RETRY_PAUSE),
+ slave_sleep(thd, MY_MIN(rli->trans_retries, MAX_SLAVE_RETRY_PAUSE),
sql_slave_killed, rli);
mysql_mutex_lock(&rli->data_lock); // because of SHOW STATUS
rli->trans_retries++;
@@ -5096,7 +5096,7 @@ static IO_CACHE *reopen_relay_log(Relay_log_info *rli, const char **errmsg)
relay_log_pos Current log pos
pending Number of bytes already processed from the event
*/
- rli->event_relay_log_pos= max(rli->event_relay_log_pos, BIN_LOG_HEADER_SIZE);
+ rli->event_relay_log_pos= MY_MAX(rli->event_relay_log_pos, BIN_LOG_HEADER_SIZE);
my_b_seek(cur_log,rli->event_relay_log_pos);
DBUG_RETURN(cur_log);
}
diff --git a/sql/sp_head.cc b/sql/sp_head.cc
index 7cd2e789351..e2901125f14 100644
--- a/sql/sp_head.cc
+++ b/sql/sp_head.cc
@@ -2663,7 +2663,7 @@ sp_head::show_create_routine(THD *thd, int type)
Item_empty_string *stmt_fld=
new Item_empty_string(col3_caption,
- max(m_defstr.length, 1024));
+ MY_MAX(m_defstr.length, 1024));
stmt_fld->maybe_null= TRUE;
@@ -2863,7 +2863,7 @@ sp_head::show_routine_code(THD *thd)
field_list.push_back(new Item_uint("Pos", 9));
// 1024 is for not to confuse old clients
field_list.push_back(new Item_empty_string("Instruction",
- max(buffer.length(), 1024)));
+ MY_MAX(buffer.length(), 1024)));
if (protocol->send_result_set_metadata(&field_list, Protocol::SEND_NUM_ROWS |
Protocol::SEND_EOF))
DBUG_RETURN(1);
diff --git a/sql/spatial.h b/sql/spatial.h
index 1277e7bc01c..1db9b5767e5 100644
--- a/sql/spatial.h
+++ b/sql/spatial.h
@@ -195,8 +195,8 @@ struct MBR
if (d != mbr->dimension() || d <= 0 || contains(mbr) || within(mbr))
return 0;
- MBR intersection(max(xmin, mbr->xmin), max(ymin, mbr->ymin),
- min(xmax, mbr->xmax), min(ymax, mbr->ymax));
+ MBR intersection(MY_MAX(xmin, mbr->xmin), MY_MAX(ymin, mbr->ymin),
+ MY_MIN(xmax, mbr->xmax), MY_MIN(ymax, mbr->ymax));
return (d == intersection.dimension());
}
diff --git a/sql/sql_acl.cc b/sql/sql_acl.cc
index cb7e35fae09..9f085133059 100644
--- a/sql/sql_acl.cc
+++ b/sql/sql_acl.cc
@@ -1328,7 +1328,7 @@ static ulong get_sort(uint count,...)
chars= 128; // Marker that chars existed
}
}
- sort= (sort << 8) + (wild_pos ? min(wild_pos, 127U) : chars);
+ sort= (sort << 8) + (wild_pos ? MY_MIN(wild_pos, 127U) : chars);
}
va_end(args);
return sort;
diff --git a/sql/sql_alter.h b/sql/sql_alter.h
index 6660748f666..f0c0a873a5c 100644
--- a/sql/sql_alter.h
+++ b/sql/sql_alter.h
@@ -16,51 +16,412 @@
#ifndef SQL_ALTER_TABLE_H
#define SQL_ALTER_TABLE_H
+class Alter_drop;
+class Alter_column;
+class Key;
+
+/**
+ Data describing the table being created by CREATE TABLE or
+ altered by ALTER TABLE.
+*/
+
+class Alter_info
+{
+public:
+ /*
+ These flags are set by the parser and describes the type of
+ operation(s) specified by the ALTER TABLE statement.
+
+ They do *not* describe the type operation(s) to be executed
+ by the storage engine. For example, we don't yet know the
+ type of index to be added/dropped.
+ */
+
+ // Set for ADD [COLUMN]
+ static const uint ALTER_ADD_COLUMN = 1L << 0;
+
+ // Set for DROP [COLUMN]
+ static const uint ALTER_DROP_COLUMN = 1L << 1;
+
+ // Set for CHANGE [COLUMN] | MODIFY [CHANGE]
+ // Set by mysql_recreate_table()
+ static const uint ALTER_CHANGE_COLUMN = 1L << 2;
+
+ // Set for ADD INDEX | ADD KEY | ADD PRIMARY KEY | ADD UNIQUE KEY |
+ // ADD UNIQUE INDEX | ALTER ADD [COLUMN]
+ static const uint ALTER_ADD_INDEX = 1L << 3;
+
+ // Set for DROP PRIMARY KEY | DROP FOREIGN KEY | DROP KEY | DROP INDEX
+ static const uint ALTER_DROP_INDEX = 1L << 4;
+
+ // Set for RENAME [TO]
+ static const uint ALTER_RENAME = 1L << 5;
+
+ // Set for ORDER BY
+ static const uint ALTER_ORDER = 1L << 6;
+
+ // Set for table_options
+ static const uint ALTER_OPTIONS = 1L << 7;
+
+ // Set for ALTER [COLUMN] ... SET DEFAULT ... | DROP DEFAULT
+ static const uint ALTER_CHANGE_COLUMN_DEFAULT = 1L << 8;
+
+ // Set for DISABLE KEYS | ENABLE KEYS
+ static const uint ALTER_KEYS_ONOFF = 1L << 9;
+
+ // Set for CONVERT TO CHARACTER SET
+ static const uint ALTER_CONVERT = 1L << 10;
+
+ // Set for FORCE
+ // Set by mysql_recreate_table()
+ static const uint ALTER_RECREATE = 1L << 11;
+
+ // Set for ADD PARTITION
+ static const uint ALTER_ADD_PARTITION = 1L << 12;
+
+ // Set for DROP PARTITION
+ static const uint ALTER_DROP_PARTITION = 1L << 13;
+
+ // Set for COALESCE PARTITION
+ static const uint ALTER_COALESCE_PARTITION = 1L << 14;
+
+ // Set for REORGANIZE PARTITION ... INTO
+ static const uint ALTER_REORGANIZE_PARTITION = 1L << 15;
+
+ // Set for partition_options
+ static const uint ALTER_PARTITION = 1L << 16;
+
+ // Set for LOAD INDEX INTO CACHE ... PARTITION
+ // Set for CACHE INDEX ... PARTITION
+ static const uint ALTER_ADMIN_PARTITION = 1L << 17;
+
+ // Set for REORGANIZE PARTITION
+ static const uint ALTER_TABLE_REORG = 1L << 18;
+
+ // Set for REBUILD PARTITION
+ static const uint ALTER_REBUILD_PARTITION = 1L << 19;
+
+ // Set for partitioning operations specifying ALL keyword
+ static const uint ALTER_ALL_PARTITION = 1L << 20;
+
+ // Set for REMOVE PARTITIONING
+ static const uint ALTER_REMOVE_PARTITIONING = 1L << 21;
+
+ // Set for ADD FOREIGN KEY
+ static const uint ADD_FOREIGN_KEY = 1L << 22;
+
+ // Set for DROP FOREIGN KEY
+ static const uint DROP_FOREIGN_KEY = 1L << 23;
+
+ // Set for EXCHANGE PARITION
+ static const uint ALTER_EXCHANGE_PARTITION = 1L << 24;
+
+ // Set by Sql_cmd_alter_table_truncate_partition::execute()
+ static const uint ALTER_TRUNCATE_PARTITION = 1L << 25;
+
+ // Set for ADD [COLUMN] FIRST | AFTER
+ static const uint ALTER_COLUMN_ORDER = 1L << 26;
+
+
+ enum enum_enable_or_disable { LEAVE_AS_IS, ENABLE, DISABLE };
+
+ /**
+ The different values of the ALGORITHM clause.
+ Describes which algorithm to use when altering the table.
+ */
+ enum enum_alter_table_algorithm
+ {
+ // In-place if supported, copy otherwise.
+ ALTER_TABLE_ALGORITHM_DEFAULT,
+
+ // In-place if supported, error otherwise.
+ ALTER_TABLE_ALGORITHM_INPLACE,
+
+ // Copy if supported, error otherwise.
+ ALTER_TABLE_ALGORITHM_COPY
+ };
+
+
+ /**
+ The different values of the LOCK clause.
+ Describes the level of concurrency during ALTER TABLE.
+ */
+ enum enum_alter_table_lock
+ {
+ // Maximum supported level of concurency for the given operation.
+ ALTER_TABLE_LOCK_DEFAULT,
+
+ // Allow concurrent reads & writes. If not supported, give erorr.
+ ALTER_TABLE_LOCK_NONE,
+
+ // Allow concurrent reads only. If not supported, give error.
+ ALTER_TABLE_LOCK_SHARED,
+
+ // Block reads and writes.
+ ALTER_TABLE_LOCK_EXCLUSIVE
+ };
+
+
+ // Columns and keys to be dropped.
+ List<Alter_drop> drop_list;
+ // Columns for ALTER_COLUMN_CHANGE_DEFAULT.
+ List<Alter_column> alter_list;
+ // List of keys, used by both CREATE and ALTER TABLE.
+ List<Key> key_list;
+ // List of columns, used by both CREATE and ALTER TABLE.
+ List<Create_field> create_list;
+ // Type of ALTER TABLE operation.
+ uint flags;
+ // Enable or disable keys.
+ enum_enable_or_disable keys_onoff;
+ // List of partitions.
+ List<char> partition_names;
+ // Number of partitions.
+ uint num_parts;
+ // Type of ALTER TABLE algorithm.
+ enum_alter_table_algorithm requested_algorithm;
+ // Type of ALTER TABLE lock.
+ enum_alter_table_lock requested_lock;
+
+
+ Alter_info() :
+ flags(0),
+ keys_onoff(LEAVE_AS_IS),
+ num_parts(0),
+ requested_algorithm(ALTER_TABLE_ALGORITHM_DEFAULT),
+ requested_lock(ALTER_TABLE_LOCK_DEFAULT)
+ {}
+
+ void reset()
+ {
+ drop_list.empty();
+ alter_list.empty();
+ key_list.empty();
+ create_list.empty();
+ flags= 0;
+ keys_onoff= LEAVE_AS_IS;
+ num_parts= 0;
+ partition_names.empty();
+ requested_algorithm= ALTER_TABLE_ALGORITHM_DEFAULT;
+ requested_lock= ALTER_TABLE_LOCK_DEFAULT;
+ }
+
+
+ /**
+ Construct a copy of this object to be used for mysql_alter_table
+ and mysql_create_table.
+
+ Historically, these two functions modify their Alter_info
+ arguments. This behaviour breaks re-execution of prepared
+ statements and stored procedures and is compensated by always
+ supplying a copy of Alter_info to these functions.
+
+ @param rhs Alter_info to make copy of
+ @param mem_root Mem_root for new Alter_info
+
+ @note You need to use check the error in THD for out
+ of memory condition after calling this function.
+ */
+ Alter_info(const Alter_info &rhs, MEM_ROOT *mem_root);
+
+
+ /**
+ Parses the given string and sets requested_algorithm
+ if the string value matches a supported value.
+ Supported values: INPLACE, COPY, DEFAULT
+
+ @param str String containing the supplied value
+ @retval false Supported value found, state updated
+ @retval true Not supported value, no changes made
+ */
+ bool set_requested_algorithm(const LEX_STRING *str);
+
+
+ /**
+ Parses the given string and sets requested_lock
+ if the string value matches a supported value.
+ Supported values: NONE, SHARED, EXCLUSIVE, DEFAULT
+
+ @param str String containing the supplied value
+ @retval false Supported value found, state updated
+ @retval true Not supported value, no changes made
+ */
+
+ bool set_requested_lock(const LEX_STRING *str);
+
+private:
+ Alter_info &operator=(const Alter_info &rhs); // not implemented
+ Alter_info(const Alter_info &rhs); // not implemented
+};
+
+
+/** Runtime context for ALTER TABLE. */
+class Alter_table_ctx
+{
+public:
+ Alter_table_ctx();
+
+ Alter_table_ctx(THD *thd, TABLE_LIST *table_list, uint tables_opened_arg,
+ char *new_db_arg, char *new_name_arg);
+
+ /**
+ @return true if the table is moved to another database, false otherwise.
+ */
+ bool is_database_changed() const
+ { return (new_db != db); };
+
+ /**
+ @return true if the table is renamed, false otherwise.
+ */
+ bool is_table_renamed() const
+ { return (is_database_changed() || new_name != table_name); };
+
+ /**
+ @return filename (including .frm) for the new table.
+ */
+ const char *get_new_filename() const
+ {
+ DBUG_ASSERT(!tmp_table);
+ return new_filename;
+ }
+
+ /**
+ @return path to the original table.
+ */
+ const char *get_path() const
+ {
+ DBUG_ASSERT(!tmp_table);
+ return path;
+ }
+
+ /**
+ @return path to the new table.
+ */
+ const char *get_new_path() const
+ {
+ DBUG_ASSERT(!tmp_table);
+ return new_path;
+ }
+
+ /**
+ @return path to the temporary table created during ALTER TABLE.
+ */
+ const char *get_tmp_path() const
+ { return tmp_path; }
+
+ /**
+ Mark ALTER TABLE as needing to produce foreign key error if
+ it deletes a row from the table being changed.
+ */
+ void set_fk_error_if_delete_row(FOREIGN_KEY_INFO *fk)
+ {
+ fk_error_if_delete_row= true;
+ fk_error_id= fk->foreign_id->str;
+ fk_error_table= fk->foreign_table->str;
+ }
+
+public:
+ Create_field *datetime_field;
+ bool error_if_not_empty;
+ uint tables_opened;
+ char *db;
+ char *table_name;
+ char *alias;
+ char *new_db;
+ char *new_name;
+ char *new_alias;
+ char tmp_name[80];
+ /**
+ Indicates that if a row is deleted during copying of data from old version
+ of table to the new version ER_FK_CANNOT_DELETE_PARENT error should be
+ emitted.
+ */
+ bool fk_error_if_delete_row;
+ /** Name of foreign key for the above error. */
+ const char *fk_error_id;
+ /** Name of table for the above error. */
+ const char *fk_error_table;
+
+private:
+ char new_filename[FN_REFLEN + 1];
+ char new_alias_buff[FN_REFLEN + 1];
+ char path[FN_REFLEN + 1];
+ char new_path[FN_REFLEN + 1];
+ char tmp_path[FN_REFLEN + 1];
+
+#ifndef DBUG_OFF
+ /** Indicates that we are altering temporary table. Used only in asserts. */
+ bool tmp_table;
+#endif
+
+ Alter_table_ctx &operator=(const Alter_table_ctx &rhs); // not implemented
+ Alter_table_ctx(const Alter_table_ctx &rhs); // not implemented
+};
+
+
/**
- Alter_table_common represents the common properties of the ALTER TABLE
+ Sql_cmd_common_alter_table represents the common properties of the ALTER TABLE
statements.
@todo move Alter_info and other ALTER generic structures from Lex here.
*/
-class Alter_table_common : public Sql_statement
+class Sql_cmd_common_alter_table : public Sql_cmd
{
protected:
/**
Constructor.
- @param lex the LEX structure for this statement.
*/
- Alter_table_common(LEX *lex)
- : Sql_statement(lex)
+ Sql_cmd_common_alter_table()
{}
- virtual ~Alter_table_common()
+ virtual ~Sql_cmd_common_alter_table()
{}
+ virtual enum_sql_command sql_command_code() const
+ {
+ return SQLCOM_ALTER_TABLE;
+ }
};
/**
- Alter_table_statement represents the generic ALTER TABLE statement.
+ Sql_cmd_alter_table represents the generic ALTER TABLE statement.
@todo move Alter_info and other ALTER specific structures from Lex here.
*/
-class Alter_table_statement : public Alter_table_common
+class Sql_cmd_alter_table : public Sql_cmd_common_alter_table
{
public:
/**
Constructor, used to represent a ALTER TABLE statement.
- @param lex the LEX structure for this statement.
*/
- Alter_table_statement(LEX *lex)
- : Alter_table_common(lex)
+ Sql_cmd_alter_table()
{}
- ~Alter_table_statement()
+ ~Sql_cmd_alter_table()
{}
- /**
- Execute a ALTER TABLE statement at runtime.
- @param thd the current thread.
- @return false on success.
- */
bool execute(THD *thd);
};
+
+/**
+ Sql_cmd_alter_table_tablespace represents ALTER TABLE
+ IMPORT/DISCARD TABLESPACE statements.
+*/
+class Sql_cmd_discard_import_tablespace : public Sql_cmd_common_alter_table
+{
+public:
+ enum enum_tablespace_op_type
+ {
+ DISCARD_TABLESPACE, IMPORT_TABLESPACE
+ };
+
+ Sql_cmd_discard_import_tablespace(enum_tablespace_op_type tablespace_op_arg)
+ : m_tablespace_op(tablespace_op_arg)
+ {}
+
+ bool execute(THD *thd);
+
+private:
+ const enum_tablespace_op_type m_tablespace_op;
+};
+
#endif
diff --git a/sql/sql_analyse.cc b/sql/sql_analyse.cc
index 31e13882515..6a590c91e5e 100644
--- a/sql/sql_analyse.cc
+++ b/sql/sql_analyse.cc
@@ -282,16 +282,16 @@ bool get_ev_num_info(EV_NUM_INFO *ev_info, NUM_INFO *info, const char *num)
{
if (((longlong) info->ullval) < 0)
return 0; // Impossible to store as a negative number
- ev_info->llval = -(longlong) max((ulonglong) -ev_info->llval,
+ ev_info->llval = -(longlong) MY_MAX((ulonglong) -ev_info->llval,
info->ullval);
- ev_info->min_dval = (double) -max(-ev_info->min_dval, info->dval);
+ ev_info->min_dval = (double) -MY_MAX(-ev_info->min_dval, info->dval);
}
else // ulonglong is as big as bigint in MySQL
{
if ((check_ulonglong(num, info->integers) == DECIMAL_NUM))
return 0;
- ev_info->ullval = (ulonglong) max(ev_info->ullval, info->ullval);
- ev_info->max_dval = (double) max(ev_info->max_dval, info->dval);
+ ev_info->ullval = (ulonglong) MY_MAX(ev_info->ullval, info->ullval);
+ ev_info->max_dval = (double) MY_MAX(ev_info->max_dval, info->dval);
}
return 1;
} // get_ev_num_info
@@ -1040,7 +1040,7 @@ String *field_decimal::avg(String *s, ha_rows rows)
my_decimal_div(E_DEC_FATAL_ERROR, &avg_val, sum+cur_sum, &num, prec_increment);
/* TODO remove this after decimal_div returns proper frac */
my_decimal_round(E_DEC_FATAL_ERROR, &avg_val,
- min(sum[cur_sum].frac + prec_increment, DECIMAL_MAX_SCALE),
+ MY_MIN(sum[cur_sum].frac + prec_increment, DECIMAL_MAX_SCALE),
FALSE,&rounded_avg);
my_decimal2string(E_DEC_FATAL_ERROR, &rounded_avg, 0, 0, '0', s);
return s;
@@ -1065,7 +1065,7 @@ String *field_decimal::std(String *s, ha_rows rows)
my_decimal_div(E_DEC_FATAL_ERROR, &tmp, &sum2, &num, prec_increment);
my_decimal2double(E_DEC_FATAL_ERROR, &tmp, &std_sqr);
s->set_real(((double) std_sqr <= 0.0 ? 0.0 : sqrt(std_sqr)),
- min(item->decimals + prec_increment, NOT_FIXED_DEC), my_thd_charset);
+ MY_MIN(item->decimals + prec_increment, NOT_FIXED_DEC), my_thd_charset);
return s;
}
@@ -1182,7 +1182,7 @@ bool analyse::change_columns(List<Item> &field_list)
func_items[8] = new Item_proc_string("Std", 255);
func_items[8]->maybe_null = 1;
func_items[9] = new Item_proc_string("Optimal_fieldtype",
- max(64, output_str_length));
+ MY_MAX(64, output_str_length));
for (uint i = 0; i < array_elements(func_items); i++)
field_list.push_back(func_items[i]);
diff --git a/sql/sql_cache.cc b/sql/sql_cache.cc
index f5e1deab546..dca0da651f7 100644
--- a/sql/sql_cache.cc
+++ b/sql/sql_cache.cc
@@ -1221,7 +1221,7 @@ void Query_cache::end_of_result(THD *thd)
}
last_result_block= header->result()->prev;
allign_size= ALIGN_SIZE(last_result_block->used);
- len= max(query_cache.min_allocation_unit, allign_size);
+ len= MY_MAX(query_cache.min_allocation_unit, allign_size);
if (last_result_block->length >= query_cache.min_allocation_unit + len)
query_cache.split_block(last_result_block,len);
@@ -2875,7 +2875,7 @@ Query_cache::write_block_data(ulong data_len, uchar* data,
DBUG_ENTER("Query_cache::write_block_data");
DBUG_PRINT("qcache", ("data: %ld, header: %ld, all header: %ld",
data_len, header_len, all_headers_len));
- Query_cache_block *block= allocate_block(max(align_len,
+ Query_cache_block *block= allocate_block(MY_MAX(align_len,
min_allocation_unit),1, 0);
if (block != 0)
{
@@ -2930,7 +2930,7 @@ Query_cache::append_result_data(Query_cache_block **current_block,
ulong append_min = get_min_append_result_data_size();
if (last_block_free_space < data_len &&
append_next_free_block(last_block,
- max(tail, append_min)))
+ MY_MAX(tail, append_min)))
last_block_free_space = last_block->length - last_block->used;
// If no space in last block (even after join) allocate new block
if (last_block_free_space < data_len)
@@ -2958,7 +2958,7 @@ Query_cache::append_result_data(Query_cache_block **current_block,
// Now finally write data to the last block
if (success && last_block_free_space > 0)
{
- ulong to_copy = min(data_len,last_block_free_space);
+ ulong to_copy = MY_MIN(data_len,last_block_free_space);
DBUG_PRINT("qcache", ("use free space %lub at block 0x%lx to copy %lub",
last_block_free_space, (ulong)last_block, to_copy));
memcpy((uchar*) last_block + last_block->used, data, to_copy);
@@ -3046,8 +3046,8 @@ inline ulong Query_cache::get_min_first_result_data_size()
if (queries_in_cache < QUERY_CACHE_MIN_ESTIMATED_QUERIES_NUMBER)
return min_result_data_size;
ulong avg_result = (query_cache_size - free_memory) / queries_in_cache;
- avg_result = min(avg_result, query_cache_limit);
- return max(min_result_data_size, avg_result);
+ avg_result = MY_MIN(avg_result, query_cache_limit);
+ return MY_MAX(min_result_data_size, avg_result);
}
inline ulong Query_cache::get_min_append_result_data_size()
@@ -3079,7 +3079,7 @@ my_bool Query_cache::allocate_data_chain(Query_cache_block **result_block,
ulong len= data_len + all_headers_len;
ulong align_len= ALIGN_SIZE(len);
- if (!(new_block= allocate_block(max(min_size, align_len),
+ if (!(new_block= allocate_block(MY_MAX(min_size, align_len),
min_result_data_size == 0,
all_headers_len + min_result_data_size)))
{
@@ -3088,7 +3088,7 @@ my_bool Query_cache::allocate_data_chain(Query_cache_block **result_block,
}
new_block->n_tables = 0;
- new_block->used = min(len, new_block->length);
+ new_block->used = MY_MIN(len, new_block->length);
new_block->type = Query_cache_block::RES_INCOMPLETE;
new_block->next = new_block->prev = new_block;
Query_cache_result *header = new_block->result();
@@ -3494,7 +3494,7 @@ Query_cache::allocate_block(ulong len, my_bool not_less, ulong min)
DBUG_PRINT("qcache", ("len %lu, not less %d, min %lu",
len, not_less,min));
- if (len >= min(query_cache_size, query_cache_limit))
+ if (len >= MY_MIN(query_cache_size, query_cache_limit))
{
DBUG_PRINT("qcache", ("Query cache hase only %lu memory and limit %lu",
query_cache_size, query_cache_limit));
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index a068cdc8f88..3f95f13c464 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -361,18 +361,6 @@ void thd_lock_thread_count(THD *)
}
/**
- Lock connection data for the set of connections this connection
- belongs to
-
- @param thd THD object
-*/
-void thd_unlock_thread_count(THD *)
-{
- mysql_cond_broadcast(&COND_thread_count);
- mysql_mutex_unlock(&LOCK_thread_count);
-}
-
-/**
Close the socket used by this connection
@param thd THD object
@@ -604,7 +592,7 @@ void THD::enter_stage(const PSI_stage_info *new_stage,
proc_info= msg;
#ifdef HAVE_PSI_THREAD_INTERFACE
- PSI_CALL(set_thread_state)(msg);
+ PSI_THREAD_CALL(set_thread_state)(msg);
MYSQL_SET_STAGE(m_current_stage_key, calling_file, calling_line);
#endif
}
@@ -782,7 +770,7 @@ char *thd_security_context(THD *thd, char *buffer, unsigned int length,
if (max_query_len < 1)
len= thd->query_length();
else
- len= min(thd->query_length(), max_query_len);
+ len= MY_MIN(thd->query_length(), max_query_len);
str.append('\n');
str.append(thd->query(), len);
}
@@ -797,7 +785,7 @@ char *thd_security_context(THD *thd, char *buffer, unsigned int length,
was reallocated to a larger buffer to be able to fit.
*/
DBUG_ASSERT(buffer != NULL);
- length= min(str.length(), length-1);
+ length= MY_MIN(str.length(), length-1);
memcpy(buffer, str.c_ptr_quick(), length);
/* Make sure that the new string is null terminated */
buffer[length]= '\0';
@@ -937,6 +925,7 @@ THD::THD()
mysys_var=0;
binlog_evt_union.do_union= FALSE;
enable_slow_log= 0;
+ durability_property= HA_REGULAR_DURABILITY;
#ifndef DBUG_OFF
dbug_sentry=THD_SENTRY_MAGIC;
@@ -1953,6 +1942,42 @@ void THD::cleanup_after_query()
table_map_for_update= 0;
m_binlog_invoker= FALSE;
+extern "C" enum durability_properties thd_get_durability_property(const MYSQL_THD thd)
+{
+ enum durability_properties ret= HA_REGULAR_DURABILITY;
+
+ if (thd != NULL)
+ ret= thd->durability_property;
+
+ return ret;
+}
+
+/** Get the auto_increment_offset auto_increment_increment.
+Needed by InnoDB.
+@param thd Thread object
+@param off auto_increment_offset
+@param inc auto_increment_increment */
+extern "C" void thd_get_autoinc(const MYSQL_THD thd, ulong* off, ulong* inc)
+{
+ *off = thd->variables.auto_increment_offset;
+ *inc = thd->variables.auto_increment_increment;
+}
+
+
+/**
+ Is strict sql_mode set.
+ Needed by InnoDB.
+ @param thd Thread object
+ @return True if sql_mode has strict mode (all or trans).
+ @retval true sql_mode has strict mode (all or trans).
+ @retval false sql_mode has not strict mode (all or trans).
+*/
+extern "C" bool thd_is_strict_mode(const MYSQL_THD thd)
+{
+ return thd->is_strict_mode();
+}
+
+
#ifndef EMBEDDED_LIBRARY
if (rli_slave)
rli_slave->cleanup_after_query();
@@ -2826,7 +2851,7 @@ int select_export::send_data(List<Item> &items)
else
{
if (fixed_row_size)
- used_length=min(res->length(),item->max_length);
+ used_length=MY_MIN(res->length(),item->max_length);
else
used_length=res->length();
if ((result_type == STRING_RESULT || is_unsafe_field_sep) &&
@@ -4000,7 +4025,7 @@ static void thd_send_progress(THD *thd)
ulonglong report_time= my_interval_timer();
if (report_time > thd->progress.next_report_time)
{
- uint seconds_to_next= max(thd->variables.progress_report_time,
+ uint seconds_to_next= MY_MAX(thd->variables.progress_report_time,
global_system_variables.progress_report_time);
if (seconds_to_next == 0) // Turned off
seconds_to_next= 1; // Check again after 1 second
@@ -4412,7 +4437,7 @@ void THD::inc_status_created_tmp_disk_tables()
{
status_var_increment(status_var.created_tmp_disk_tables_);
#ifdef HAVE_PSI_STATEMENT_INTERFACE
- PSI_CALL(inc_statement_created_tmp_disk_tables)(m_statement_psi, 1);
+ PSI_STATEMENT_CALL(inc_statement_created_tmp_disk_tables)(m_statement_psi, 1);
#endif
}
@@ -4420,7 +4445,7 @@ void THD::inc_status_created_tmp_tables()
{
status_var_increment(status_var.created_tmp_tables_);
#ifdef HAVE_PSI_STATEMENT_INTERFACE
- PSI_CALL(inc_statement_created_tmp_tables)(m_statement_psi, 1);
+ PSI_STATEMENT_CALL(inc_statement_created_tmp_tables)(m_statement_psi, 1);
#endif
}
@@ -4428,7 +4453,7 @@ void THD::inc_status_select_full_join()
{
status_var_increment(status_var.select_full_join_count_);
#ifdef HAVE_PSI_STATEMENT_INTERFACE
- PSI_CALL(inc_statement_select_full_join)(m_statement_psi, 1);
+ PSI_STATEMENT_CALL(inc_statement_select_full_join)(m_statement_psi, 1);
#endif
}
@@ -4436,7 +4461,7 @@ void THD::inc_status_select_full_range_join()
{
status_var_increment(status_var.select_full_range_join_count_);
#ifdef HAVE_PSI_STATEMENT_INTERFACE
- PSI_CALL(inc_statement_select_full_range_join)(m_statement_psi, 1);
+ PSI_STATEMENT_CALL(inc_statement_select_full_range_join)(m_statement_psi, 1);
#endif
}
@@ -4444,7 +4469,7 @@ void THD::inc_status_select_range()
{
status_var_increment(status_var.select_range_count_);
#ifdef HAVE_PSI_STATEMENT_INTERFACE
- PSI_CALL(inc_statement_select_range)(m_statement_psi, 1);
+ PSI_STATEMENT_CALL(inc_statement_select_range)(m_statement_psi, 1);
#endif
}
@@ -4452,7 +4477,7 @@ void THD::inc_status_select_range_check()
{
status_var_increment(status_var.select_range_check_count_);
#ifdef HAVE_PSI_STATEMENT_INTERFACE
- PSI_CALL(inc_statement_select_range_check)(m_statement_psi, 1);
+ PSI_STATEMENT_CALL(inc_statement_select_range_check)(m_statement_psi, 1);
#endif
}
@@ -4460,7 +4485,7 @@ void THD::inc_status_select_scan()
{
status_var_increment(status_var.select_scan_count_);
#ifdef HAVE_PSI_STATEMENT_INTERFACE
- PSI_CALL(inc_statement_select_scan)(m_statement_psi, 1);
+ PSI_STATEMENT_CALL(inc_statement_select_scan)(m_statement_psi, 1);
#endif
}
@@ -4468,7 +4493,7 @@ void THD::inc_status_sort_merge_passes()
{
status_var_increment(status_var.filesort_merge_passes_);
#ifdef HAVE_PSI_STATEMENT_INTERFACE
- PSI_CALL(inc_statement_sort_merge_passes)(m_statement_psi, 1);
+ PSI_STATEMENT_CALL(inc_statement_sort_merge_passes)(m_statement_psi, 1);
#endif
}
@@ -4476,7 +4501,7 @@ void THD::inc_status_sort_range()
{
status_var_increment(status_var.filesort_range_count_);
#ifdef HAVE_PSI_STATEMENT_INTERFACE
- PSI_CALL(inc_statement_sort_range)(m_statement_psi, 1);
+ PSI_STATEMENT_CALL(inc_statement_sort_range)(m_statement_psi, 1);
#endif
}
@@ -4484,7 +4509,7 @@ void THD::inc_status_sort_rows(ha_rows count)
{
statistic_add(status_var.filesort_rows_, count, &LOCK_status);
#ifdef HAVE_PSI_STATEMENT_INTERFACE
- PSI_CALL(inc_statement_sort_rows)(m_statement_psi, count);
+ PSI_STATEMENT_CALL(inc_statement_sort_rows)(m_statement_psi, count);
#endif
}
@@ -4492,7 +4517,7 @@ void THD::inc_status_sort_scan()
{
status_var_increment(status_var.filesort_scan_count_);
#ifdef HAVE_PSI_STATEMENT_INTERFACE
- PSI_CALL(inc_statement_sort_scan)(m_statement_psi, 1);
+ PSI_STATEMENT_CALL(inc_statement_sort_scan)(m_statement_psi, 1);
#endif
}
@@ -4500,7 +4525,7 @@ void THD::set_status_no_index_used()
{
server_status|= SERVER_QUERY_NO_INDEX_USED;
#ifdef HAVE_PSI_STATEMENT_INTERFACE
- PSI_CALL(set_statement_no_index_used)(m_statement_psi);
+ PSI_STATEMENT_CALL(set_statement_no_index_used)(m_statement_psi);
#endif
}
@@ -4508,7 +4533,7 @@ void THD::set_status_no_good_index_used()
{
server_status|= SERVER_QUERY_NO_GOOD_INDEX_USED;
#ifdef HAVE_PSI_STATEMENT_INTERFACE
- PSI_CALL(set_statement_no_good_index_used)(m_statement_psi);
+ PSI_STATEMENT_CALL(set_statement_no_good_index_used)(m_statement_psi);
#endif
}
@@ -4516,7 +4541,7 @@ void THD::set_command(enum enum_server_command command)
{
m_command= command;
#ifdef HAVE_PSI_THREAD_INTERFACE
- PSI_CALL(set_thread_command)(m_command);
+ PSI_STATEMENT_CALL(set_thread_command)(m_command);
#endif
}
@@ -4529,7 +4554,7 @@ void THD::set_query(const CSET_STRING &string_arg)
mysql_mutex_unlock(&LOCK_thd_data);
#ifdef HAVE_PSI_THREAD_INTERFACE
- PSI_CALL(set_thread_info)(query(), query_length());
+ PSI_THREAD_CALL(set_thread_info)(query(), query_length());
#endif
}
diff --git a/sql/sql_class.h b/sql/sql_class.h
index fccca9e8cbf..ba9f75b5d84 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -232,11 +232,14 @@ public:
class Alter_drop :public Sql_alloc {
public:
- enum drop_type {KEY, COLUMN };
+ enum drop_type {KEY, COLUMN, FOREIGN_KEY };
const char *name;
enum drop_type type;
Alter_drop(enum drop_type par_type,const char *par_name)
- :name(par_name), type(par_type) {}
+ :name(par_name), type(par_type)
+ {
+ DBUG_ASSERT(par_name != NULL);
+ }
/**
Used to make a clone of this object for ALTER/CREATE TABLE
@sa comment for Key_part_spec::clone
@@ -309,17 +312,22 @@ public:
enum fk_option { FK_OPTION_UNDEF, FK_OPTION_RESTRICT, FK_OPTION_CASCADE,
FK_OPTION_SET_NULL, FK_OPTION_NO_ACTION, FK_OPTION_DEFAULT};
- Table_ident *ref_table;
+ LEX_STRING ref_db;
+ LEX_STRING ref_table;
List<Key_part_spec> ref_columns;
uint delete_opt, update_opt, match_opt;
Foreign_key(const LEX_STRING &name_arg, List<Key_part_spec> &cols,
- Table_ident *table, List<Key_part_spec> &ref_cols,
+ const LEX_STRING &ref_db_arg, const LEX_STRING &ref_table_arg,
+ List<Key_part_spec> &ref_cols,
uint delete_opt_arg, uint update_opt_arg, uint match_opt_arg)
:Key(FOREIGN_KEY, name_arg, &default_key_create_info, 0, cols, NULL),
- ref_table(table), ref_columns(ref_cols),
+ ref_db(ref_db_arg), ref_table(ref_table_arg), ref_columns(ref_cols),
delete_opt(delete_opt_arg), update_opt(update_opt_arg),
match_opt(match_opt_arg)
- {}
+ {
+ // We don't check for duplicate FKs.
+ key_create_info.check_for_duplicate_indexes= false;
+ }
Foreign_key(const Foreign_key &rhs, MEM_ROOT *mem_root);
/**
Used to make a clone of this object for ALTER/CREATE TABLE
@@ -327,8 +335,6 @@ public:
*/
virtual Key *clone(MEM_ROOT *mem_root) const
{ return new (mem_root) Foreign_key(*this, mem_root); }
- /* Used to validate foreign key options */
- bool validate(List<Create_field> &table_fields);
};
typedef struct st_mysql_lock
@@ -2289,6 +2295,12 @@ public:
MEM_ROOT *user_var_events_alloc; /* Allocate above array elements here */
/*
+ Define durability properties that engines may check to
+ improve performance. Not yet used in MariaDB
+ */
+ enum durability_properties durability_property;
+
+ /*
If checking this in conjunction with a wait condition, please
include a check after enter_cond() if you want to avoid a race
condition. For details see the implementation of awake(),
@@ -2586,7 +2598,7 @@ public:
start_time= hrtime_to_my_time(hrtime);
start_time_sec_part= hrtime_sec_part(hrtime);
#ifdef HAVE_PSI_THREAD_INTERFACE
- PSI_CALL(set_thread_start_time)(start_time);
+ PSI_THREAD_CALL(set_thread_start_time)(start_time);
#endif
}
inline void set_start_time()
@@ -2596,7 +2608,7 @@ public:
start_time= hrtime_to_my_time(user_time);
start_time_sec_part= hrtime_sec_part(user_time);
#ifdef HAVE_PSI_THREAD_INTERFACE
- PSI_CALL(set_thread_start_time)(start_time);
+ PSI_THREAD_CALL(set_thread_start_time)(start_time);
#endif
}
else
@@ -3032,7 +3044,7 @@ public:
result= new_db && !db;
#ifdef HAVE_PSI_THREAD_INTERFACE
if (result)
- PSI_CALL(set_thread_db)(new_db, new_db_len);
+ PSI_THREAD_CALL(set_thread_db)(new_db, new_db_len);
#endif
return result;
}
@@ -3053,7 +3065,7 @@ public:
db= new_db;
db_length= new_db_len;
#ifdef HAVE_PSI_THREAD_INTERFACE
- PSI_CALL(set_thread_db)(new_db, new_db_len);
+ PSI_THREAD_CALL(set_thread_db)(new_db, new_db_len);
#endif
}
/*
diff --git a/sql/sql_client.cc b/sql/sql_client.cc
index eb6c039c065..e7c555b5947 100644
--- a/sql/sql_client.cc
+++ b/sql/sql_client.cc
@@ -36,7 +36,7 @@ void my_net_local_init(NET *net)
(uint)global_system_variables.net_write_timeout);
net->retry_count= (uint) global_system_variables.net_retry_count;
- net->max_packet_size= max(global_system_variables.net_buffer_length,
+ net->max_packet_size= MY_MAX(global_system_variables.net_buffer_length,
global_system_variables.max_allowed_packet);
#endif
}
diff --git a/sql/sql_connect.cc b/sql/sql_connect.cc
index 59aa51916fb..33b5394de9f 100644
--- a/sql/sql_connect.cc
+++ b/sql/sql_connect.cc
@@ -431,7 +431,7 @@ void init_user_stats(USER_STATS *user_stats,
DBUG_ENTER("init_user_stats");
DBUG_PRINT("enter", ("user: %s priv_user: %s", user, priv_user));
- user_length= min(user_length, sizeof(user_stats->user)-1);
+ user_length= MY_MIN(user_length, sizeof(user_stats->user)-1);
memcpy(user_stats->user, user, user_length);
user_stats->user[user_length]= 0;
user_stats->user_name_length= user_length;
@@ -934,7 +934,7 @@ static int check_connection(THD *thd)
if (thd->main_security_ctx.host)
{
if (thd->main_security_ctx.host != my_localhost)
- thd->main_security_ctx.host[min(strlen(thd->main_security_ctx.host),
+ thd->main_security_ctx.host[MY_MIN(strlen(thd->main_security_ctx.host),
HOSTNAME_LENGTH)]= 0;
thd->main_security_ctx.host_or_ip= thd->main_security_ctx.host;
}
diff --git a/sql/sql_const.h b/sql/sql_const.h
index c6aa52197d5..9849f10b6ac 100644
--- a/sql/sql_const.h
+++ b/sql/sql_const.h
@@ -67,7 +67,7 @@
#define PSEUDO_TABLE_BITS (PARAM_TABLE_BIT | OUTER_REF_TABLE_BIT | \
RAND_TABLE_BIT)
#define MAX_FIELDS 4096 /* Limit in the .frm file */
-#define MAX_PARTITIONS 1024
+#define MAX_PARTITIONS 8192
#define MAX_SELECT_NESTING (sizeof(nesting_map)*8-1)
@@ -128,6 +128,13 @@
*/
#define TABLE_DEF_CACHE_MIN 400
+/**
+ Maximum number of connections default value.
+ 151 is larger than Apache's default max children,
+ to avoid "too many connections" error in a common setup.
+*/
+#define MAX_CONNECTIONS_DEFAULT 151
+
/*
Stack reservation.
Feel free to raise this by the smallest amount you can to get the
@@ -232,7 +239,7 @@
#define DELAYED_LIMIT 100 /**< pause after xxx inserts */
#define DELAYED_QUEUE_SIZE 1000
#define DELAYED_WAIT_TIMEOUT 5*60 /**< Wait for delayed insert */
-#define MAX_CONNECT_ERRORS 10 ///< errors before disabling host
+#define MAX_CONNECT_ERRORS 100 ///< errors before disabling host
#define LONG_TIMEOUT ((ulong) 3600L*24L*365L)
diff --git a/sql/sql_error.cc b/sql/sql_error.cc
index 23a60267737..038f4876b14 100644
--- a/sql/sql_error.cc
+++ b/sql/sql_error.cc
@@ -834,7 +834,7 @@ uint32 convert_error_message(char *to, uint32 to_length, CHARSET_INFO *to_cs,
if (!to_cs || from_cs == to_cs || to_cs == &my_charset_bin)
{
- length= min(to_length, from_length);
+ length= MY_MIN(to_length, from_length);
memmove(to, from, length);
to[length]= 0;
return length;
diff --git a/sql/sql_join_cache.cc b/sql/sql_join_cache.cc
index fde9f70fa79..ad327c378b8 100644
--- a/sql/sql_join_cache.cc
+++ b/sql/sql_join_cache.cc
@@ -696,7 +696,7 @@ void JOIN_CACHE::set_constants()
pack_length_with_blob_ptrs= pack_length + blobs*sizeof(uchar *);
min_buff_size= 0;
min_records= 1;
- buff_size= max(join->thd->variables.join_buff_size,
+ buff_size= MY_MAX(join->thd->variables.join_buff_size,
get_min_join_buffer_size());
size_of_rec_ofs= offset_size(buff_size);
size_of_rec_len= blobs ? size_of_rec_ofs : offset_size(len);
@@ -2739,7 +2739,7 @@ int JOIN_CACHE_HASHED::init_hash_table()
key_entries= 0;
/* Calculate the minimal possible value of size_of_key_ofs greater than 1 */
- uint max_size_of_key_ofs= max(2, get_size_of_rec_offset());
+ uint max_size_of_key_ofs= MY_MAX(2, get_size_of_rec_offset());
for (size_of_key_ofs= 2;
size_of_key_ofs <= max_size_of_key_ofs;
size_of_key_ofs+= 2)
diff --git a/sql/sql_join_cache.h b/sql/sql_join_cache.h
index 6953f6881ee..1c56fc9b178 100644
--- a/sql/sql_join_cache.h
+++ b/sql/sql_join_cache.h
@@ -420,7 +420,7 @@ protected:
/* Shall calculate how much space is remaining in the join buffer */
virtual size_t rem_space()
{
- return max(buff_size-(end_pos-buff)-aux_buff_size,0);
+ return MY_MAX(buff_size-(end_pos-buff)-aux_buff_size,0);
}
/*
@@ -943,7 +943,7 @@ protected:
*/
size_t rem_space()
{
- return max(last_key_entry-end_pos-aux_buff_size,0);
+ return MY_MAX(last_key_entry-end_pos-aux_buff_size,0);
}
/*
diff --git a/sql/sql_lex.h b/sql/sql_lex.h
index cc117e18d1e..61230113506 100644
--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@@ -26,6 +26,7 @@
#include "thr_lock.h" /* thr_lock_type, TL_UNLOCK */
#include "mem_root_array.h"
#include "sql_cmd.h"
+#include "sql_alter.h" // Alter_info
/* YACC and LEX Definitions */
@@ -991,110 +992,6 @@ inline bool st_select_lex_unit::is_union ()
first_select()->next_select()->linkage == UNION_TYPE;
}
-#define ALTER_ADD_COLUMN (1L << 0)
-#define ALTER_DROP_COLUMN (1L << 1)
-#define ALTER_CHANGE_COLUMN (1L << 2)
-#define ALTER_ADD_INDEX (1L << 3)
-#define ALTER_DROP_INDEX (1L << 4)
-#define ALTER_RENAME (1L << 5)
-#define ALTER_ORDER (1L << 6)
-#define ALTER_OPTIONS (1L << 7)
-#define ALTER_CHANGE_COLUMN_DEFAULT (1L << 8)
-#define ALTER_KEYS_ONOFF (1L << 9)
-#define ALTER_CONVERT (1L << 10)
-#define ALTER_RECREATE (1L << 11)
-#define ALTER_ADD_PARTITION (1L << 12)
-#define ALTER_DROP_PARTITION (1L << 13)
-#define ALTER_COALESCE_PARTITION (1L << 14)
-#define ALTER_REORGANIZE_PARTITION (1L << 15)
-#define ALTER_PARTITION (1L << 16)
-#define ALTER_ADMIN_PARTITION (1L << 17)
-#define ALTER_TABLE_REORG (1L << 18)
-#define ALTER_REBUILD_PARTITION (1L << 19)
-#define ALTER_ALL_PARTITION (1L << 20)
-#define ALTER_REMOVE_PARTITIONING (1L << 21)
-#define ALTER_FOREIGN_KEY (1L << 22)
-#define ALTER_TRUNCATE_PARTITION (1L << 23)
-
-enum enum_alter_table_change_level
-{
- ALTER_TABLE_METADATA_ONLY= 0,
- ALTER_TABLE_DATA_CHANGED= 1,
- ALTER_TABLE_INDEX_CHANGED= 2
-};
-
-
-/**
- Temporary hack to enable a class bound forward declaration
- of the enum_alter_table_change_level enumeration. To be
- removed once Alter_info is moved to the sql_alter.h
- header.
-*/
-class Alter_table_change_level
-{
-private:
- typedef enum enum_alter_table_change_level enum_type;
- enum_type value;
-public:
- void operator = (enum_type v) { value = v; }
- operator enum_type () { return value; }
-};
-
-
-/**
- @brief Parsing data for CREATE or ALTER TABLE.
-
- This structure contains a list of columns or indexes to be created,
- altered or dropped.
-*/
-
-class Alter_info
-{
-public:
- List<Alter_drop> drop_list;
- List<Alter_column> alter_list;
- List<Key> key_list;
- List<Create_field> create_list;
- uint flags;
- enum enum_enable_or_disable keys_onoff;
- enum tablespace_op_type tablespace_op;
- List<char> partition_names;
- uint num_parts;
- enum_alter_table_change_level change_level;
- Create_field *datetime_field;
- bool error_if_not_empty;
-
-
- Alter_info() :
- flags(0),
- keys_onoff(LEAVE_AS_IS),
- tablespace_op(NO_TABLESPACE_OP),
- num_parts(0),
- change_level(ALTER_TABLE_METADATA_ONLY),
- datetime_field(NULL),
- error_if_not_empty(FALSE)
- {}
-
- void reset()
- {
- drop_list.empty();
- alter_list.empty();
- key_list.empty();
- create_list.empty();
- flags= 0;
- keys_onoff= LEAVE_AS_IS;
- tablespace_op= NO_TABLESPACE_OP;
- num_parts= 0;
- partition_names.empty();
- change_level= ALTER_TABLE_METADATA_ONLY;
- datetime_field= 0;
- error_if_not_empty= FALSE;
- }
- Alter_info(const Alter_info &rhs, MEM_ROOT *mem_root);
-private:
- Alter_info &operator=(const Alter_info &rhs); // not implemented
- Alter_info(const Alter_info &rhs); // not implemented
-};
struct st_sp_chistics
{
diff --git a/sql/sql_load.cc b/sql/sql_load.cc
index 11e23b56f71..64505b82413 100644
--- a/sql/sql_load.cc
+++ b/sql/sql_load.cc
@@ -1364,7 +1364,7 @@ READ_INFO::READ_INFO(File file_par, uint tot_length, CHARSET_INFO *cs,
line_term_char= line_term_length ? (uchar) line_term_ptr[0] : INT_MAX;
/* Set of a stack for unget if long terminators */
- uint length= max(cs->mbmaxlen, max(field_term_length, line_term_length)) + 1;
+ uint length= MY_MAX(cs->mbmaxlen, MY_MAX(field_term_length, line_term_length)) + 1;
set_if_bigger(length,line_start.length());
stack=stack_pos=(int*) sql_alloc(sizeof(int)*length);
diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc
index f1362674d0c..5beba029b20 100644
--- a/sql/sql_parse.cc
+++ b/sql/sql_parse.cc
@@ -5739,7 +5739,7 @@ bool check_stack_overrun(THD *thd, long margin,
return 1;
}
#ifndef DBUG_OFF
- max_stack_used= max(max_stack_used, stack_used);
+ max_stack_used= MY_MAX(max_stack_used, stack_used);
#endif
return 0;
}
@@ -7217,7 +7217,7 @@ bool check_simple_select()
char command[80];
Lex_input_stream *lip= & thd->m_parser_state->m_lip;
strmake(command, lip->yylval->symbol.str,
- min(lip->yylval->symbol.length, sizeof(command)-1));
+ MY_MIN(lip->yylval->symbol.length, sizeof(command)-1));
my_error(ER_CANT_USE_OPTION_HERE, MYF(0), command);
return 1;
}
diff --git a/sql/sql_partition.cc b/sql/sql_partition.cc
index f042f028450..b8aa4c7e944 100644
--- a/sql/sql_partition.cc
+++ b/sql/sql_partition.cc
@@ -4839,7 +4839,7 @@ that are reorganised.
*/
start_part= 0;
end_part= new_total_partitions - (upper_2n + 1);
- end_part= max(lower_2n - 1, end_part);
+ end_part= MY_MAX(lower_2n - 1, end_part);
}
else if (new_total_partitions <= upper_2n)
{
diff --git a/sql/sql_plugin.cc b/sql/sql_plugin.cc
index 8778713d7e7..a81c05c5513 100644
--- a/sql/sql_plugin.cc
+++ b/sql/sql_plugin.cc
@@ -695,7 +695,7 @@ static my_bool read_maria_plugin_info(struct st_plugin_dl *plugin_dl,
for (i=0;
(old= (struct st_maria_plugin *)(ptr + i * sizeof_st_plugin))->info;
i++)
- memcpy(cur + i, old, min(sizeof(cur[i]), sizeof_st_plugin));
+ memcpy(cur + i, old, MY_MIN(sizeof(cur[i]), sizeof_st_plugin));
sym= cur;
plugin_dl->allocated= true;
diff --git a/sql/sql_prepare.cc b/sql/sql_prepare.cc
index 025ff8820e6..002fdb40036 100644
--- a/sql/sql_prepare.cc
+++ b/sql/sql_prepare.cc
@@ -343,7 +343,7 @@ static bool send_prep_stmt(Prepared_statement *stmt, uint columns)
int2store(buff+5, columns);
int2store(buff+7, stmt->param_count);
buff[9]= 0; // Guard against a 4.1 client
- tmp= min(stmt->thd->warning_info->statement_warn_count(), 65535);
+ tmp= MY_MIN(stmt->thd->warning_info->statement_warn_count(), 65535);
int2store(buff+10, tmp);
/*
diff --git a/sql/sql_profile.cc b/sql/sql_profile.cc
index feb7810fa28..dc7aacb3d94 100644
--- a/sql/sql_profile.cc
+++ b/sql/sql_profile.cc
@@ -288,7 +288,7 @@ void QUERY_PROFILE::set_query_source(char *query_source_arg,
uint query_length_arg)
{
/* Truncate to avoid DoS attacks. */
- uint length= min(MAX_QUERY_LENGTH, query_length_arg);
+ uint length= MY_MIN(MAX_QUERY_LENGTH, query_length_arg);
DBUG_ASSERT(query_source == NULL); /* we don't leak memory */
if (query_source_arg != NULL)
diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc
index 917f4ea1a80..9e6cd55fe07 100644
--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -1818,7 +1818,7 @@ bool change_master(THD* thd, Master_info* mi, bool *master_info_added)
if (lex_mi->heartbeat_opt != LEX_MASTER_INFO::LEX_MI_UNCHANGED)
mi->heartbeat_period = lex_mi->heartbeat_period;
else
- mi->heartbeat_period= (float) min(SLAVE_MAX_HEARTBEAT_PERIOD,
+ mi->heartbeat_period= (float) MY_MIN(SLAVE_MAX_HEARTBEAT_PERIOD,
(slave_net_timeout/2.0));
mi->received_heartbeats= LL(0); // counter lives until master is CHANGEd
/*
@@ -1916,12 +1916,12 @@ bool change_master(THD* thd, Master_info* mi, bool *master_info_added)
{
/*
Sometimes mi->rli.master_log_pos == 0 (it happens when the SQL thread is
- not initialized), so we use a max().
+ not initialized), so we use a MY_MAX().
What happens to mi->rli.master_log_pos during the initialization stages
of replication is not 100% clear, so we guard against problems using
- max().
+ MY_MAX().
*/
- mi->master_log_pos = max(BIN_LOG_HEADER_SIZE,
+ mi->master_log_pos = MY_MAX(BIN_LOG_HEADER_SIZE,
mi->rli.group_master_log_pos);
strmake(mi->master_log_name, mi->rli.group_master_log_name,
sizeof(mi->master_log_name)-1);
@@ -2106,7 +2106,7 @@ bool mysql_show_binlog_events(THD* thd)
LEX_MASTER_INFO *lex_mi= &thd->lex->mi;
SELECT_LEX_UNIT *unit= &thd->lex->unit;
ha_rows event_count, limit_start, limit_end;
- my_off_t pos = max(BIN_LOG_HEADER_SIZE, lex_mi->pos); // user-friendly
+ my_off_t pos = MY_MAX(BIN_LOG_HEADER_SIZE, lex_mi->pos); // user-friendly
char search_file_name[FN_REFLEN], *name;
const char *log_file_name = lex_mi->log_file_name;
mysql_mutex_t *log_lock = binary_log->get_log_lock();
@@ -2394,14 +2394,14 @@ int log_loaded_block(IO_CACHE* file)
DBUG_RETURN(0);
for (block_len= (uint) (my_b_get_bytes_in_buffer(file)); block_len > 0;
- buffer += min(block_len, max_event_size),
- block_len -= min(block_len, max_event_size))
+ buffer += MY_MIN(block_len, max_event_size),
+ block_len -= MY_MIN(block_len, max_event_size))
{
lf_info->last_pos_in_file= my_b_get_pos_in_file(file);
if (lf_info->wrote_create_file)
{
Append_block_log_event a(lf_info->thd, lf_info->thd->db, buffer,
- min(block_len, max_event_size),
+ MY_MIN(block_len, max_event_size),
lf_info->log_delayed);
if (mysql_bin_log.write(&a))
DBUG_RETURN(1);
@@ -2410,7 +2410,7 @@ int log_loaded_block(IO_CACHE* file)
{
Begin_load_query_log_event b(lf_info->thd, lf_info->thd->db,
buffer,
- min(block_len, max_event_size),
+ MY_MIN(block_len, max_event_size),
lf_info->log_delayed);
if (mysql_bin_log.write(&b))
DBUG_RETURN(1);
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 27b93cff189..e1b2beabd32 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -1188,7 +1188,7 @@ JOIN::optimize_inner()
#endif
/*
- Try to optimize count(*), min() and max() to const fields if
+ Try to optimize count(*), MY_MIN() and MY_MAX() to const fields if
there is implicit grouping (aggregate functions but no
group_list). In this case, the result set shall only contain one
row.
@@ -3741,7 +3741,7 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
This is can't be to high as otherwise we are likely to use
table scan.
*/
- s->worst_seeks= min((double) s->found_records / 10,
+ s->worst_seeks= MY_MIN((double) s->found_records / 10,
(double) s->read_time*3);
if (s->worst_seeks < 2.0) // Fix for small tables
s->worst_seeks=2.0;
@@ -4910,7 +4910,7 @@ update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab,
uint and_level,i;
KEY_FIELD *key_fields, *end, *field;
uint sz;
- uint m= max(select_lex->max_equal_elems,1);
+ uint m= MY_MAX(select_lex->max_equal_elems,1);
/*
We use the same piece of memory to store both KEY_FIELD
@@ -4933,7 +4933,7 @@ update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab,
can be not more than select_lex->max_equal_elems such
substitutions.
*/
- sz= max(sizeof(KEY_FIELD),sizeof(SARGABLE_PARAM))*
+ sz= MY_MAX(sizeof(KEY_FIELD),sizeof(SARGABLE_PARAM))*
(((thd->lex->current_select->cond_count+1)*2 +
thd->lex->current_select->between_count)*m+1);
if (!(key_fields=(KEY_FIELD*) thd->alloc(sz)))
@@ -5117,7 +5117,7 @@ static void optimize_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse_array)
DBUG_ASSERT(tablenr != Table_map_iterator::BITMAP_END);
TABLE *tmp_table=join->table[tablenr];
if (tmp_table) // already created
- keyuse->ref_table_rows= max(tmp_table->file->stats.records, 100);
+ keyuse->ref_table_rows= MY_MAX(tmp_table->file->stats.records, 100);
}
}
/*
@@ -5591,7 +5591,7 @@ best_access_path(JOIN *join,
tmp= table->file->keyread_time(key, 1, (ha_rows) tmp);
else
tmp= table->file->read_time(key, 1,
- (ha_rows) min(tmp,s->worst_seeks));
+ (ha_rows) MY_MIN(tmp,s->worst_seeks));
tmp*= record_count;
}
}
@@ -5755,7 +5755,7 @@ best_access_path(JOIN *join,
tmp= table->file->keyread_time(key, 1, (ha_rows) tmp);
else
tmp= table->file->read_time(key, 1,
- (ha_rows) min(tmp,s->worst_seeks));
+ (ha_rows) MY_MIN(tmp,s->worst_seeks));
tmp*= record_count;
}
else
@@ -10667,7 +10667,7 @@ bool TABLE_REF::is_access_triggered()
a correlated subquery itself, but has subqueries, we can free it
fully and also free JOINs of all its subqueries. The exception
is a subquery in SELECT list, e.g: @n
- SELECT a, (select max(b) from t1) group by c @n
+ SELECT a, (select MY_MAX(b) from t1) group by c @n
This subquery will not be evaluated at first sweep and its value will
not be inserted into the temporary table. Instead, it's evaluated
when selecting from the temporary table. Therefore, it can't be freed
@@ -14837,7 +14837,7 @@ create_tmp_table(THD *thd, TMP_TABLE_PARAM *param, List<Item> &fields,
share->max_rows= ~(ha_rows) 0;
else
share->max_rows= (ha_rows) (((share->db_type() == heap_hton) ?
- min(thd->variables.tmp_table_size,
+ MY_MIN(thd->variables.tmp_table_size,
thd->variables.max_heap_table_size) :
thd->variables.tmp_table_size) /
share->reclength);
@@ -15416,7 +15416,7 @@ bool create_internal_tmp_table(TABLE *table, KEY *keyinfo,
start_recinfo,
share->uniques, &uniquedef,
&create_info,
- HA_CREATE_TMP_TABLE)))
+ HA_CREATE_TMP_TABLE | HA_CREATE_INTERNAL_TABLE)))
{
table->file->print_error(error,MYF(0)); /* purecov: inspected */
table->db_stat=0;
@@ -15562,7 +15562,7 @@ bool create_internal_tmp_table(TABLE *table, KEY *keyinfo,
start_recinfo,
share->uniques, &uniquedef,
&create_info,
- HA_CREATE_TMP_TABLE)))
+ HA_CREATE_TMP_TABLE | HA_CREATE_INTERNAL_TABLE)))
{
table->file->print_error(error,MYF(0)); /* purecov: inspected */
table->db_stat=0;
@@ -19576,7 +19576,7 @@ SORT_FIELD *make_unireg_sortorder(ORDER *order, uint *length,
count++;
if (!sortorder)
sortorder= (SORT_FIELD*) sql_alloc(sizeof(SORT_FIELD) *
- (max(count, *length) + 1));
+ (MY_MAX(count, *length) + 1));
pos= sort= sortorder;
if (!pos)
@@ -23290,7 +23290,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
index entry.
*/
index_scan_time= select_limit/rec_per_key *
- min(rec_per_key, table->file->scan_time());
+ MY_MIN(rec_per_key, table->file->scan_time());
if ((ref_key < 0 && (group || table->force_index || is_covering)) ||
index_scan_time < read_time)
{
@@ -23301,7 +23301,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
if (table->quick_keys.is_set(nr))
quick_records= table->quick_rows[nr];
if (best_key < 0 ||
- (select_limit <= min(quick_records,best_records) ?
+ (select_limit <= MY_MIN(quick_records,best_records) ?
keyinfo->key_parts < best_key_parts :
quick_records < best_records) ||
(!is_best_covering && is_covering))
diff --git a/sql/sql_show.cc b/sql/sql_show.cc
index 1f860fe23db..74a554d8c6c 100644
--- a/sql/sql_show.cc
+++ b/sql/sql_show.cc
@@ -1046,7 +1046,7 @@ mysqld_show_create(THD *thd, TABLE_LIST *table_list)
{
field_list.push_back(new Item_empty_string("View",NAME_CHAR_LEN));
field_list.push_back(new Item_empty_string("Create View",
- max(buffer.length(),1024)));
+ MY_MAX(buffer.length(),1024)));
field_list.push_back(new Item_empty_string("character_set_client",
MY_CS_NAME_SIZE));
field_list.push_back(new Item_empty_string("collation_connection",
@@ -1057,7 +1057,7 @@ mysqld_show_create(THD *thd, TABLE_LIST *table_list)
field_list.push_back(new Item_empty_string("Table",NAME_CHAR_LEN));
// 1024 is for not to confuse old clients
field_list.push_back(new Item_empty_string("Create Table",
- max(buffer.length(),1024)));
+ MY_MAX(buffer.length(),1024)));
}
if (protocol->send_result_set_metadata(&field_list,
@@ -2249,7 +2249,7 @@ void mysqld_list_processes(THD *thd,const char *user, bool verbose)
/* Lock THD mutex that protects its data when looking at it. */
if (tmp->query())
{
- uint length= min(max_query_length, tmp->query_length());
+ uint length= MY_MIN(max_query_length, tmp->query_length());
char *q= thd->strmake(tmp->query(),length);
/* Safety: in case strmake failed, we set length to 0. */
thd_info->query_string=
@@ -2262,7 +2262,7 @@ void mysqld_list_processes(THD *thd,const char *user, bool verbose)
*/
if (tmp->progress.max_counter)
{
- uint max_stage= max(tmp->progress.max_stage, 1);
+ uint max_stage= MY_MAX(tmp->progress.max_stage, 1);
thd_info->progress= (((tmp->progress.stage / (double) max_stage) +
((tmp->progress.counter /
(double) tmp->progress.max_counter) /
@@ -2574,7 +2574,7 @@ int fill_schema_processlist(THD* thd, TABLE_LIST* tables, COND* cond)
if (tmp->query())
{
table->field[7]->store(tmp->query(),
- min(PROCESS_LIST_INFO_WIDTH,
+ MY_MIN(PROCESS_LIST_INFO_WIDTH,
tmp->query_length()), cs);
table->field[7]->set_notnull();
}
@@ -3003,7 +3003,7 @@ static int aggregate_user_stats(HASH *all_user_stats, HASH *agg_user_stats)
{
DBUG_ENTER("aggregate_user_stats");
if (my_hash_init(agg_user_stats, system_charset_info,
- max(all_user_stats->records, 1),
+ MY_MAX(all_user_stats->records, 1),
0, 0, (my_hash_get_key)get_key_user_stats,
(my_hash_free_key)free_user_stats, 0))
{
@@ -4225,7 +4225,7 @@ uint get_table_open_method(TABLE_LIST *tables,
for (ptr=tables->table->field; (field= *ptr) ; ptr++)
{
star_table_open_method=
- min(star_table_open_method,
+ MY_MIN(star_table_open_method,
schema_table->fields_info[field_indx].open_method);
if (bitmap_is_set(tables->table->read_set, field->field_index))
{
@@ -9107,7 +9107,7 @@ static bool show_create_trigger_impl(THD *thd,
Item_empty_string *stmt_fld=
new Item_empty_string("SQL Original Statement",
- max(trg_sql_original_stmt.length, 1024));
+ MY_MAX(trg_sql_original_stmt.length, 1024));
stmt_fld->maybe_null= TRUE;
diff --git a/sql/sql_string.cc b/sql/sql_string.cc
index 9d11677666f..8ccc8aff365 100644
--- a/sql/sql_string.cc
+++ b/sql/sql_string.cc
@@ -661,7 +661,7 @@ int String::reserve(uint32 space_needed, uint32 grow_by)
{
if (Alloced_length < str_length + space_needed)
{
- if (realloc(Alloced_length + max(space_needed, grow_by) - 1))
+ if (realloc(Alloced_length + MY_MAX(space_needed, grow_by) - 1))
return TRUE;
}
return FALSE;
@@ -748,7 +748,7 @@ int sortcmp(const String *s,const String *t, CHARSET_INFO *cs)
int stringcmp(const String *s,const String *t)
{
- uint32 s_len=s->length(),t_len=t->length(),len=min(s_len,t_len);
+ uint32 s_len=s->length(),t_len=t->length(),len=MY_MIN(s_len,t_len);
int cmp= memcmp(s->ptr(), t->ptr(), len);
return (cmp) ? cmp : (int) (s_len - t_len);
}
@@ -765,7 +765,7 @@ String *copy_if_not_alloced(String *to,String *from,uint32 from_length)
}
if (to->realloc(from_length))
return from; // Actually an error
- if ((to->str_length=min(from->str_length,from_length)))
+ if ((to->str_length=MY_MIN(from->str_length,from_length)))
memcpy(to->Ptr,from->Ptr,to->str_length);
to->str_charset=from->str_charset;
return to;
@@ -776,67 +776,6 @@ String *copy_if_not_alloced(String *to,String *from,uint32 from_length)
Help functions
****************************************************************************/
-
-
-/*
- Optimized for quick copying of ASCII characters in the range 0x00..0x7F.
-*/
-uint32
-copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
- const char *from, uint32 from_length, CHARSET_INFO *from_cs,
- uint *errors)
-{
- /*
- If any of the character sets is not ASCII compatible,
- immediately switch to slow mb_wc->wc_mb method.
- */
- if ((to_cs->state | from_cs->state) & MY_CS_NONASCII)
- return copy_and_convert_extended(to, to_length, to_cs,
- from, from_length, from_cs, errors);
-
- uint32 length= min(to_length, from_length), length2= length;
-
-#if defined(__i386__)
- /*
- Special loop for i386, it allows to refer to a
- non-aligned memory block as UINT32, which makes
- it possible to copy four bytes at once. This
- gives about 10% performance improvement comparing
- to byte-by-byte loop.
- */
- for ( ; length >= 4; length-= 4, from+= 4, to+= 4)
- {
- if ((*(uint32*)from) & 0x80808080)
- break;
- *((uint32*) to)= *((const uint32*) from);
- }
-#endif
-
- for (; ; *to++= *from++, length--)
- {
- if (!length)
- {
- *errors= 0;
- return length2;
- }
- if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */
- {
- uint32 copied_length= length2 - length;
- to_length-= copied_length;
- from_length-= copied_length;
- return copied_length + copy_and_convert_extended(to, to_length,
- to_cs,
- from, from_length,
- from_cs,
- errors);
- }
- }
-
- DBUG_ASSERT(FALSE); // Should never get to here
- return 0; // Make compiler happy
-}
-
-
/**
Copy string with HEX-encoding of "bad" characters.
@@ -954,7 +893,7 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs,
if (to_cs == &my_charset_bin)
{
- res= min(min(nchars, to_length), from_length);
+ res= MY_MIN(MY_MIN(nchars, to_length), from_length);
memmove(to, from, res);
*from_end_pos= from + res;
*well_formed_error_pos= NULL;
@@ -1166,7 +1105,7 @@ uint convert_to_printable(char *to, size_t to_len,
char *t= to;
char *t_end= to + to_len - 1; // '- 1' is for the '\0' at the end
const char *f= from;
- const char *f_end= from + (nbytes ? min(from_len, nbytes) : from_len);
+ const char *f_end= from + (nbytes ? MY_MIN(from_len, nbytes) : from_len);
char *dots= to; // last safe place to append '...'
if (!f || t == t_end)
diff --git a/sql/sql_string.h b/sql/sql_string.h
index 58cda343dac..40096466a92 100644
--- a/sql/sql_string.h
+++ b/sql/sql_string.h
@@ -34,9 +34,13 @@ typedef struct st_mem_root MEM_ROOT;
int sortcmp(const String *a,const String *b, CHARSET_INFO *cs);
String *copy_if_not_alloced(String *a,String *b,uint32 arg_length);
-uint32 copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
- const char *from, uint32 from_length,
- CHARSET_INFO *from_cs, uint *errors);
+inline uint32 copy_and_convert(char *to, uint32 to_length,
+ const CHARSET_INFO *to_cs,
+ const char *from, uint32 from_length,
+ const CHARSET_INFO *from_cs, uint *errors)
+{
+ return my_convert(to, to_length, to_cs, from, from_length, from_cs, errors);
+}
uint32 well_formed_copy_nchars(CHARSET_INFO *to_cs,
char *to, uint to_length,
CHARSET_INFO *from_cs,
diff --git a/sql/sql_table.cc b/sql/sql_table.cc
index 3c094e1740e..f9dea58bb32 100644
--- a/sql/sql_table.cc
+++ b/sql/sql_table.cc
@@ -3621,7 +3621,7 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info,
if ((length=column->length) > max_key_length ||
length > file->max_key_part_length())
{
- length=min(max_key_length, file->max_key_part_length());
+ length=MY_MIN(max_key_length, file->max_key_part_length());
if (key->type == Key::MULTIPLE)
{
/* not a critical problem */
diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy
index 56e7db96a1a..0fa18b46ff4 100644
--- a/sql/sql_yacc.yy
+++ b/sql/sql_yacc.yy
@@ -2173,7 +2173,7 @@ master_file_def:
from 0" (4 in fact), unspecified means "don't change the position
(keep the preceding value)").
*/
- Lex->mi.pos = max(BIN_LOG_HEADER_SIZE, Lex->mi.pos);
+ Lex->mi.pos= MY_MAX(BIN_LOG_HEADER_SIZE, Lex->mi.pos);
}
| RELAY_LOG_FILE_SYM EQ TEXT_STRING_sys
{
@@ -2183,7 +2183,7 @@ master_file_def:
{
Lex->mi.relay_log_pos = $3;
/* Adjust if < BIN_LOG_HEADER_SIZE (same comment as Lex->mi.pos) */
- Lex->mi.relay_log_pos = max(BIN_LOG_HEADER_SIZE, Lex->mi.relay_log_pos);
+ Lex->mi.relay_log_pos= MY_MAX(BIN_LOG_HEADER_SIZE, Lex->mi.relay_log_pos);
}
;
diff --git a/sql/structs.h b/sql/structs.h
index a3a54c524e6..e5e65e01064 100644
--- a/sql/structs.h
+++ b/sql/structs.h
@@ -89,8 +89,8 @@ struct ha_index_option_struct;
typedef struct st_key {
uint key_length; /* Tot length of key */
ulong flags; /* dupp key and pack flags */
- uint key_parts; /* How many key_parts */
- uint usable_key_parts; /* Should normally be = key_parts */
+ uint user_defined_key_parts; /* How many key_parts */
+ uint usable_key_parts; /* Should normally be = user_defined_key_parts */
uint ext_key_parts; /* Number of key parts in extended key */
ulong ext_key_flags; /* Flags for extended key */
key_part_map ext_key_part_map; /* Bitmap of pk key parts in extension */
@@ -256,10 +256,10 @@ typedef struct user_conn {
typedef struct st_user_stats
{
- char user[max(USERNAME_LENGTH, LIST_PROCESS_HOST_LEN) + 1];
+ char user[MY_MAX(USERNAME_LENGTH, LIST_PROCESS_HOST_LEN) + 1];
// Account name the user is mapped to when this is a user from mapped_user.
// Otherwise, the same value as user.
- char priv_user[max(USERNAME_LENGTH, LIST_PROCESS_HOST_LEN) + 1];
+ char priv_user[MY_MAX(USERNAME_LENGTH, LIST_PROCESS_HOST_LEN) + 1];
uint user_name_length;
uint total_connections;
uint concurrent_connections;
diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc
index ed5e34463e9..5bdb0a4a235 100644
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@@ -1254,8 +1254,9 @@ static bool fix_max_connections(sys_var *self, THD *thd, enum_var_type type)
// children, to avoid "too many connections" error in a common setup
static Sys_var_ulong Sys_max_connections(
"max_connections", "The number of simultaneous clients allowed",
- GLOBAL_VAR(max_connections), CMD_LINE(REQUIRED_ARG),
- VALID_RANGE(1, 100000), DEFAULT(151), BLOCK_SIZE(1), NO_MUTEX_GUARD,
+ PARSED_EARLY GLOBAL_VAR(max_connections), CMD_LINE(REQUIRED_ARG),
+ VALID_RANGE(1, 100000),
+ DEFAULT(MAX_CONNECTIONS_DEFAULT), BLOCK_SIZE(1), NO_MUTEX_GUARD,
NOT_IN_BINLOG, ON_CHECK(0), ON_UPDATE(fix_max_connections));
static Sys_var_ulong Sys_max_connect_errors(
@@ -2686,7 +2687,7 @@ static bool fix_autocommit(sys_var *self, THD *thd, enum_var_type type)
Don't close thread tables or release metadata locks: if we do so, we
risk releasing locks/closing tables of expressions used to assign
other variables, as in:
- set @var=my_stored_function1(), @@autocommit=1, @var2=(select max(a)
+ set @var=my_stored_function1(), @@autocommit=1, @var2=(select MY_MAX(a)
from my_table), ...
The locks will be released at statement end anyway, as SET
statement that assigns autocommit is marked to commit
diff --git a/sql/table.cc b/sql/table.cc
index c8dc2b4ed5a..d7fd370dde5 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -854,6 +854,8 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
share->table_charset= get_charset((((uint) head[41]) << 8) +
(uint) head[38],MYF(0));
share->null_field_first= 1;
+ share->stats_sample_pages= uint2korr(head+42);
+ share->stats_auto_recalc= static_cast<enum_stats_auto_recalc>(head[44]);
}
if (!share->table_charset)
{
@@ -883,12 +885,12 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
if (disk_buff[0] & 0x80)
{
share->keys= keys= (disk_buff[1] << 7) | (disk_buff[0] & 0x7f);
- share->key_parts= key_parts= uint2korr(disk_buff+2);
+ share->user_defined_key_parts= key_parts= uint2korr(disk_buff+2);
}
else
{
share->keys= keys= disk_buff[0];
- share->key_parts= key_parts= disk_buff[1];
+ share->user_defined_key_parts= key_parts= disk_buff[1];
}
share->keys_for_keyread.init(0);
share->keys_in_use.init(keys);
@@ -931,7 +933,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
{
keyinfo->flags= (uint) uint2korr(strpos) ^ HA_NOSAME;
keyinfo->key_length= (uint) uint2korr(strpos+2);
- keyinfo->key_parts= (uint) strpos[4];
+ keyinfo->user_defined_key_parts= (uint) strpos[4];
keyinfo->algorithm= (enum ha_key_alg) strpos[5];
keyinfo->block_size= uint2korr(strpos+6);
strpos+=8;
@@ -940,7 +942,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
{
keyinfo->flags= ((uint) strpos[0]) ^ HA_NOSAME;
keyinfo->key_length= (uint) uint2korr(strpos+1);
- keyinfo->key_parts= (uint) strpos[3];
+ keyinfo->user_defined_key_parts= (uint) strpos[3];
keyinfo->algorithm= HA_KEY_ALG_UNDEF;
strpos+=4;
}
@@ -965,7 +967,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
first_key_parts= first_keyinfo.key_parts;
keyinfo->flags= first_keyinfo.flags;
keyinfo->key_length= first_keyinfo.key_length;
- keyinfo->key_parts= first_keyinfo.key_parts;
+ keyinfo->user_defined_key_parts= first_keyinfo.key_parts;
keyinfo->algorithm= first_keyinfo.algorithm;
if (new_frm_ver >= 3)
keyinfo->block_size= first_keyinfo.block_size;
@@ -973,7 +975,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
keyinfo->key_part= key_part;
keyinfo->rec_per_key= rec_per_key;
- for (j=keyinfo->key_parts ; j-- ; key_part++)
+ for (j=keyinfo->user_defined_key_parts ; j-- ; key_part++)
{
*rec_per_key++=0;
key_part->fieldnr= (uint16) (uint2korr(strpos) & FIELD_NR_MASK);
@@ -999,7 +1001,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
}
key_part->store_length=key_part->length;
}
- keyinfo->ext_key_parts= keyinfo->key_parts;
+ keyinfo->ext_key_parts= keyinfo->user_defined_key_parts;
keyinfo->ext_key_flags= keyinfo->flags;
keyinfo->ext_key_part_map= 0;
if (share->use_ext_keys && i)
@@ -1009,7 +1011,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
j < first_key_parts && keyinfo->ext_key_parts < MAX_REF_PARTS;
j++)
{
- uint key_parts= keyinfo->key_parts;
+ uint key_parts= keyinfo->user_defined_key_parts;
KEY_PART_INFO* curr_key_part= keyinfo->key_part;
KEY_PART_INFO* curr_key_part_end= curr_key_part+key_parts;
for ( ; curr_key_part < curr_key_part_end; curr_key_part++)
@@ -1689,7 +1691,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
keyinfo->name_length+1);
}
- if (ext_key_parts > share->key_parts && key)
+ if (ext_key_parts > share->user_defined_key_parts && key)
{
KEY_PART_INFO *new_key_part= (keyinfo-1)->key_part +
(keyinfo-1)->ext_key_parts;
@@ -1698,7 +1700,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
Do not extend the key that contains a component
defined over the beginning of a field.
*/
- for (i= 0; i < keyinfo->key_parts; i++)
+ for (i= 0; i < keyinfo->user_defined_key_parts; i++)
{
uint fieldnr= keyinfo->key_part[i].fieldnr;
if (share->field[fieldnr-1]->key_length() !=
@@ -1709,11 +1711,11 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
}
}
- if (add_first_key_parts < keyinfo->ext_key_parts-keyinfo->key_parts)
+ if (add_first_key_parts < keyinfo->ext_key_parts-keyinfo->user_defined_key_parts)
{
share->ext_key_parts-= keyinfo->ext_key_parts;
key_part_map ext_key_part_map= keyinfo->ext_key_part_map;
- keyinfo->ext_key_parts= keyinfo->key_parts;
+ keyinfo->ext_key_parts= keyinfo->user_defined_key_parts;
keyinfo->ext_key_flags= keyinfo->flags;
keyinfo->ext_key_part_map= 0;
for (i= 0; i < add_first_key_parts; i++)
@@ -1746,7 +1748,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
*/
primary_key=key;
key_part= keyinfo->key_part;
- for (i=0 ; i < keyinfo->key_parts ;i++)
+ for (i=0 ; i < keyinfo->user_defined_key_parts ;i++)
{
uint fieldnr= key_part[i].fieldnr;
if (!fieldnr ||
@@ -1762,7 +1764,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
key_part= keyinfo->key_part;
uint key_parts= share->use_ext_keys ? keyinfo->ext_key_parts :
- keyinfo->key_parts;
+ keyinfo->user_defined_key_parts;
for (i=0; i < key_parts; key_part++, i++)
{
Field *field;
@@ -1804,7 +1806,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
if (i == 0 && key != primary_key)
field->flags |= (((keyinfo->flags & HA_NOSAME) &&
- (keyinfo->key_parts == 1)) ?
+ (keyinfo->user_defined_key_parts == 1)) ?
UNIQUE_KEY_FLAG : MULTIPLE_KEY_FLAG);
if (i == 0)
field->key_start.set_bit(key);
@@ -1815,7 +1817,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
{
share->keys_for_keyread.set_bit(key);
field->part_of_key.set_bit(key);
- if (i < keyinfo->key_parts)
+ if (i < keyinfo->user_defined_key_parts)
field->part_of_key_not_clustered.set_bit(key);
}
if (handler_file->index_flags(key, i, 1) & HA_READ_ORDER)
@@ -1889,7 +1891,7 @@ static int open_binary_frm(THD *thd, TABLE_SHARE *share, uchar *head,
keyinfo->usable_key_parts= usable_parts; // Filesort
set_if_bigger(share->max_key_length,keyinfo->key_length+
- keyinfo->key_parts);
+ keyinfo->user_defined_key_parts);
share->total_key_length+= keyinfo->key_length;
/*
MERGE tables do not have unique indexes. But every key could be
@@ -2450,7 +2452,7 @@ int open_table_from_share(THD *thd, TABLE_SHARE *share, const char *alias,
outparam->field[(uint) (share->found_next_number_field - share->field)];
/* Fix key->name and key_part->field */
- if (share->key_parts)
+ if (share->user_defined_key_parts)
{
KEY *key_info, *key_info_end;
KEY_PART_INFO *key_part;
@@ -2475,7 +2477,7 @@ int open_table_from_share(THD *thd, TABLE_SHARE *share, const char *alias,
key_info->key_part= key_part;
key_part_end= key_part + (share->use_ext_keys ? key_info->ext_key_parts :
- key_info->key_parts) ;
+ key_info->user_defined_key_parts) ;
for ( ; key_part < key_part_end; key_part++)
{
Field *field= key_part->field= outparam->field[key_part->fieldnr - 1];
@@ -2493,7 +2495,7 @@ int open_table_from_share(THD *thd, TABLE_SHARE *share, const char *alias,
}
}
if (!share->use_ext_keys)
- key_part+= key_info->ext_key_parts - key_info->key_parts;
+ key_part+= key_info->ext_key_parts - key_info->user_defined_key_parts;
}
}
@@ -3303,11 +3305,10 @@ File create_frm(THD *thd, const char *name, const char *db,
fileinfo[39]= (uchar) ((uint) create_info->transactional |
((uint) create_info->page_checksum << 2));
fileinfo[40]= (uchar) create_info->row_type;
- /* Next few bytes where for RAID support */
+ /* Bytes 41-46 were for RAID support; now reused for other purposes */
fileinfo[41]= (uchar) (csid >> 8);
- fileinfo[42]= 0;
- fileinfo[43]= 0;
- fileinfo[44]= 0;
+ int2store(fileinfo+42, create_info->stats_sample_pages & 0xffff);
+ fileinfo[44]= (uchar) create_info->stats_auto_recalc;
fileinfo[45]= 0;
fileinfo[46]= 0;
int4store(fileinfo+47, key_length);
@@ -6068,8 +6069,8 @@ bool TABLE::add_tmp_key(uint key, uint key_parts,
return TRUE;
keyinfo= key_info + key;
keyinfo->key_part= key_part_info;
- keyinfo->usable_key_parts= keyinfo->key_parts = key_parts;
- keyinfo->ext_key_parts= keyinfo->key_parts;
+ keyinfo->usable_key_parts= keyinfo->user_defined_key_parts = key_parts;
+ keyinfo->ext_key_parts= keyinfo->user_defined_key_parts;
keyinfo->key_length=0;
keyinfo->algorithm= HA_KEY_ALG_UNDEF;
keyinfo->flags= HA_GENERATED_KEY;
@@ -6168,7 +6169,7 @@ bool TABLE::is_filled_at_execution()
uint TABLE::actual_n_key_parts(KEY *keyinfo)
{
return optimizer_flag(in_use, OPTIMIZER_SWITCH_EXTENDED_KEYS) ?
- keyinfo->ext_key_parts : keyinfo->key_parts;
+ keyinfo->ext_key_parts : keyinfo->user_defined_key_parts;
}
diff --git a/sql/table.h b/sql/table.h
index 1a567ae75d1..531b08bf813 100644
--- a/sql/table.h
+++ b/sql/table.h
@@ -646,7 +646,8 @@ struct TABLE_SHARE
key_map keys_for_keyread;
ha_rows min_rows, max_rows; /* create information */
ulong avg_row_length; /* create information */
- ulong version, mysql_version;
+ ulong version;
+ ulong mysql_version; /* 0 if .frm is created before 5.0 */
ulong reclength; /* Recordlength */
/* Stored record length. No generated-only virtual fields are included */
ulong stored_rec_length;
@@ -668,6 +669,9 @@ struct TABLE_SHARE
uint ref_count; /* How many TABLE objects uses this */
uint blob_ptr_size; /* 4 or 8 */
uint key_block_size; /* create key_block_size, if used */
+ uint stats_sample_pages; /* number of pages to sample during
+ stats estimation, if used, otherwise 0. */
+ enum_stats_auto_recalc stats_auto_recalc; /* Automatic recalc of stats. */
uint null_bytes, last_null_bit_pos;
/*
Same as null_bytes, except that if there is only a 'delete-marker' in
diff --git a/sql/thr_malloc.cc b/sql/thr_malloc.cc
index 8c7db0673ac..548426587c5 100644
--- a/sql/thr_malloc.cc
+++ b/sql/thr_malloc.cc
@@ -134,7 +134,7 @@ char *sql_strmake_with_convert(const char *str, size_t arg_length,
if ((from_cs == &my_charset_bin) || (to_cs == &my_charset_bin))
{
// Safety if to_cs->mbmaxlen > 0
- new_length= min(arg_length, max_res_length);
+ new_length= MY_MIN(arg_length, max_res_length);
memcpy(pos, str, new_length);
}
else
diff --git a/sql/tztime.cc b/sql/tztime.cc
index b16cc65d6bb..665e624cd41 100644
--- a/sql/tztime.cc
+++ b/sql/tztime.cc
@@ -176,7 +176,7 @@ tz_load(const char *name, TIME_ZONE_INFO *sp, MEM_ROOT *storage)
uchar buf[sizeof(struct tzhead) + sizeof(my_time_t) * TZ_MAX_TIMES +
TZ_MAX_TIMES + sizeof(TRAN_TYPE_INFO) * TZ_MAX_TYPES +
#ifdef ABBR_ARE_USED
- max(TZ_MAX_CHARS + 1, (2 * (MY_TZNAME_MAX + 1))) +
+ MY_MAX(TZ_MAX_CHARS + 1, (2 * (MY_TZNAME_MAX + 1))) +
#endif
sizeof(LS_INFO) * TZ_MAX_LEAPS];
} u;
@@ -405,7 +405,7 @@ prepare_tz_info(TIME_ZONE_INFO *sp, MEM_ROOT *storage)
Let us choose end_t as point before next time type change or leap
second correction.
*/
- end_t= min((next_trans_idx < sp->timecnt) ? sp->ats[next_trans_idx] - 1:
+ end_t= MY_MIN((next_trans_idx < sp->timecnt) ? sp->ats[next_trans_idx] - 1:
MY_TIME_T_MAX,
(next_leap_idx < sp->leapcnt) ?
sp->lsis[next_leap_idx].ls_trans - 1: MY_TIME_T_MAX);
@@ -1875,7 +1875,7 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables)
uchar types[TZ_MAX_TIMES];
TRAN_TYPE_INFO ttis[TZ_MAX_TYPES];
#ifdef ABBR_ARE_USED
- char chars[max(TZ_MAX_CHARS + 1, (2 * (MY_TZNAME_MAX + 1)))];
+ char chars[MY_MAX(TZ_MAX_CHARS + 1, (2 * (MY_TZNAME_MAX + 1)))];
#endif
/*
Used as a temporary tz_info until we decide that we actually want to
@@ -1942,7 +1942,7 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables)
field->store((longlong) tzid, TRUE);
DBUG_ASSERT(field->key_length() <= sizeof(keybuff));
field->get_key_image(keybuff,
- min(field->key_length(), sizeof(keybuff)),
+ MY_MIN(field->key_length(), sizeof(keybuff)),
Field::itRAW);
if (table->file->ha_index_init(0, 1))
goto end;
@@ -1975,7 +1975,7 @@ tz_load_from_open_tables(const String *tz_name, TABLE_LIST *tz_tables)
field->store((longlong) tzid, TRUE);
DBUG_ASSERT(field->key_length() <= sizeof(keybuff));
field->get_key_image(keybuff,
- min(field->key_length(), sizeof(keybuff)),
+ MY_MIN(field->key_length(), sizeof(keybuff)),
Field::itRAW);
if (table->file->ha_index_init(0, 1))
goto end;
diff --git a/sql/unireg.cc b/sql/unireg.cc
index e40dc02c21b..4596bb52747 100644
--- a/sql/unireg.cc
+++ b/sql/unireg.cc
@@ -574,7 +574,7 @@ static uchar *pack_screens(List<Create_field> &create_fields,
}
cfield->row=(uint8) row;
cfield->col=(uint8) (length+1);
- cfield->sc_length=(uint8) min(cfield->length,cols-(length+2));
+ cfield->sc_length=(uint8) MY_MIN(cfield->length,cols-(length+2));
}
length=(uint) (pos-start_screen);
int2store(start_screen,length);
@@ -830,7 +830,7 @@ static bool pack_header(uchar *forminfo, enum legacy_db_type table_type,
DBUG_RETURN(1);
}
/* Hack to avoid bugs with small static rows in MySQL */
- reclength=max(file->min_record_length(table_options),reclength);
+ reclength=MY_MAX(file->min_record_length(table_options),reclength);
if (info_length+(ulong) create_fields.elements*FCOMP+288+
n_length+int_length+com_length+vcol_info_length > 65535L ||
int_count > 255)
diff --git a/storage/archive/ha_archive.cc b/storage/archive/ha_archive.cc
index 342f8be956e..5a6251a5de6 100644
--- a/storage/archive/ha_archive.cc
+++ b/storage/archive/ha_archive.cc
@@ -699,7 +699,7 @@ int ha_archive::create(const char *name, TABLE *table_arg,
{
KEY *pos= table_arg->key_info+key;
KEY_PART_INFO *key_part= pos->key_part;
- KEY_PART_INFO *key_part_end= key_part + pos->key_parts;
+ KEY_PART_INFO *key_part_end= key_part + pos->user_defined_key_parts;
for (; key_part != key_part_end; key_part++)
{
diff --git a/storage/csv/ha_tina.cc b/storage/csv/ha_tina.cc
index 916c7b151de..c25bc4f2713 100644
--- a/storage/csv/ha_tina.cc
+++ b/storage/csv/ha_tina.cc
@@ -1308,7 +1308,7 @@ bool ha_tina::get_write_pos(my_off_t *end_pos, tina_set *closest_hole)
if (closest_hole == chain_ptr) /* no more chains */
*end_pos= file_buff->end();
else
- *end_pos= min(file_buff->end(), closest_hole->begin);
+ *end_pos= MY_MIN(file_buff->end(), closest_hole->begin);
return (closest_hole != chain_ptr) && (*end_pos == closest_hole->begin);
}
@@ -1545,7 +1545,7 @@ int ha_tina::repair(THD* thd, HA_CHECK_OPT* check_opt)
/* write repaired file */
while (1)
{
- write_end= min(file_buff->end(), current_position);
+ write_end= MY_MIN(file_buff->end(), current_position);
if ((write_end - write_begin) &&
(mysql_file_write(repair_file, (uchar*)file_buff->ptr(),
(size_t) (write_end - write_begin), MYF_RW)))
diff --git a/storage/federated/ha_federated.cc b/storage/federated/ha_federated.cc
index 0c07af0a554..333e3b0b672 100644
--- a/storage/federated/ha_federated.cc
+++ b/storage/federated/ha_federated.cc
@@ -584,7 +584,7 @@ static int parse_url_error(FEDERATED_SHARE *share, TABLE *table, int error_num)
size_t buf_len;
DBUG_ENTER("ha_federated parse_url_error");
- buf_len= min(table->s->connect_string.length,
+ buf_len= MY_MIN(table->s->connect_string.length,
FEDERATED_QUERY_BUFFER_SIZE-1);
strmake(buf, table->s->connect_string.str, buf_len);
my_error(error_num, MYF(0), buf);
@@ -1317,7 +1317,7 @@ bool ha_federated::create_where_from_key(String *to,
}
for (key_part= key_info->key_part,
- remainder= key_info->key_parts,
+ remainder= key_info->user_defined_key_parts,
length= ranges[i]->length,
ptr= ranges[i]->key; ;
remainder--,
@@ -1325,7 +1325,7 @@ bool ha_federated::create_where_from_key(String *to,
{
Field *field= key_part->field;
uint store_length= key_part->store_length;
- uint part_length= min(store_length, length);
+ uint part_length= MY_MIN(store_length, length);
needs_quotes= field->str_needs_quotes();
DBUG_DUMP("key, start of loop", ptr, length);
diff --git a/storage/federatedx/ha_federatedx.cc b/storage/federatedx/ha_federatedx.cc
index e1c2a38964a..f5cb284c7c4 100644
--- a/storage/federatedx/ha_federatedx.cc
+++ b/storage/federatedx/ha_federatedx.cc
@@ -522,7 +522,7 @@ static int parse_url_error(FEDERATEDX_SHARE *share, TABLE *table, int error_num)
int buf_len;
DBUG_ENTER("ha_federatedx parse_url_error");
- buf_len= min(table->s->connect_string.length,
+ buf_len= MY_MIN(table->s->connect_string.length,
FEDERATEDX_QUERY_BUFFER_SIZE-1);
strmake(buf, table->s->connect_string.str, buf_len);
my_error(error_num, MYF(0), buf);
@@ -1246,7 +1246,7 @@ bool ha_federatedx::create_where_from_key(String *to,
{
Field *field= key_part->field;
uint store_length= key_part->store_length;
- uint part_length= min(store_length, length);
+ uint part_length= MY_MIN(store_length, length);
needs_quotes= field->str_needs_quotes();
DBUG_DUMP("key, start of loop", ptr, length);
diff --git a/storage/heap/ha_heap.cc b/storage/heap/ha_heap.cc
index 8e63799680b..66d64c54b89 100644
--- a/storage/heap/ha_heap.cc
+++ b/storage/heap/ha_heap.cc
@@ -221,14 +221,14 @@ void ha_heap::update_key_stats()
if (key->algorithm != HA_KEY_ALG_BTREE)
{
if (key->flags & HA_NOSAME)
- key->rec_per_key[key->key_parts-1]= 1;
+ key->rec_per_key[key->user_defined_key_parts-1]= 1;
else
{
ha_rows hash_buckets= file->s->keydef[i].hash_buckets;
uint no_records= hash_buckets ? (uint) (file->s->records/hash_buckets) : 2;
if (no_records < 2)
no_records= 2;
- key->rec_per_key[key->key_parts-1]= no_records;
+ key->rec_per_key[key->user_defined_key_parts-1]= no_records;
}
}
}
@@ -611,7 +611,7 @@ ha_rows ha_heap::records_in_range(uint inx, key_range *min_key,
/* Assert that info() did run. We need current statistics here. */
DBUG_ASSERT(key_stat_version == file->s->key_stat_version);
- return key->rec_per_key[key->key_parts-1];
+ return key->rec_per_key[key->user_defined_key_parts-1];
}
@@ -630,7 +630,7 @@ heap_prepare_hp_create_info(TABLE *table_arg, bool internal_table,
bzero(hp_create_info, sizeof(*hp_create_info));
for (key= parts= 0; key < keys; key++)
- parts+= table_arg->key_info[key].key_parts;
+ parts+= table_arg->key_info[key].user_defined_key_parts;
if (!(keydef= (HP_KEYDEF*) my_malloc(keys * sizeof(HP_KEYDEF) +
parts * sizeof(HA_KEYSEG),
@@ -641,9 +641,9 @@ heap_prepare_hp_create_info(TABLE *table_arg, bool internal_table,
{
KEY *pos= table_arg->key_info+key;
KEY_PART_INFO *key_part= pos->key_part;
- KEY_PART_INFO *key_part_end= key_part + pos->key_parts;
+ KEY_PART_INFO *key_part_end= key_part + pos->user_defined_key_parts;
- keydef[key].keysegs= (uint) pos->key_parts;
+ keydef[key].keysegs= (uint) pos->user_defined_key_parts;
keydef[key].flag= (pos->flags & (HA_NOSAME | HA_NULL_ARE_EQUAL));
keydef[key].seg= seg;
diff --git a/storage/heap/hp_create.c b/storage/heap/hp_create.c
index a8bc8e63810..e286ff69e61 100644
--- a/storage/heap/hp_create.c
+++ b/storage/heap/hp_create.c
@@ -254,18 +254,18 @@ static void init_block(HP_BLOCK *block, uint reclength, ulong min_records,
If not min_records and max_records are given, optimize for 1000 rows
*/
if (!min_records)
- min_records= min(1000, max_records);
+ min_records= MY_MIN(1000, max_records);
if (!max_records)
- max_records= max(min_records, 1000);
+ max_records= MY_MAX(min_records, 1000);
/*
We don't want too few records_in_block as otherwise the overhead of
of the HP_PTRS block will be too notable
*/
- records_in_block= max(1000, min_records);
- records_in_block= min(records_in_block, max_records);
+ records_in_block= MY_MAX(1000, min_records);
+ records_in_block= MY_MIN(records_in_block, max_records);
/* If big max_records is given, allocate bigger blocks */
- records_in_block= max(records_in_block, max_records / 10);
+ records_in_block= MY_MAX(records_in_block, max_records / 10);
/* We don't want too few blocks per row either */
if (records_in_block < 10)
records_in_block= 10;
diff --git a/storage/heap/hp_test2.c b/storage/heap/hp_test2.c
index 058a2904697..13b49fbb7ec 100644
--- a/storage/heap/hp_test2.c
+++ b/storage/heap/hp_test2.c
@@ -132,7 +132,7 @@ int main(int argc, char *argv[])
for (i=0 ; i < recant ; i++)
{
- n1=rnd(1000); n2=rnd(100); n3=rnd(min(recant*5,MAX_RECORDS));
+ n1=rnd(1000); n2=rnd(100); n3=rnd(MY_MIN(recant*5,MAX_RECORDS));
make_record(record,n1,n2,n3,"Pos",write_count);
if (heap_write(file,record))
@@ -208,7 +208,7 @@ int main(int argc, char *argv[])
printf("- Update\n");
for (i=0 ; i < write_count/10 ; i++)
{
- n1=rnd(1000); n2=rnd(100); n3=rnd(min(recant*2,MAX_RECORDS));
+ n1=rnd(1000); n2=rnd(100); n3=rnd(MY_MIN(recant*2,MAX_RECORDS));
make_record(record2, n1, n2, n3, "XXX", update);
if (rnd(2) == 1)
{
diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt
index 318b45e43ae..ee8758a08d2 100644
--- a/storage/innobase/CMakeLists.txt
+++ b/storage/innobase/CMakeLists.txt
@@ -54,6 +54,8 @@ SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DUNIV_DEBUG -DUNIV_SYNC_DEB
#SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wconversion")
#ENDIF()
+CHECK_FUNCTION_EXISTS(sched_getcpu HAVE_SCHED_GETCPU)
+
IF(NOT MSVC)
# either define HAVE_IB_GCC_ATOMIC_BUILTINS or not
IF(NOT CMAKE_CROSSCOMPILING)
@@ -95,12 +97,36 @@ IF(NOT CMAKE_CROSSCOMPILING)
}"
HAVE_IB_GCC_ATOMIC_BUILTINS
)
+ CHECK_C_SOURCE_RUNS(
+ "#include<stdint.h>
+ int main()
+ {
+ int64_t x,y,res;
+
+ x = 10;
+ y = 123;
+ res = __sync_sub_and_fetch(&y, x);
+ if (res != y || y != 113) {
+ return(1);
+ }
+ res = __sync_add_and_fetch(&y, x);
+ if (res != y || y != 123) {
+ return(1);
+ }
+ return(0);
+ }"
+ HAVE_IB_GCC_ATOMIC_BUILTINS_64
+ )
ENDIF()
IF(HAVE_IB_GCC_ATOMIC_BUILTINS)
ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_BUILTINS=1)
ENDIF()
+IF(HAVE_IB_GCC_ATOMIC_BUILTINS_64)
+ ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_BUILTINS_64=1)
+ENDIF()
+
# either define HAVE_IB_ATOMIC_PTHREAD_T_GCC or not
IF(NOT CMAKE_CROSSCOMPILING)
CHECK_C_SOURCE_RUNS(
@@ -129,7 +155,8 @@ ENDIF()
ENDIF(NOT MSVC)
-SET(LINKER_SCRIPT)
+CHECK_FUNCTION_EXISTS(asprintf HAVE_ASPRINTF)
+CHECK_FUNCTION_EXISTS(vasprintf HAVE_VASPRINTF)
# Solaris atomics
IF(CMAKE_SYSTEM_NAME STREQUAL "SunOS")
@@ -150,10 +177,6 @@ IF(CMAKE_SYSTEM_NAME STREQUAL "SunOS")
ADD_DEFINITIONS(-DHAVE_IB_SOLARIS_ATOMICS=1)
ENDIF()
- IF(CMAKE_COMPILER_IS_GNUCC AND NOT HAVE_VISIBILITY_HIDDEN)
- SET(LINKER_SCRIPT "-Wl,-M${CMAKE_CURRENT_SOURCE_DIR}/plugin_exports")
- ENDIF()
-
IF(NOT CMAKE_CROSSCOMPILING)
# either define HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS or not
CHECK_C_SOURCE_COMPILES(
@@ -233,13 +256,16 @@ ENDIF()
IF(MSVC)
# Avoid "unreferenced label" warning in generated file
GET_FILENAME_COMPONENT(_SRC_DIR ${CMAKE_CURRENT_LIST_FILE} PATH)
- SET_SOURCE_FILES_PROPERTIES(${_SRC_DIR}/pars/pars0grm.cc
+ SET_SOURCE_FILES_PROPERTIES(${_SRC_DIR}/pars/pars0grm.c
PROPERTIES COMPILE_FLAGS "/wd4102")
- SET_SOURCE_FILES_PROPERTIES(${_SRC_DIR}/pars/lexyy.cc
+ SET_SOURCE_FILES_PROPERTIES(${_SRC_DIR}/pars/lexyy.c
PROPERTIES COMPILE_FLAGS "/wd4003")
ENDIF()
-
+
+
SET(INNOBASE_SOURCES
+ api/api0api.cc
+ api/api0misc.cc
btr/btr0btr.cc
btr/btr0cur.cc
btr/btr0pcur.cc
@@ -260,6 +286,7 @@ SET(INNOBASE_SOURCES
dict/dict0load.cc
dict/dict0mem.cc
dict/dict0stats.cc
+ dict/dict0stats_bg.cc
dyn/dyn0dyn.cc
eval/eval0eval.cc
eval/eval0proc.cc
@@ -311,9 +338,11 @@ SET(INNOBASE_SOURCES
rem/rem0rec.cc
row/row0ext.cc
row/row0ftsort.cc
+ row/row0import.cc
row/row0ins.cc
row/row0merge.cc
row/row0mysql.cc
+ row/row0log.cc
row/row0purge.cc
row/row0row.cc
row/row0sel.cc
@@ -321,6 +350,7 @@ SET(INNOBASE_SOURCES
row/row0umod.cc
row/row0undo.cc
row/row0upd.cc
+ row/row0quiesce.cc
row/row0vers.cc
srv/srv0conc.cc
srv/srv0mon.cc
@@ -355,7 +385,18 @@ IF(WITH_INNODB)
SET(WITH_INNOBASE_STORAGE_ENGINE TRUE)
ENDIF()
+
+# On solaris, reduce symbol visibility, so loader does not mix
+# the same symbols from builtin innodb and from shared one.
+# Only required for old GCC (3.4.3) that does not support hidden visibility
+IF(CMAKE_SYSTEM_NAME MATCHES "SunOS" AND CMAKE_COMPILER_IS_GNUCC
+ AND NOT HAVE_VISIBILITY_HIDDEN)
+ SET(LINKER_SCRIPT "-Wl,-M${CMAKE_CURRENT_SOURCE_DIR}/plugin_exports")
+ELSE()
+ SET(LINKER_SCRIPT)
+ENDIF()
+
MYSQL_ADD_PLUGIN(innobase ${INNOBASE_SOURCES} STORAGE_ENGINE
DEFAULT
MODULE_OUTPUT_NAME ha_innodb
- LINK_LIBRARIES ${ZLIB_LIBRARY})
+ LINK_LIBRARIES ${ZLIB_LIBRARY} ${LINKER_SCRIPT})
diff --git a/storage/innobase/api/api0api.cc b/storage/innobase/api/api0api.cc
new file mode 100644
index 00000000000..5f9762a1846
--- /dev/null
+++ b/storage/innobase/api/api0api.cc
@@ -0,0 +1,3859 @@
+/*****************************************************************************
+
+Copyright (c) 2008, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file api/api0api.cc
+InnoDB Native API
+
+2008-08-01 Created Sunny Bains
+3/20/2011 Jimmy Yang extracted from Embedded InnoDB
+*******************************************************/
+
+#include "univ.i"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#include "api0api.h"
+#include "api0misc.h"
+#include "srv0start.h"
+#include "dict0dict.h"
+#include "btr0pcur.h"
+#include "row0ins.h"
+#include "row0upd.h"
+#include "row0vers.h"
+#include "trx0roll.h"
+#include "dict0crea.h"
+#include "row0merge.h"
+#include "pars0pars.h"
+#include "lock0types.h"
+#include "row0sel.h"
+#include "lock0lock.h"
+#include "rem0cmp.h"
+#include "ut0dbg.h"
+#include "dict0priv.h"
+#include "ut0ut.h"
+#include "ha_prototypes.h"
+#include "trx0roll.h"
+
+/** configure variable for binlog option with InnoDB APIs */
+my_bool ib_binlog_enabled = FALSE;
+
+/** configure variable for MDL option with InnoDB APIs */
+my_bool ib_mdl_enabled = FALSE;
+
+/** configure variable for disable rowlock with InnoDB APIs */
+my_bool ib_disable_row_lock = FALSE;
+
+/** configure variable for Transaction isolation levels */
+ulong ib_trx_level_setting = IB_TRX_READ_UNCOMMITTED;
+
+/** configure variable for background commit interval in seconds */
+ulong ib_bk_commit_interval = 0;
+
+/** InnoDB tuple types. */
+enum ib_tuple_type_t{
+ TPL_TYPE_ROW, /*!< Data row tuple */
+ TPL_TYPE_KEY /*!< Index key tuple */
+};
+
+/** Query types supported. */
+enum ib_qry_type_t{
+ QRY_NON, /*!< None/Sentinel */
+ QRY_INS, /*!< Insert operation */
+ QRY_UPD, /*!< Update operation */
+ QRY_SEL /*!< Select operation */
+};
+
+/** Query graph types. */
+struct ib_qry_grph_t {
+ que_fork_t* ins; /*!< Innobase SQL query graph used
+ in inserts */
+ que_fork_t* upd; /*!< Innobase SQL query graph used
+ in updates or deletes */
+ que_fork_t* sel; /*!< dummy query graph used in
+ selects */
+};
+
+/** Query node types. */
+struct ib_qry_node_t {
+ ins_node_t* ins; /*!< Innobase SQL insert node
+ used to perform inserts to the table */
+ upd_node_t* upd; /*!< Innobase SQL update node
+ used to perform updates and deletes */
+ sel_node_t* sel; /*!< Innobase SQL select node
+ used to perform selects on the table */
+};
+
+/** Query processing fields. */
+struct ib_qry_proc_t {
+
+ ib_qry_node_t node; /*!< Query node*/
+
+ ib_qry_grph_t grph; /*!< Query graph */
+};
+
+/** Cursor instance for traversing tables/indexes. This will eventually
+become row_prebuilt_t. */
+struct ib_cursor_t {
+ mem_heap_t* heap; /*!< Instance heap */
+
+ mem_heap_t* query_heap; /*!< Heap to use for query graphs */
+
+ ib_qry_proc_t q_proc; /*!< Query processing info */
+
+ ib_match_mode_t match_mode; /*!< ib_cursor_moveto match mode */
+
+ row_prebuilt_t* prebuilt; /*!< For reading rows */
+
+ bool valid_trx; /*!< Valid transaction attached */
+};
+
+/** InnoDB table columns used during table and index schema creation. */
+struct ib_col_t {
+ const char* name; /*!< Name of column */
+
+ ib_col_type_t ib_col_type; /*!< Main type of the column */
+
+ ulint len; /*!< Length of the column */
+
+ ib_col_attr_t ib_col_attr; /*!< Column attributes */
+
+};
+
+/** InnoDB index columns used during index and index schema creation. */
+struct ib_key_col_t {
+ const char* name; /*!< Name of column */
+
+ ulint prefix_len; /*!< Column index prefix len or 0 */
+};
+
+struct ib_table_def_t;
+
+/** InnoDB index schema used during index creation */
+struct ib_index_def_t {
+ mem_heap_t* heap; /*!< Heap used to build this and all
+ its columns in the list */
+
+ const char* name; /*!< Index name */
+
+ dict_table_t* table; /*!< Parent InnoDB table */
+
+ ib_table_def_t* schema; /*!< Parent table schema that owns
+ this instance */
+
+ ibool clustered; /*!< True if clustered index */
+
+ ibool unique; /*!< True if unique index */
+
+ ib_vector_t* cols; /*!< Vector of columns */
+
+ trx_t* usr_trx; /*!< User transacton covering the
+ DDL operations */
+};
+
+/** InnoDB table schema used during table creation */
+struct ib_table_def_t {
+ mem_heap_t* heap; /*!< Heap used to build this and all
+ its columns in the list */
+ const char* name; /*!< Table name */
+
+ ib_tbl_fmt_t ib_tbl_fmt; /*!< Row format */
+
+ ulint page_size; /*!< Page size */
+
+ ib_vector_t* cols; /*!< Vector of columns */
+
+ ib_vector_t* indexes; /*!< Vector of indexes */
+
+ dict_table_t* table; /* Table read from or NULL */
+};
+
+/** InnoDB tuple used for key operations. */
+struct ib_tuple_t {
+ mem_heap_t* heap; /*!< Heap used to build
+ this and for copying
+ the column values. */
+
+ ib_tuple_type_t type; /*!< Tuple discriminitor. */
+
+ const dict_index_t* index; /*!< Index for tuple can be either
+ secondary or cluster index. */
+
+ dtuple_t* ptr; /*!< The internal tuple
+ instance */
+};
+
+/** The following counter is used to convey information to InnoDB
+about server activity: in selects it is not sensible to call
+srv_active_wake_master_thread after each fetch or search, we only do
+it every INNOBASE_WAKE_INTERVAL'th step. */
+
+#define INNOBASE_WAKE_INTERVAL 32
+
+/*****************************************************************//**
+Check whether the Innodb persistent cursor is positioned.
+@return IB_TRUE if positioned */
+UNIV_INLINE
+ib_bool_t
+ib_btr_cursor_is_positioned(
+/*========================*/
+ btr_pcur_t* pcur) /*!< in: InnoDB persistent cursor */
+{
+ return(pcur->old_stored == BTR_PCUR_OLD_STORED
+ && (pcur->pos_state == BTR_PCUR_IS_POSITIONED
+ || pcur->pos_state == BTR_PCUR_WAS_POSITIONED));
+}
+
+
+/********************************************************************//**
+Open a table using the table id, if found then increment table ref count.
+@return table instance if found */
+static
+dict_table_t*
+ib_open_table_by_id(
+/*================*/
+ ib_id_u64_t tid, /*!< in: table id to lookup */
+ ib_bool_t locked) /*!< in: TRUE if own dict mutex */
+{
+ dict_table_t* table;
+ table_id_t table_id;
+
+ table_id = tid;
+
+ if (!locked) {
+ dict_mutex_enter_for_mysql();
+ }
+
+ table = dict_table_open_on_id(table_id, FALSE, FALSE);
+
+ if (table != NULL && table->ibd_file_missing) {
+ table = NULL;
+ }
+
+ if (!locked) {
+ dict_mutex_exit_for_mysql();
+ }
+
+ return(table);
+}
+
+/********************************************************************//**
+Open a table using the table name, if found then increment table ref count.
+@return table instance if found */
+UNIV_INTERN
+void*
+ib_open_table_by_name(
+/*==================*/
+ const char* name) /*!< in: table name to lookup */
+{
+ dict_table_t* table;
+
+ table = dict_table_open_on_name(name, FALSE, FALSE,
+ DICT_ERR_IGNORE_NONE);
+
+ if (table != NULL && table->ibd_file_missing) {
+ table = NULL;
+ }
+
+ return(table);
+}
+
+/********************************************************************//**
+Find table using table name.
+@return table instance if found */
+static
+dict_table_t*
+ib_lookup_table_by_name(
+/*====================*/
+ const char* name) /*!< in: table name to lookup */
+{
+ dict_table_t* table;
+
+ table = dict_table_get_low(name);
+
+ if (table != NULL && table->ibd_file_missing) {
+ table = NULL;
+ }
+
+ return(table);
+}
+
+/********************************************************************//**
+Increments innobase_active_counter and every INNOBASE_WAKE_INTERVALth
+time calls srv_active_wake_master_thread. This function should be used
+when a single database operation may introduce a small need for
+server utility activity, like checkpointing. */
+UNIV_INLINE
+void
+ib_wake_master_thread(void)
+/*=======================*/
+{
+ static ulint ib_signal_counter = 0;
+
+ ++ib_signal_counter;
+
+ if ((ib_signal_counter % INNOBASE_WAKE_INTERVAL) == 0) {
+ srv_active_wake_master_thread();
+ }
+}
+
+/*********************************************************************//**
+Calculate the max row size of the columns in a cluster index.
+@return max row length */
+UNIV_INLINE
+ulint
+ib_get_max_row_len(
+/*===============*/
+ dict_index_t* cluster) /*!< in: cluster index */
+{
+ ulint i;
+ ulint max_len = 0;
+ ulint n_fields = cluster->n_fields;
+
+ /* Add the size of the ordering columns in the
+ clustered index. */
+ for (i = 0; i < n_fields; ++i) {
+ const dict_col_t* col;
+
+ col = dict_index_get_nth_col(cluster, i);
+
+ /* Use the maximum output size of
+ mach_write_compressed(), although the encoded
+ length should always fit in 2 bytes. */
+ max_len += dict_col_get_max_size(col);
+ }
+
+ return(max_len);
+}
+
+/*****************************************************************//**
+Read the columns from a rec into a tuple. */
+static
+void
+ib_read_tuple(
+/*==========*/
+ const rec_t* rec, /*!< in: Record to read */
+ ib_bool_t page_format, /*!< in: IB_TRUE if compressed format */
+ ib_tuple_t* tuple) /*!< in: tuple to read into */
+{
+ ulint i;
+ void* ptr;
+ rec_t* copy;
+ ulint rec_meta_data;
+ ulint n_index_fields;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets = offsets_;
+ dtuple_t* dtuple = tuple->ptr;
+ const dict_index_t* index = tuple->index;
+
+ rec_offs_init(offsets_);
+
+ offsets = rec_get_offsets(
+ rec, index, offsets, ULINT_UNDEFINED, &tuple->heap);
+
+ rec_meta_data = rec_get_info_bits(rec, page_format);
+ dtuple_set_info_bits(dtuple, rec_meta_data);
+
+ /* Make a copy of the rec. */
+ ptr = mem_heap_alloc(tuple->heap, rec_offs_size(offsets));
+ copy = rec_copy(ptr, rec, offsets);
+
+ n_index_fields = ut_min(
+ rec_offs_n_fields(offsets), dtuple_get_n_fields(dtuple));
+
+ for (i = 0; i < n_index_fields; ++i) {
+ ulint len;
+ const byte* data;
+ dfield_t* dfield;
+
+ if (tuple->type == TPL_TYPE_ROW) {
+ const dict_col_t* col;
+ ulint col_no;
+ const dict_field_t* index_field;
+
+ index_field = dict_index_get_nth_field(index, i);
+ col = dict_field_get_col(index_field);
+ col_no = dict_col_get_no(col);
+
+ dfield = dtuple_get_nth_field(dtuple, col_no);
+ } else {
+ dfield = dtuple_get_nth_field(dtuple, i);
+ }
+
+ data = rec_get_nth_field(copy, offsets, i, &len);
+
+ /* Fetch and copy any externally stored column. */
+ if (rec_offs_nth_extern(offsets, i)) {
+
+ ulint zip_size;
+
+ zip_size = dict_table_zip_size(index->table);
+
+ data = btr_rec_copy_externally_stored_field(
+ copy, offsets, zip_size, i, &len,
+ tuple->heap);
+
+ ut_a(len != UNIV_SQL_NULL);
+ }
+
+ dfield_set_data(dfield, data, len);
+ }
+}
+
+/*****************************************************************//**
+Create an InnoDB key tuple.
+@return tuple instance created, or NULL */
+static
+ib_tpl_t
+ib_key_tuple_new_low(
+/*=================*/
+ const dict_index_t* index, /*!< in: index for which tuple
+ required */
+ ulint n_cols, /*!< in: no. of user defined cols */
+ mem_heap_t* heap) /*!< in: memory heap */
+{
+ ib_tuple_t* tuple;
+ ulint i;
+ ulint n_cmp_cols;
+
+ tuple = static_cast<ib_tuple_t*>(
+ mem_heap_alloc(heap, sizeof(*tuple)));
+
+ if (tuple == NULL) {
+ mem_heap_free(heap);
+ return(NULL);
+ }
+
+ tuple->heap = heap;
+ tuple->index = index;
+ tuple->type = TPL_TYPE_KEY;
+
+ /* Is it a generated clustered index ? */
+ if (n_cols == 0) {
+ ++n_cols;
+ }
+
+ tuple->ptr = dtuple_create(heap, n_cols);
+
+ /* Copy types and set to SQL_NULL. */
+ dict_index_copy_types(tuple->ptr, index, n_cols);
+
+ for (i = 0; i < n_cols; i++) {
+
+ dfield_t* dfield;
+
+ dfield = dtuple_get_nth_field(tuple->ptr, i);
+ dfield_set_null(dfield);
+ }
+
+ n_cmp_cols = dict_index_get_n_ordering_defined_by_user(index);
+
+ dtuple_set_n_fields_cmp(tuple->ptr, n_cmp_cols);
+
+ return((ib_tpl_t) tuple);
+}
+
+/*****************************************************************//**
+Create an InnoDB key tuple.
+@return tuple instance created, or NULL */
+static
+ib_tpl_t
+ib_key_tuple_new(
+/*=============*/
+ const dict_index_t* index, /*!< in: index of tuple */
+ ulint n_cols) /*!< in: no. of user defined cols */
+{
+ mem_heap_t* heap;
+
+ heap = mem_heap_create(64);
+
+ if (heap == NULL) {
+ return(NULL);
+ }
+
+ return(ib_key_tuple_new_low(index, n_cols, heap));
+}
+
+/*****************************************************************//**
+Create an InnoDB row tuple.
+@return tuple instance, or NULL */
+static
+ib_tpl_t
+ib_row_tuple_new_low(
+/*=================*/
+ const dict_index_t* index, /*!< in: index of tuple */
+ ulint n_cols, /*!< in: no. of cols in tuple */
+ mem_heap_t* heap) /*!< in: memory heap */
+{
+ ib_tuple_t* tuple;
+
+ tuple = static_cast<ib_tuple_t*>(mem_heap_alloc(heap, sizeof(*tuple)));
+
+ if (tuple == NULL) {
+ mem_heap_free(heap);
+ return(NULL);
+ }
+
+ tuple->heap = heap;
+ tuple->index = index;
+ tuple->type = TPL_TYPE_ROW;
+
+ tuple->ptr = dtuple_create(heap, n_cols);
+
+ /* Copy types and set to SQL_NULL. */
+ dict_table_copy_types(tuple->ptr, index->table);
+
+ return((ib_tpl_t) tuple);
+}
+
+/*****************************************************************//**
+Create an InnoDB row tuple.
+@return tuple instance, or NULL */
+static
+ib_tpl_t
+ib_row_tuple_new(
+/*=============*/
+ const dict_index_t* index, /*!< in: index of tuple */
+ ulint n_cols) /*!< in: no. of cols in tuple */
+{
+ mem_heap_t* heap;
+
+ heap = mem_heap_create(64);
+
+ if (heap == NULL) {
+ return(NULL);
+ }
+
+ return(ib_row_tuple_new_low(index, n_cols, heap));
+}
+
+/*****************************************************************//**
+Begin a transaction.
+@return innobase txn handle */
+UNIV_INTERN
+ib_err_t
+ib_trx_start(
+/*=========*/
+ ib_trx_t ib_trx, /*!< in: transaction to restart */
+ ib_trx_level_t ib_trx_level, /*!< in: trx isolation level */
+ void* thd) /*!< in: THD */
+{
+ ib_err_t err = DB_SUCCESS;
+ trx_t* trx = (trx_t*) ib_trx;
+
+ ut_a(ib_trx_level <= IB_TRX_SERIALIZABLE);
+
+ trx_start_if_not_started(trx);
+
+ trx->isolation_level = ib_trx_level;
+
+ /* FIXME: This is a place holder, we should add an arg that comes
+ from the client. */
+ trx->mysql_thd = static_cast<THD*>(thd);
+
+ return(err);
+}
+
+/*****************************************************************//**
+Begin a transaction. This will allocate a new transaction handle.
+put the transaction in the active state.
+@return innobase txn handle */
+UNIV_INTERN
+ib_trx_t
+ib_trx_begin(
+/*=========*/
+ ib_trx_level_t ib_trx_level) /*!< in: trx isolation level */
+{
+ trx_t* trx;
+ ib_bool_t started;
+
+ trx = trx_allocate_for_mysql();
+ started = ib_trx_start((ib_trx_t) trx, ib_trx_level, NULL);
+ ut_a(started);
+
+ return((ib_trx_t) trx);
+}
+
+/*****************************************************************//**
+Get the transaction's state.
+@return transaction state */
+UNIV_INTERN
+ib_trx_state_t
+ib_trx_state(
+/*=========*/
+ ib_trx_t ib_trx) /*!< in: trx handle */
+{
+ trx_t* trx = (trx_t*) ib_trx;
+
+ return((ib_trx_state_t) trx->state);
+}
+
+/*****************************************************************//**
+Get a trx start time.
+@return trx start_time */
+UNIV_INTERN
+ib_u64_t
+ib_trx_get_start_time(
+/*==================*/
+ ib_trx_t ib_trx) /*!< in: transaction */
+{
+ trx_t* trx = (trx_t*) ib_trx;
+ return(static_cast<ib_u64_t>(trx->start_time));
+}
+/*****************************************************************//**
+Release the resources of the transaction.
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_trx_release(
+/*===========*/
+ ib_trx_t ib_trx) /*!< in: trx handle */
+{
+ trx_t* trx = (trx_t*) ib_trx;
+
+ ut_ad(trx != NULL);
+ trx_free_for_mysql(trx);
+
+ return(DB_SUCCESS);
+}
+
+/*****************************************************************//**
+Commit a transaction. This function will also release the schema
+latches too.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_trx_commit(
+/*==========*/
+ ib_trx_t ib_trx) /*!< in: trx handle */
+{
+ ib_err_t err = DB_SUCCESS;
+ trx_t* trx = (trx_t*) ib_trx;
+
+ if (trx->state == TRX_STATE_NOT_STARTED) {
+ err = ib_trx_release(ib_trx);
+ return(err);
+ }
+
+ trx_commit(trx);
+
+ err = ib_trx_release(ib_trx);
+ ut_a(err == DB_SUCCESS);
+
+ return(DB_SUCCESS);
+}
+
+/*****************************************************************//**
+Rollback a transaction. This function will also release the schema
+latches too.
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_trx_rollback(
+/*============*/
+ ib_trx_t ib_trx) /*!< in: trx handle */
+{
+ ib_err_t err;
+ trx_t* trx = (trx_t*) ib_trx;
+
+ err = static_cast<ib_err_t>(trx_rollback_for_mysql(trx));
+
+ /* It should always succeed */
+ ut_a(err == DB_SUCCESS);
+
+ err = ib_trx_release(ib_trx);
+ ut_a(err == DB_SUCCESS);
+
+ ib_wake_master_thread();
+
+ return(err);
+}
+
+/*****************************************************************//**
+Find an index definition from the index vector using index name.
+@return index def. if found else NULL */
+UNIV_INLINE
+const ib_index_def_t*
+ib_table_find_index(
+/*================*/
+ ib_vector_t* indexes, /*!< in: vector of indexes */
+ const char* name) /*!< in: index name */
+{
+ ulint i;
+
+ for (i = 0; i < ib_vector_size(indexes); ++i) {
+ const ib_index_def_t* index_def;
+
+ index_def = (ib_index_def_t*) ib_vector_get(indexes, i);
+
+ if (innobase_strcasecmp(name, index_def->name) == 0) {
+ return(index_def);
+ }
+ }
+
+ return(NULL);
+}
+
+/*****************************************************************//**
+Get the InnoDB internal precise type from the schema column definition.
+@return precise type in api format */
+UNIV_INLINE
+ulint
+ib_col_get_prtype(
+/*==============*/
+ const ib_col_t* ib_col) /*!< in: column definition */
+{
+ ulint prtype = 0;
+
+ if (ib_col->ib_col_attr & IB_COL_UNSIGNED) {
+ prtype |= DATA_UNSIGNED;
+
+ ut_a(ib_col->ib_col_type == IB_INT);
+ }
+
+ if (ib_col->ib_col_attr & IB_COL_NOT_NULL) {
+ prtype |= DATA_NOT_NULL;
+ }
+
+ return(prtype);
+}
+
+/*****************************************************************//**
+Get the InnoDB internal main type from the schema column definition.
+@return column main type */
+UNIV_INLINE
+ulint
+ib_col_get_mtype(
+/*==============*/
+ const ib_col_t* ib_col) /*!< in: column definition */
+{
+ /* Note: The api0api.h types should map directly to
+ the internal numeric codes. */
+ return(ib_col->ib_col_type);
+}
+
+/*****************************************************************//**
+Find a column in the the column vector with the same name.
+@return col. def. if found else NULL */
+UNIV_INLINE
+const ib_col_t*
+ib_table_find_col(
+/*==============*/
+ const ib_vector_t* cols, /*!< in: column list head */
+ const char* name) /*!< in: column name to find */
+{
+ ulint i;
+
+ for (i = 0; i < ib_vector_size(cols); ++i) {
+ const ib_col_t* ib_col;
+
+ ib_col = static_cast<const ib_col_t*>(
+ ib_vector_get((ib_vector_t*) cols, i));
+
+ if (innobase_strcasecmp(ib_col->name, name) == 0) {
+ return(ib_col);
+ }
+ }
+
+ return(NULL);
+}
+
+/*****************************************************************//**
+Find a column in the the column list with the same name.
+@return col. def. if found else NULL */
+UNIV_INLINE
+const ib_key_col_t*
+ib_index_find_col(
+/*==============*/
+ ib_vector_t* cols, /*!< in: column list head */
+ const char* name) /*!< in: column name to find */
+{
+ ulint i;
+
+ for (i = 0; i < ib_vector_size(cols); ++i) {
+ const ib_key_col_t* ib_col;
+
+ ib_col = static_cast<ib_key_col_t*>(ib_vector_get(cols, i));
+
+ if (innobase_strcasecmp(ib_col->name, name) == 0) {
+ return(ib_col);
+ }
+ }
+
+ return(NULL);
+}
+
+#ifdef __WIN__
+/*****************************************************************//**
+Convert a string to lower case. */
+static
+void
+ib_to_lower_case(
+/*=============*/
+ char* ptr) /*!< string to convert to lower case */
+{
+ while (*ptr) {
+ *ptr = tolower(*ptr);
+ ++ptr;
+ }
+}
+#endif /* __WIN__ */
+
+/*****************************************************************//**
+Normalizes a table name string. A normalized name consists of the
+database name catenated to '/' and table name. An example:
+test/mytable. On Windows normalization puts both the database name and the
+table name always to lower case. This function can be called for system
+tables and they don't have a database component. For tables that don't have
+a database component, we don't normalize them to lower case on Windows.
+The assumption is that they are system tables that reside in the system
+table space. */
+static
+void
+ib_normalize_table_name(
+/*====================*/
+ char* norm_name, /*!< out: normalized name as a
+ null-terminated string */
+ const char* name) /*!< in: table name string */
+{
+ const char* ptr = name;
+
+ /* Scan name from the end */
+
+ ptr += ut_strlen(name) - 1;
+
+ /* Find the start of the table name. */
+ while (ptr >= name && *ptr != '\\' && *ptr != '/' && ptr > name) {
+ --ptr;
+ }
+
+
+ /* For system tables there is no '/' or dbname. */
+ ut_a(ptr >= name);
+
+ if (ptr > name) {
+ const char* db_name;
+ const char* table_name;
+
+ table_name = ptr + 1;
+
+ --ptr;
+
+ while (ptr >= name && *ptr != '\\' && *ptr != '/') {
+ ptr--;
+ }
+
+ db_name = ptr + 1;
+
+ memcpy(norm_name, db_name,
+ ut_strlen(name) + 1 - (db_name - name));
+
+ norm_name[table_name - db_name - 1] = '/';
+#ifdef __WIN__
+ ib_to_lower_case(norm_name);
+#endif
+ } else {
+ ut_strcpy(norm_name, name);
+ }
+}
+
+/*****************************************************************//**
+Check whether the table name conforms to our requirements. Currently
+we only do a simple check for the presence of a '/'.
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_table_name_check(
+/*================*/
+ const char* name) /*!< in: table name to check */
+{
+ const char* slash = NULL;
+ ulint len = ut_strlen(name);
+
+ if (len < 2
+ || *name == '/'
+ || name[len - 1] == '/'
+ || (name[0] == '.' && name[1] == '/')
+ || (name[0] == '.' && name[1] == '.' && name[2] == '/')) {
+
+ return(DB_DATA_MISMATCH);
+ }
+
+ for ( ; *name; ++name) {
+#ifdef __WIN__
+ /* Check for reserved characters in DOS filenames. */
+ switch (*name) {
+ case ':':
+ case '|':
+ case '"':
+ case '*':
+ case '<':
+ case '>':
+ return(DB_DATA_MISMATCH);
+ }
+#endif /* __WIN__ */
+ if (*name == '/') {
+ if (slash) {
+ return(DB_DATA_MISMATCH);
+ }
+ slash = name;
+ }
+ }
+
+ return(slash ? DB_SUCCESS : DB_DATA_MISMATCH);
+}
+
+
+
+/*****************************************************************//**
+Get an index definition that is tagged as a clustered index.
+@return cluster index schema */
+UNIV_INLINE
+ib_index_def_t*
+ib_find_clustered_index(
+/*====================*/
+ ib_vector_t* indexes) /*!< in: index defs. to search */
+{
+ ulint i;
+ ulint n_indexes;
+
+ n_indexes = ib_vector_size(indexes);
+
+ for (i = 0; i < n_indexes; ++i) {
+ ib_index_def_t* ib_index_def;
+
+ ib_index_def = static_cast<ib_index_def_t*>(
+ ib_vector_get(indexes, i));
+
+ if (ib_index_def->clustered) {
+ return(ib_index_def);
+ }
+ }
+
+ return(NULL);
+}
+
+/*****************************************************************//**
+Get a table id. The caller must have acquired the dictionary mutex.
+@return DB_SUCCESS if found */
+static
+ib_err_t
+ib_table_get_id_low(
+/*================*/
+ const char* table_name, /*!< in: table to find */
+ ib_id_u64_t* table_id) /*!< out: table id if found */
+{
+ dict_table_t* table;
+ ib_err_t err = DB_TABLE_NOT_FOUND;
+
+ *table_id = 0;
+
+ table = ib_lookup_table_by_name(table_name);
+
+ if (table != NULL) {
+ *table_id = (table->id);
+
+ err = DB_SUCCESS;
+ }
+
+ return(err);
+}
+
+/*****************************************************************//**
+Create an internal cursor instance.
+@return DB_SUCCESS or err code */
+static
+ib_err_t
+ib_create_cursor(
+/*=============*/
+ ib_crsr_t* ib_crsr, /*!< out: InnoDB cursor */
+ dict_table_t* table, /*!< in: table instance */
+ dict_index_t* index, /*!< in: index to use */
+ trx_t* trx) /*!< in: transaction */
+{
+ mem_heap_t* heap;
+ ib_cursor_t* cursor;
+ ib_err_t err = DB_SUCCESS;
+
+ heap = mem_heap_create(sizeof(*cursor) * 2);
+
+ if (heap != NULL) {
+ row_prebuilt_t* prebuilt;
+
+ cursor = static_cast<ib_cursor_t*>(
+ mem_heap_zalloc(heap, sizeof(*cursor)));
+
+ cursor->heap = heap;
+
+ cursor->query_heap = mem_heap_create(64);
+
+ if (cursor->query_heap == NULL) {
+ mem_heap_free(heap);
+
+ return(DB_OUT_OF_MEMORY);
+ }
+
+ cursor->prebuilt = row_create_prebuilt(table, 0);
+
+ prebuilt = cursor->prebuilt;
+
+ prebuilt->trx = trx;
+
+ cursor->valid_trx = TRUE;
+
+ prebuilt->table = table;
+ prebuilt->select_lock_type = LOCK_NONE;
+ prebuilt->innodb_api = TRUE;
+
+ prebuilt->index = index;
+
+ ut_a(prebuilt->index != NULL);
+
+ if (prebuilt->trx != NULL) {
+ ++prebuilt->trx->n_mysql_tables_in_use;
+
+ prebuilt->index_usable =
+ row_merge_is_index_usable(
+ prebuilt->trx, prebuilt->index);
+
+ /* Assign a read view if the transaction does
+ not have it yet */
+
+ trx_assign_read_view(prebuilt->trx);
+ }
+
+ *ib_crsr = (ib_crsr_t) cursor;
+ } else {
+ err = DB_OUT_OF_MEMORY;
+ }
+
+ return(err);
+}
+
+/*****************************************************************//**
+Create an internal cursor instance, and set prebuilt->index to index
+with supplied index_id.
+@return DB_SUCCESS or err code */
+static
+ib_err_t
+ib_create_cursor_with_index_id(
+/*===========================*/
+ ib_crsr_t* ib_crsr, /*!< out: InnoDB cursor */
+ dict_table_t* table, /*!< in: table instance */
+ ib_id_u64_t index_id, /*!< in: index id or 0 */
+ trx_t* trx) /*!< in: transaction */
+{
+ dict_index_t* index;
+
+ if (index_id != 0) {
+ mutex_enter(&dict_sys->mutex);
+ index = dict_index_find_on_id_low(index_id);
+ mutex_exit(&dict_sys->mutex);
+ } else {
+ index = dict_table_get_first_index(table);
+ }
+
+ return(ib_create_cursor(ib_crsr, table, index, trx));
+}
+
+/*****************************************************************//**
+Open an InnoDB table and return a cursor handle to it.
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_open_table_using_id(
+/*==========================*/
+ ib_id_u64_t table_id, /*!< in: table id of table to open */
+ ib_trx_t ib_trx, /*!< in: Current transaction handle
+ can be NULL */
+ ib_crsr_t* ib_crsr) /*!< out,own: InnoDB cursor */
+{
+ ib_err_t err;
+ dict_table_t* table;
+
+ if (ib_trx == NULL || !ib_schema_lock_is_exclusive(ib_trx)) {
+ table = ib_open_table_by_id(table_id, FALSE);
+ } else {
+ table = ib_open_table_by_id(table_id, TRUE);
+ }
+
+ if (table == NULL) {
+
+ return(DB_TABLE_NOT_FOUND);
+ }
+
+ err = ib_create_cursor_with_index_id(ib_crsr, table, 0,
+ (trx_t*) ib_trx);
+
+ return(err);
+}
+
+/*****************************************************************//**
+Open an InnoDB index and return a cursor handle to it.
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_open_index_using_id(
+/*==========================*/
+	ib_id_u64_t	index_id,	/*!< in: index id of index to open */
+	ib_trx_t	ib_trx,		/*!< in: Current transaction handle
+					can be NULL */
+	ib_crsr_t*	ib_crsr)	/*!< out: InnoDB cursor */
+{
+	ib_err_t	err;
+	dict_table_t*	table;
+	/* The owning table's id is encoded in the UPPER 32 bits of
+	the 64-bit index id. */
+	ulint		table_id = (ulint)( index_id >> 32);
+
+	if (ib_trx == NULL || !ib_schema_lock_is_exclusive(ib_trx)) {
+		table = ib_open_table_by_id(table_id, FALSE);
+	} else {
+		table = ib_open_table_by_id(table_id, TRUE);
+	}
+
+	if (table == NULL) {
+
+		return(DB_TABLE_NOT_FOUND);
+	}
+
+	/* NOTE(review): the full 64-bit index_id is passed through here;
+	the old remark about "lower 32 bits of the dulint" looks stale. */
+	err = ib_create_cursor_with_index_id(
+		ib_crsr, table, index_id, (trx_t*) ib_trx);
+
+	/* If the requested index was not found the cursor is created
+	with a NULL prebuilt->index; close it and return NULL to the
+	caller rather than a half-open cursor. */
+	if (ib_crsr != NULL) {
+		const ib_cursor_t*	cursor;
+
+		cursor = *(ib_cursor_t**) ib_crsr;
+
+		if (cursor->prebuilt->index == NULL) {
+			ib_err_t	crsr_err;
+
+			crsr_err = ib_cursor_close(*ib_crsr);
+			ut_a(crsr_err == DB_SUCCESS);
+
+			*ib_crsr = NULL;
+		}
+	}
+
+	return(err);
+}
+
+/*****************************************************************//**
+Open an InnoDB secondary index cursor and return a cursor handle to it.
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_open_index_using_name(
+/*============================*/
+	ib_crsr_t	ib_open_crsr,	/*!< in: open/active cursor */
+	const char*	index_name,	/*!< in: secondary index name */
+	ib_crsr_t*	ib_crsr,	/*!< out,own: InnoDB index cursor */
+	int*		idx_type,	/*!< out: index is cluster index */
+	ib_id_u64_t*	idx_id)		/*!< out: index id */
+{
+	dict_table_t*	table;
+	dict_index_t*	index;
+	index_id_t	index_id = 0;
+	ib_err_t	err = DB_TABLE_NOT_FOUND;
+	ib_cursor_t*	cursor = (ib_cursor_t*) ib_open_crsr;
+
+	/* Initialise the out parameters for the not-found paths. */
+	*idx_type = 0;
+	*idx_id = 0;
+	*ib_crsr = NULL;
+
+	/* We want to increment the ref count, so we do a redundant search. */
+	table = dict_table_open_on_id(cursor->prebuilt->table->id,
+				      FALSE, FALSE);
+	ut_a(table != NULL);
+
+	/* The first index is always the cluster index. */
+	index = dict_table_get_first_index(table);
+
+	/* Traverse the user defined indexes. */
+	while (index != NULL) {
+		if (innobase_strcasecmp(index->name, index_name) == 0) {
+			index_id = index->id;
+			*idx_type = index->type;
+			*idx_id = index_id;
+			break;
+		}
+		index = UT_LIST_GET_NEXT(indexes, index);
+	}
+
+	/* Name not found: release the extra table reference taken above. */
+	if (!index_id) {
+		dict_table_close(table, FALSE, FALSE);
+		return(DB_ERROR);
+	}
+
+	/* NOTE(review): this check is always true after the early return
+	above; kept as-is, but it is effectively redundant. */
+	if (index_id > 0) {
+		ut_ad(index->id == index_id);
+		err = ib_create_cursor(
+			ib_crsr, table, index, cursor->prebuilt->trx);
+	}
+
+	/* Defensive: a cursor whose prebuilt has no index is unusable;
+	close it and hand back NULL. */
+	if (*ib_crsr != NULL) {
+		const ib_cursor_t*	cursor;
+
+		cursor = *(ib_cursor_t**) ib_crsr;
+
+		if (cursor->prebuilt->index == NULL) {
+			err = ib_cursor_close(*ib_crsr);
+			ut_a(err == DB_SUCCESS);
+			*ib_crsr = NULL;
+		}
+	}
+
+	return(err);
+}
+
+/*****************************************************************//**
+Open an InnoDB table and return a cursor handle to it.
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_open_table(
+/*=================*/
+	const char*	name,		/*!< in: table name */
+	ib_trx_t	ib_trx,		/*!< in: Current transaction handle
+					can be NULL */
+	ib_crsr_t*	ib_crsr)	/*!< out,own: InnoDB cursor */
+{
+	ib_err_t	err;
+	dict_table_t*	table;
+	char*		normalized_name;
+
+	/* NOTE(review): mem_alloc() result is used unchecked here;
+	presumably it aborts on OOM -- confirm against ut0mem. */
+	normalized_name = static_cast<char*>(mem_alloc(ut_strlen(name) + 1));
+	ib_normalize_table_name(normalized_name, name);
+
+	if (ib_trx != NULL) {
+		if (!ib_schema_lock_is_exclusive(ib_trx)) {
+			table = (dict_table_t*)ib_open_table_by_name(
+				normalized_name);
+		} else {
+			/* NOTE: We do not acquire MySQL metadata lock */
+			table = ib_lookup_table_by_name(normalized_name);
+		}
+	} else {
+		table = (dict_table_t*)ib_open_table_by_name(normalized_name);
+	}
+
+	mem_free(normalized_name);
+	normalized_name = NULL;
+
+	/* It can happen that another thread has created the table but
+	not the cluster index or it's a broken table definition. Refuse to
+	open if that's the case. */
+	if (table != NULL && dict_table_get_first_index(table) == NULL) {
+		table = NULL;
+	}
+
+	if (table != NULL) {
+		err = ib_create_cursor_with_index_id(ib_crsr, table, 0,
+						     (trx_t*) ib_trx);
+	} else {
+		err = DB_TABLE_NOT_FOUND;
+	}
+
+	return(err);
+}
+
+/********************************************************************//**
+Free a context struct for a table handle. Releases the three cached
+query graphs (insert/update/select) and zeroes the whole struct so it
+can be lazily rebuilt. */
+static
+void
+ib_qry_proc_free(
+/*=============*/
+	ib_qry_proc_t*	q_proc)		/*!< in, own: qproc struct */
+{
+	/* que_graph_free_recursive() presumably tolerates NULL graphs
+	that were never built -- TODO confirm. */
+	que_graph_free_recursive(q_proc->grph.ins);
+	que_graph_free_recursive(q_proc->grph.upd);
+	que_graph_free_recursive(q_proc->grph.sel);
+
+	memset(q_proc, 0x0, sizeof(*q_proc));
+}
+
+/*****************************************************************//**
+set a cursor trx to NULL. Detaches the cursor from its transaction;
+the trx itself is not committed or rolled back here. */
+UNIV_INTERN
+void
+ib_cursor_clear_trx(
+/*================*/
+	ib_crsr_t	ib_crsr)	/*!< in/out: InnoDB cursor */
+{
+	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
+
+	cursor->prebuilt->trx = NULL;
+}
+
+/*****************************************************************//**
+Reset the cursor: releases the table-in-use count on the attached
+transaction (if any) and frees the cached query graphs and query heap
+so the cursor can be reused.
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_reset(
+/*============*/
+	ib_crsr_t	ib_crsr)	/*!< in/out: InnoDB cursor */
+{
+	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
+	row_prebuilt_t*	prebuilt = cursor->prebuilt;
+
+	/* Only decrement when the trx is still attached and actually
+	counts this cursor among its open MySQL tables. */
+	if (cursor->valid_trx && prebuilt->trx != NULL
+	    && prebuilt->trx->n_mysql_tables_in_use > 0) {
+
+		--prebuilt->trx->n_mysql_tables_in_use;
+	}
+
+	/* The fields in this data structure are allocated from
+	the query heap and so need to be reset too. */
+	ib_qry_proc_free(&cursor->q_proc);
+
+	mem_heap_empty(cursor->query_heap);
+
+	return(DB_SUCCESS);
+}
+
+/*****************************************************************//**
+update the cursor with new transactions and also reset the cursor
+@return DB_SUCCESS or err code */
+ib_err_t
+ib_cursor_new_trx(
+/*==============*/
+	ib_crsr_t	ib_crsr,	/*!< in/out: InnoDB cursor */
+	ib_trx_t	ib_trx)		/*!< in: transaction */
+{
+	ib_err_t	err = DB_SUCCESS;
+	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
+	trx_t*		trx = (trx_t*) ib_trx;
+
+	row_prebuilt_t*	prebuilt = cursor->prebuilt;
+
+	/* Swap the prebuilt struct over to the new transaction. */
+	row_update_prebuilt_trx(prebuilt, trx);
+
+	cursor->valid_trx = TRUE;
+
+	/* Give the new trx a consistent read view for this cursor. */
+	trx_assign_read_view(prebuilt->trx);
+
+	/* Cached query graphs reference the old trx; rebuild lazily. */
+	ib_qry_proc_free(&cursor->q_proc);
+
+	mem_heap_empty(cursor->query_heap);
+
+	return(err);
+}
+
+/*****************************************************************//**
+Commit the transaction in a cursor and detach it from the cursor.
+After this call the cursor has no transaction (prebuilt->trx == NULL).
+@return DB_SUCCESS or err code */
+ib_err_t
+ib_cursor_commit_trx(
+/*=================*/
+	ib_crsr_t	ib_crsr,	/*!< in/out: InnoDB cursor */
+	ib_trx_t	ib_trx)		/*!< in: transaction */
+{
+	ib_err_t	err = DB_SUCCESS;
+	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
+	row_prebuilt_t*	prebuilt = cursor->prebuilt;
+
+	/* The caller must pass the same trx the cursor is attached to. */
+	ut_ad(prebuilt->trx == (trx_t*) ib_trx);
+	err = ib_trx_commit(ib_trx);
+	prebuilt->trx = NULL;
+	cursor->valid_trx = FALSE;
+	return(err);
+}
+
+/*****************************************************************//**
+Close an InnoDB table and free the cursor. Frees the query graphs,
+the prebuilt struct and both cursor heaps. NULL is accepted and is a
+no-op.
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_close(
+/*============*/
+	ib_crsr_t	ib_crsr)	/*!< in,own: InnoDB cursor */
+{
+	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
+	row_prebuilt_t*	prebuilt;
+	trx_t*		trx;
+
+	if (!cursor) {
+		return(DB_SUCCESS);
+	}
+
+	prebuilt = cursor->prebuilt;
+	trx = prebuilt->trx;
+
+	ib_qry_proc_free(&cursor->q_proc);
+
+	/* The transaction could have been detached from the cursor. */
+	if (cursor->valid_trx && trx != NULL
+	    && trx->n_mysql_tables_in_use > 0) {
+		--trx->n_mysql_tables_in_use;
+	}
+
+	row_prebuilt_free(prebuilt, FALSE);
+	cursor->prebuilt = NULL;
+
+	/* NOTE(review): cursor itself is presumably allocated from
+	cursor->heap, so the second mem_heap_free() releases it --
+	confirm against ib_create_cursor(). The trailing NULL store is
+	a dead assignment to a local. */
+	mem_heap_free(cursor->query_heap);
+	mem_heap_free(cursor->heap);
+	cursor = NULL;
+
+	return(DB_SUCCESS);
+}
+
+/*****************************************************************//**
+Close the table, decrement n_ref_count count. The cursor itself stays
+open; only the dictionary table reference taken at open time is
+released.
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_close_table(
+/*==================*/
+	ib_crsr_t	ib_crsr)	/*!< in,own: InnoDB cursor */
+{
+	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
+	row_prebuilt_t*	prebuilt = cursor->prebuilt;
+
+	if (prebuilt && prebuilt->table) {
+		dict_table_close(prebuilt->table, FALSE, FALSE);
+	}
+
+	return(DB_SUCCESS);
+}
+/**********************************************************************//**
+Run the insert query and do error handling. Re-runs the insert node
+whenever ib_handle_errors() reports that the error was a lock wait
+that has been resolved (rolling back to the given savepoint on hard
+errors).
+@return DB_SUCCESS or error code */
+UNIV_INLINE
+ib_err_t
+ib_insert_row_with_lock_retry(
+/*==========================*/
+	que_thr_t*	thr,		/*!< in: insert query graph */
+	ins_node_t*	node,		/*!< in: insert node for the query */
+	trx_savept_t*	savept)		/*!< in: savepoint to rollback to
+					in case of an error */
+{
+	trx_t*		trx;
+	ib_err_t	err;
+	ib_bool_t	lock_wait;
+
+	trx = thr_get_trx(thr);
+
+	do {
+		/* Reset the thread to (re-)execute the insert node. */
+		thr->run_node = node;
+		thr->prev_node = node;
+
+		row_ins_step(thr);
+
+		err = trx->error_state;
+
+		if (err != DB_SUCCESS) {
+			que_thr_stop_for_mysql(thr);
+
+			/* Mark that we may be waiting for a row lock while
+			ib_handle_errors() decides whether to retry. */
+			thr->lock_state = QUE_THR_LOCK_ROW;
+			lock_wait = ib_handle_errors(&err, trx, thr, savept);
+			thr->lock_state = QUE_THR_LOCK_NOLOCK;
+		} else {
+			lock_wait = FALSE;
+		}
+	} while (lock_wait);
+
+	return(err);
+}
+
+/*****************************************************************//**
+Write a row. Takes a savepoint, runs the insert graph with lock-wait
+retry, and on success updates the table row count and server insert
+statistics.
+@return DB_SUCCESS or err code */
+static
+ib_err_t
+ib_execute_insert_query_graph(
+/*==========================*/
+	dict_table_t*	table,		/*!< in: table where to insert */
+	que_fork_t*	ins_graph,	/*!< in: query graph */
+	ins_node_t*	node)		/*!< in: insert node */
+{
+	trx_t*		trx;
+	que_thr_t*	thr;
+	trx_savept_t	savept;
+	ib_err_t	err = DB_SUCCESS;
+
+	trx = ins_graph->trx;
+
+	/* Savepoint so a failed insert can be rolled back cleanly. */
+	savept = trx_savept_take(trx);
+
+	thr = que_fork_get_first_thr(ins_graph);
+
+	que_thr_move_to_run_state_for_mysql(thr, trx);
+
+	err = ib_insert_row_with_lock_retry(thr, node, &savept);
+
+	if (err == DB_SUCCESS) {
+		que_thr_stop_for_mysql_no_error(thr, trx);
+
+		dict_table_n_rows_inc(table);
+
+		srv_stats.n_rows_inserted.inc();
+	}
+
+	trx->op_info = "";
+
+	return(err);
+}
+
+/*****************************************************************//**
+Create an insert query graph node. Lazily builds (and caches on the
+cursor) an INS_DIRECT insert node plus its query graph; subsequent
+calls are no-ops until ib_qry_proc_free() clears the cache. */
+static
+void
+ib_insert_query_graph_create(
+/*==========================*/
+	ib_cursor_t*	cursor)		/*!< in: Cursor instance */
+{
+	ib_qry_proc_t*	q_proc = &cursor->q_proc;
+	ib_qry_node_t*	node = &q_proc->node;
+	trx_t*		trx = cursor->prebuilt->trx;
+
+	/* The transaction must already be started. */
+	ut_a(trx->state != TRX_STATE_NOT_STARTED);
+
+	if (node->ins == NULL) {
+		dtuple_t*	row;
+		ib_qry_grph_t*	grph = &q_proc->grph;
+		mem_heap_t*	heap = cursor->query_heap;
+		dict_table_t*	table = cursor->prebuilt->table;
+
+		node->ins = ins_node_create(INS_DIRECT, table, heap);
+
+		node->ins->select = NULL;
+		node->ins->values_list = NULL;
+
+		/* Template row matching the table's column types; the
+		caller fills it before each insert. */
+		row = dtuple_create(heap, dict_table_get_n_cols(table));
+		dict_table_copy_types(row, table);
+
+		ins_node_set_new_row(node->ins, row);
+
+		grph->ins = static_cast<que_fork_t*>(
+			que_node_get_parent(
+				pars_complete_graph_for_exec(node->ins, trx,
+							     heap)));
+
+		grph->ins->state = QUE_FORK_ACTIVE;
+	}
+}
+
+/*****************************************************************//**
+Insert a row to a table. Shallow-copies the user tuple's fields into
+the cached insert node's row (validating NOT NULL constraints on the
+way) and executes the insert graph.
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_insert_row(
+/*=================*/
+	ib_crsr_t	ib_crsr,	/*!< in/out: InnoDB cursor instance */
+	const ib_tpl_t	ib_tpl)		/*!< in: tuple to insert */
+{
+	ib_ulint_t	i;
+	ib_qry_node_t*	node;
+	ib_qry_proc_t*	q_proc;
+	ulint		n_fields;
+	dtuple_t*	dst_dtuple;
+	ib_err_t	err = DB_SUCCESS;
+	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
+	const ib_tuple_t* src_tuple = (const ib_tuple_t*) ib_tpl;
+
+	/* Ensure the cached insert node/graph exists. */
+	ib_insert_query_graph_create(cursor);
+
+	ut_ad(src_tuple->type == TPL_TYPE_ROW);
+
+	q_proc = &cursor->q_proc;
+	node = &q_proc->node;
+
+	node->ins->state = INS_NODE_ALLOC_ROW_ID;
+	dst_dtuple = node->ins->row;
+
+	n_fields = dtuple_get_n_fields(src_tuple->ptr);
+	ut_ad(n_fields == dtuple_get_n_fields(dst_dtuple));
+
+	/* Do a shallow copy of the data fields and check for NULL
+	constraints on columns. */
+	for (i = 0; i < n_fields; i++) {
+		ulint		mtype;
+		dfield_t*	src_field;
+		dfield_t*	dst_field;
+
+		src_field = dtuple_get_nth_field(src_tuple->ptr, i);
+
+		mtype = dtype_get_mtype(dfield_get_type(src_field));
+
+		/* Don't touch the system columns. */
+		if (mtype != DATA_SYS) {
+			ulint	prtype;
+
+			prtype = dtype_get_prtype(dfield_get_type(src_field));
+
+			/* Reject NULL in a NOT NULL column. */
+			if ((prtype & DATA_NOT_NULL)
+			    && dfield_is_null(src_field)) {
+
+				err = DB_DATA_MISMATCH;
+				break;
+			}
+
+			dst_field = dtuple_get_nth_field(dst_dtuple, i);
+			ut_ad(mtype
+			      == dtype_get_mtype(dfield_get_type(dst_field)));
+
+			/* Do a shallow copy. */
+			dfield_set_data(
+				dst_field, src_field->data, src_field->len);
+
+			if (dst_field->len != IB_SQL_NULL) {
+				UNIV_MEM_ASSERT_RW(dst_field->data,
+						   dst_field->len);
+			}
+		}
+	}
+
+	if (err == DB_SUCCESS) {
+		err = ib_execute_insert_query_graph(
+			src_tuple->index->table, q_proc->grph.ins, node->ins);
+	}
+
+	return(err);
+}
+
+/*********************************************************************//**
+Gets pointer to a prebuilt update vector used in updates. Lazily
+creates the update node, then (re-)builds the update query graph.
+@return update vector */
+UNIV_INLINE
+upd_t*
+ib_update_vector_create(
+/*====================*/
+	ib_cursor_t*	cursor)		/*!< in: current cursor */
+{
+	trx_t*		trx = cursor->prebuilt->trx;
+	mem_heap_t*	heap = cursor->query_heap;
+	dict_table_t*	table = cursor->prebuilt->table;
+	ib_qry_proc_t*	q_proc = &cursor->q_proc;
+	ib_qry_grph_t*	grph = &q_proc->grph;
+	ib_qry_node_t*	node = &q_proc->node;
+
+	ut_a(trx->state != TRX_STATE_NOT_STARTED);
+
+	if (node->upd == NULL) {
+		node->upd = static_cast<upd_node_t*>(
+			row_create_update_node_for_mysql(table, heap));
+	}
+
+	/* NOTE(review): unlike node->upd, grph->upd is rebuilt on every
+	call (allocating from the query heap); the heap is only emptied
+	on reset/close -- confirm this is intentional. */
+	grph->upd = static_cast<que_fork_t*>(
+		que_node_get_parent(
+			pars_complete_graph_for_exec(node->upd, trx, heap)));
+
+	grph->upd->state = QUE_FORK_ACTIVE;
+
+	return(node->upd->update);
+}
+
+/**********************************************************************//**
+Note that a column has changed: fills one update-vector field with the
+new value (or NULL) and the column's position in the clustered index. */
+static
+void
+ib_update_col(
+/*==========*/
+
+	ib_cursor_t*	cursor,		/*!< in: current cursor */
+	upd_field_t*	upd_field,	/*!< in/out: update field */
+	ulint		col_no,		/*!< in: column number */
+	dfield_t*	dfield)		/*!< in: updated dfield */
+{
+	ulint		data_len;
+	dict_table_t*	table = cursor->prebuilt->table;
+	dict_index_t*	index = dict_table_get_first_index(table);
+
+	data_len = dfield_get_len(dfield);
+
+	if (data_len == UNIV_SQL_NULL) {
+		dfield_set_null(&upd_field->new_val);
+	} else {
+		dfield_copy_data(&upd_field->new_val, dfield);
+	}
+
+	upd_field->exp = NULL;
+
+	upd_field->orig_len = 0;
+
+	/* Field position is relative to the clustered (first) index. */
+	upd_field->field_no = dict_col_get_clust_pos(
+		&table->cols[col_no], index);
+}
+
+/**********************************************************************//**
+Checks which fields have changed in a row and stores the new data
+to an update vector. Skips system columns, rejects NULL in NOT NULL
+columns, and records a field when either its length or its bytes
+differ between old and new.
+@return DB_SUCCESS or err code */
+static
+ib_err_t
+ib_calc_diff(
+/*=========*/
+	ib_cursor_t*	cursor,		/*!< in: current cursor */
+	upd_t*		upd,		/*!< in/out: update vector */
+	const ib_tuple_t*old_tuple,	/*!< in: Old tuple in table */
+	const ib_tuple_t*new_tuple)	/*!< in: New tuple to update */
+{
+	ulint		i;
+	ulint		n_changed = 0;
+	ib_err_t	err = DB_SUCCESS;
+	ulint		n_fields = dtuple_get_n_fields(new_tuple->ptr);
+
+	ut_a(old_tuple->type == TPL_TYPE_ROW);
+	ut_a(new_tuple->type == TPL_TYPE_ROW);
+	/* Both tuples must describe the same table. */
+	ut_a(old_tuple->index->table == new_tuple->index->table);
+
+	for (i = 0; i < n_fields; ++i) {
+		ulint		mtype;
+		ulint		prtype;
+		upd_field_t*	upd_field;
+		dfield_t*	new_dfield;
+		dfield_t*	old_dfield;
+
+		new_dfield = dtuple_get_nth_field(new_tuple->ptr, i);
+		old_dfield = dtuple_get_nth_field(old_tuple->ptr, i);
+
+		mtype = dtype_get_mtype(dfield_get_type(old_dfield));
+		prtype = dtype_get_prtype(dfield_get_type(old_dfield));
+
+		/* Skip the system columns */
+		if (mtype == DATA_SYS) {
+			continue;
+
+		} else if ((prtype & DATA_NOT_NULL)
+			   && dfield_is_null(new_dfield)) {
+
+			err = DB_DATA_MISMATCH;
+			break;
+		}
+
+		/* Changed if length differs, or (both non-NULL of equal
+		length and) the bytes differ. */
+		if (dfield_get_len(new_dfield) != dfield_get_len(old_dfield)
+		    || (!dfield_is_null(old_dfield)
+		        && memcmp(dfield_get_data(new_dfield),
+				  dfield_get_data(old_dfield),
+				  dfield_get_len(old_dfield)) != 0)) {
+
+			upd_field = &upd->fields[n_changed];
+
+			ib_update_col(cursor, upd_field, i, new_dfield);
+
+			++n_changed;
+		}
+	}
+
+	if (err == DB_SUCCESS) {
+		upd->info_bits = 0;
+		upd->n_fields = n_changed;
+	}
+
+	return(err);
+}
+
+/**********************************************************************//**
+Run the update query and do error handling. Like the insert variant,
+but DB_RECORD_NOT_FOUND is passed straight back to the caller without
+going through ib_handle_errors().
+@return DB_SUCCESS or error code */
+UNIV_INLINE
+ib_err_t
+ib_update_row_with_lock_retry(
+/*==========================*/
+	que_thr_t*	thr,		/*!< in: Update query graph */
+	upd_node_t*	node,		/*!< in: Update node for the query */
+	trx_savept_t*	savept)		/*!< in: savepoint to rollback to
+					in case of an error */
+
+{
+	trx_t*		trx;
+	ib_err_t	err;
+	ib_bool_t	lock_wait;
+
+	trx = thr_get_trx(thr);
+
+	do {
+		/* Reset the thread to (re-)execute the update node. */
+		thr->run_node = node;
+		thr->prev_node = node;
+
+		row_upd_step(thr);
+
+		err = trx->error_state;
+
+		if (err != DB_SUCCESS) {
+			que_thr_stop_for_mysql(thr);
+
+			if (err != DB_RECORD_NOT_FOUND) {
+				thr->lock_state = QUE_THR_LOCK_ROW;
+
+				lock_wait = ib_handle_errors(
+					&err, trx, thr, savept);
+
+				thr->lock_state = QUE_THR_LOCK_NOLOCK;
+			} else {
+				lock_wait = FALSE;
+			}
+		} else {
+			lock_wait = FALSE;
+		}
+	} while (lock_wait);
+
+	return(err);
+}
+
+/*********************************************************************//**
+Does an update or delete of a row (node->is_delete selects which).
+Copies the caller's positioned clustered-index cursor into the update
+node, runs the graph with lock-wait retry, and maintains row counts
+and server statistics.
+@return DB_SUCCESS or err code */
+UNIV_INLINE
+ib_err_t
+ib_execute_update_query_graph(
+/*==========================*/
+	ib_cursor_t*	cursor,		/*!< in: Cursor instance */
+	btr_pcur_t*	pcur)		/*!< in: Btree persistent cursor */
+{
+	ib_err_t	err;
+	que_thr_t*	thr;
+	upd_node_t*	node;
+	trx_savept_t	savept;
+	trx_t*		trx = cursor->prebuilt->trx;
+	dict_table_t*	table = cursor->prebuilt->table;
+	ib_qry_proc_t*	q_proc = &cursor->q_proc;
+
+	/* The transaction must be running. */
+	ut_a(trx->state != TRX_STATE_NOT_STARTED);
+
+	node = q_proc->node.upd;
+
+	/* Updates/deletes must go through the clustered index and the
+	cursor must be positioned exactly on the record. */
+	ut_a(dict_index_is_clust(pcur->btr_cur.index));
+	btr_pcur_copy_stored_position(node->pcur, pcur);
+
+	ut_a(node->pcur->rel_pos == BTR_PCUR_ON);
+
+	savept = trx_savept_take(trx);
+
+	thr = que_fork_get_first_thr(q_proc->grph.upd);
+
+	node->state = UPD_NODE_UPDATE_CLUSTERED;
+
+	que_thr_move_to_run_state_for_mysql(thr, trx);
+
+	err = ib_update_row_with_lock_retry(thr, node, &savept);
+
+	if (err == DB_SUCCESS) {
+
+		que_thr_stop_for_mysql_no_error(thr, trx);
+
+		if (node->is_delete) {
+
+			dict_table_n_rows_dec(table);
+
+			srv_stats.n_rows_deleted.inc();
+		} else {
+			srv_stats.n_rows_updated.inc();
+		}
+
+	} else if (err == DB_RECORD_NOT_FOUND) {
+		/* Not-found is reported to the caller but must not leave
+		the trx in an error state. */
+		trx->error_state = DB_SUCCESS;
+	}
+
+	trx->op_info = "";
+
+	return(err);
+}
+
+/*****************************************************************//**
+Update a row in a table. Diffs old vs. new tuple into an update
+vector and executes the update graph over the clustered-index cursor.
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_update_row(
+/*=================*/
+	ib_crsr_t	ib_crsr,	/*!< in: InnoDB cursor instance */
+	const ib_tpl_t	ib_old_tpl,	/*!< in: Old tuple in table */
+	const ib_tpl_t	ib_new_tpl)	/*!< in: New tuple to update */
+{
+	upd_t*		upd;
+	ib_err_t	err;
+	btr_pcur_t*	pcur;
+	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
+	row_prebuilt_t*	prebuilt = cursor->prebuilt;
+	const ib_tuple_t*old_tuple = (const ib_tuple_t*) ib_old_tpl;
+	const ib_tuple_t*new_tuple = (const ib_tuple_t*) ib_new_tpl;
+
+	/* Pick the clustered-index cursor: directly for a clustered
+	cursor, via clust_pcur for a secondary-index cursor. */
+	if (dict_index_is_clust(prebuilt->index)) {
+		pcur = &cursor->prebuilt->pcur;
+	} else if (prebuilt->need_to_access_clustered) {
+		pcur = &cursor->prebuilt->clust_pcur;
+	} else {
+		return(DB_ERROR);
+	}
+
+	ut_a(old_tuple->type == TPL_TYPE_ROW);
+	ut_a(new_tuple->type == TPL_TYPE_ROW);
+
+	upd = ib_update_vector_create(cursor);
+
+	err = ib_calc_diff(cursor, upd, old_tuple, new_tuple);
+
+	if (err == DB_SUCCESS) {
+		/* Note that this is not a delete. */
+		cursor->q_proc.node.upd->is_delete = FALSE;
+
+		err = ib_execute_update_query_graph(cursor, pcur);
+	}
+
+	return(err);
+}
+
+/**********************************************************************//**
+Build the update query graph to delete a row from an index. Reads the
+record into a key tuple, fills the update vector with the current
+values, then executes the graph with is_delete set.
+@return DB_SUCCESS or err code */
+static
+ib_err_t
+ib_delete_row(
+/*==========*/
+	ib_cursor_t*	cursor,		/*!< in: current cursor */
+	btr_pcur_t*	pcur,		/*!< in: Btree persistent cursor */
+	const rec_t*	rec)		/*!< in: record to delete */
+{
+	ulint		i;
+	upd_t*		upd;
+	ib_err_t	err;
+	ib_tuple_t*	tuple;
+	ib_tpl_t	ib_tpl;
+	ulint		n_cols;
+	upd_field_t*	upd_field;
+	ib_bool_t	page_format;
+	dict_table_t*	table = cursor->prebuilt->table;
+	dict_index_t*	index = dict_table_get_first_index(table);
+
+	n_cols = dict_index_get_n_ordering_defined_by_user(index);
+	ib_tpl = ib_key_tuple_new(index, n_cols);
+
+	if (!ib_tpl) {
+		return(DB_OUT_OF_MEMORY);
+	}
+
+	tuple = (ib_tuple_t*) ib_tpl;
+
+	upd = ib_update_vector_create(cursor);
+
+	page_format = dict_table_is_comp(index->table);
+	ib_read_tuple(rec, page_format, tuple);
+
+	upd->n_fields = ib_tuple_get_n_cols(ib_tpl);
+
+	for (i = 0; i < upd->n_fields; ++i) {
+		dfield_t*	dfield;
+
+		upd_field = &upd->fields[i];
+		dfield = dtuple_get_nth_field(tuple->ptr, i);
+
+		dfield_copy_data(&upd_field->new_val, dfield);
+
+		upd_field->exp = NULL;
+
+		upd_field->orig_len = 0;
+
+		/* NOTE(review): loop-invariant; could be hoisted out. */
+		upd->info_bits = 0;
+
+		upd_field->field_no = dict_col_get_clust_pos(
+			&table->cols[i], index);
+	}
+
+	/* Note that this is a delete. */
+	cursor->q_proc.node.upd->is_delete = TRUE;
+
+	err = ib_execute_update_query_graph(cursor, pcur);
+
+	ib_tuple_delete(ib_tpl);
+
+	return(err);
+}
+
+/*****************************************************************//**
+Delete a row in a table. Restores the persistent cursor position,
+re-reads the record, and deletes it unless it is already delete-marked.
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_delete_row(
+/*=================*/
+	ib_crsr_t	ib_crsr)	/*!< in: InnoDB cursor instance */
+{
+	ib_err_t	err;
+	btr_pcur_t*	pcur;
+	dict_index_t*	index;
+	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
+	row_prebuilt_t*	prebuilt = cursor->prebuilt;
+
+	/* First index of the table == clustered index. */
+	index = dict_table_get_first_index(prebuilt->index->table);
+
+	/* Check whether this is a secondary index cursor */
+	if (index != prebuilt->index) {
+		if (prebuilt->need_to_access_clustered) {
+			pcur = &prebuilt->clust_pcur;
+		} else {
+			return(DB_ERROR);
+		}
+	} else {
+		pcur = &prebuilt->pcur;
+	}
+
+	if (ib_btr_cursor_is_positioned(pcur)) {
+		const rec_t*	rec;
+		ib_bool_t	page_format;
+		mtr_t		mtr;
+
+		page_format = dict_table_is_comp(index->table);
+
+		mtr_start(&mtr);
+
+		if (btr_pcur_restore_position(
+			BTR_SEARCH_LEAF, pcur, &mtr)) {
+
+			rec = btr_pcur_get_rec(pcur);
+		} else {
+			rec = NULL;
+		}
+
+		mtr_commit(&mtr);
+
+		/* Skip records already delete-marked by someone else. */
+		if (rec && !rec_get_deleted_flag(rec, page_format)) {
+			err = ib_delete_row(cursor, pcur, rec);
+		} else {
+			err = DB_RECORD_NOT_FOUND;
+		}
+	} else {
+		err = DB_RECORD_NOT_FOUND;
+	}
+
+	return(err);
+}
+
+/*****************************************************************//**
+Read current row. Restores the persistent cursor position inside a
+mini-transaction and copies the record into the caller's tuple.
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_read_row(
+/*===============*/
+	ib_crsr_t	ib_crsr,	/*!< in: InnoDB cursor instance */
+	ib_tpl_t	ib_tpl)		/*!< out: read cols into this tuple */
+{
+	ib_err_t	err;
+	ib_tuple_t*	tuple = (ib_tuple_t*) ib_tpl;
+	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
+
+	ut_a(cursor->prebuilt->trx->state != TRX_STATE_NOT_STARTED);
+
+	/* When searching with IB_EXACT_MATCH set, row_search_for_mysql()
+	will not position the persistent cursor but will copy the record
+	found into the row cache. It should be the only entry. */
+	if (!ib_cursor_is_positioned(ib_crsr) ) {
+		err = DB_RECORD_NOT_FOUND;
+	} else {
+		mtr_t		mtr;
+		btr_pcur_t*	pcur;
+		row_prebuilt_t*	prebuilt = cursor->prebuilt;
+
+		/* Full-row reads through a secondary index go via the
+		clustered-index cursor. */
+		if (prebuilt->need_to_access_clustered
+		    && tuple->type == TPL_TYPE_ROW) {
+			pcur = &prebuilt->clust_pcur;
+		} else {
+			pcur = &prebuilt->pcur;
+		}
+
+		if (pcur == NULL) {
+			return(DB_ERROR);
+		}
+
+		mtr_start(&mtr);
+
+		if (btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, &mtr)) {
+			const rec_t*	rec;
+			ib_bool_t	page_format;
+
+			page_format = dict_table_is_comp(tuple->index->table);
+			rec = btr_pcur_get_rec(pcur);
+
+			/* Prefer the record cached by the API layer if it
+			differs from the one under the cursor. */
+			if (prebuilt->innodb_api_rec &&
+			    prebuilt->innodb_api_rec != rec) {
+				rec = prebuilt->innodb_api_rec;
+			}
+
+			if (!rec_get_deleted_flag(rec, page_format)) {
+				ib_read_tuple(rec, page_format, tuple);
+				err = DB_SUCCESS;
+			} else{
+				err = DB_RECORD_NOT_FOUND;
+			}
+
+		} else {
+			err = DB_RECORD_NOT_FOUND;
+		}
+
+		mtr_commit(&mtr);
+	}
+
+	return(err);
+}
+
+/*****************************************************************//**
+Move cursor to the first record in the table (direction chosen by the
+search mode). Uses an empty search tuple so row_search_for_mysql()
+positions at one of the index ends.
+@return DB_SUCCESS or err code */
+UNIV_INLINE
+ib_err_t
+ib_cursor_position(
+/*===============*/
+	ib_cursor_t*	cursor,		/*!< in: InnoDB cursor instance */
+	ib_srch_mode_t	mode)		/*!< in: Search mode */
+{
+	ib_err_t	err;
+	row_prebuilt_t*	prebuilt = cursor->prebuilt;
+	unsigned char*	buf;
+
+	/* Scratch record buffer for row_search_for_mysql(); result is
+	not read back here. NOTE(review): mem_alloc() unchecked --
+	presumably aborts on OOM. */
+	buf = static_cast<unsigned char*>(mem_alloc(UNIV_PAGE_SIZE));
+
+	/* We want to position at one of the ends, row_search_for_mysql()
+	uses the search_tuple fields to work out what to do. */
+	dtuple_set_n_fields(prebuilt->search_tuple, 0);
+
+	err = static_cast<ib_err_t>(row_search_for_mysql(
+		buf, mode, prebuilt, 0, 0));
+
+	mem_free(buf);
+
+	return(err);
+}
+
+/*****************************************************************//**
+Move cursor to the first record in the table (IB_CUR_G: ascending
+from the index start).
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_first(
+/*============*/
+	ib_crsr_t	ib_crsr)	/*!< in: InnoDB cursor instance */
+{
+	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
+
+	return(ib_cursor_position(cursor, IB_CUR_G));
+}
+
+/*****************************************************************//**
+Move cursor to the last record in the table (IB_CUR_L: descending
+from the index end).
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_last(
+/*===========*/
+	ib_crsr_t	ib_crsr)	/*!< in: InnoDB cursor instance */
+{
+	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
+
+	return(ib_cursor_position(cursor, IB_CUR_L));
+}
+
+/*****************************************************************//**
+Move cursor to the next user record in the table.
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_next(
+/*===========*/
+	ib_crsr_t	ib_crsr)	/*!< in: InnoDB cursor instance */
+{
+	ib_err_t	err;
+	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
+	row_prebuilt_t*	prebuilt = cursor->prebuilt;
+	/* NOTE(review): large on-stack scratch buffer (max page size);
+	unlike ib_cursor_position() this one avoids the heap. */
+	byte		buf[UNIV_PAGE_SIZE_MAX];
+
+	/* We want to move to the next record */
+	dtuple_set_n_fields(prebuilt->search_tuple, 0);
+
+	err = static_cast<ib_err_t>(row_search_for_mysql(
+		buf, PAGE_CUR_G, prebuilt, 0, ROW_SEL_NEXT));
+
+	return(err);
+}
+
+/*****************************************************************//**
+Search for key. Shallow-copies the key tuple into the prebuilt search
+tuple and runs row_search_for_mysql() with the cursor's match mode.
+@return DB_SUCCESS or err code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_moveto(
+/*=============*/
+	ib_crsr_t	ib_crsr,	/*!< in: InnoDB cursor instance */
+	ib_tpl_t	ib_tpl,		/*!< in: Key to search for */
+	ib_srch_mode_t	ib_srch_mode)	/*!< in: search mode */
+{
+	ulint		i;
+	ulint		n_fields;
+	ib_err_t	err = DB_SUCCESS;
+	ib_tuple_t*	tuple = (ib_tuple_t*) ib_tpl;
+	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
+	row_prebuilt_t*	prebuilt = cursor->prebuilt;
+	dtuple_t*	search_tuple = prebuilt->search_tuple;
+	unsigned char*	buf;
+
+	/* Only key tuples may be used for searching. */
+	ut_a(tuple->type == TPL_TYPE_KEY);
+
+	n_fields = dict_index_get_n_ordering_defined_by_user(prebuilt->index);
+
+	dtuple_set_n_fields(search_tuple, n_fields);
+	dtuple_set_n_fields_cmp(search_tuple, n_fields);
+
+	/* Do a shallow copy */
+	for (i = 0; i < n_fields; ++i) {
+		dfield_copy(dtuple_get_nth_field(search_tuple, i),
+			    dtuple_get_nth_field(tuple->ptr, i));
+	}
+
+	ut_a(prebuilt->select_lock_type <= LOCK_NUM);
+
+	/* Invalidate any record previously cached by the API layer. */
+	prebuilt->innodb_api_rec = NULL;
+
+	buf = static_cast<unsigned char*>(mem_alloc(UNIV_PAGE_SIZE));
+
+	err = static_cast<ib_err_t>(row_search_for_mysql(
+		buf, ib_srch_mode, prebuilt, cursor->match_mode, 0));
+
+	mem_free(buf);
+
+	return(err);
+}
+
+/*****************************************************************//**
+Set the cursor search mode. Affects subsequent ib_cursor_moveto()
+calls only. */
+UNIV_INTERN
+void
+ib_cursor_set_match_mode(
+/*=====================*/
+	ib_crsr_t	ib_crsr,	/*!< in: Cursor instance */
+	ib_match_mode_t	match_mode)	/*!< in: ib_cursor_moveto match mode */
+{
+	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
+
+	cursor->match_mode = match_mode;
+}
+
+/*****************************************************************//**
+Get the dfield instance for the column in the tuple. Thin accessor;
+no bounds checking beyond what dtuple_get_nth_field() does.
+@return dfield instance in tuple */
+UNIV_INLINE
+dfield_t*
+ib_col_get_dfield(
+/*==============*/
+	ib_tuple_t*	tuple,		/*!< in: tuple instance */
+	ulint		col_no)		/*!< in: col no. in tuple */
+{
+	dfield_t*	dfield;
+
+	dfield = dtuple_get_nth_field(tuple->ptr, col_no);
+
+	return(dfield);
+}
+
+/*****************************************************************//**
+Predicate to check whether a column type contains variable length data
+with a declared maximum length.
+NOTE(review): despite the ib_err_t return type this is used as a
+boolean (nonzero = capped).
+@return DB_SUCCESS or error code */
+UNIV_INLINE
+ib_err_t
+ib_col_is_capped(
+/*==============*/
+	const dtype_t*  dtype)		/*!< in: column type */
+{
+	return(static_cast<ib_err_t>(
+		(dtype_get_mtype(dtype) == DATA_VARCHAR
+		 || dtype_get_mtype(dtype) == DATA_CHAR
+		 || dtype_get_mtype(dtype) == DATA_MYSQL
+		 || dtype_get_mtype(dtype) == DATA_VARMYSQL
+		 || dtype_get_mtype(dtype) == DATA_FIXBINARY
+		 || dtype_get_mtype(dtype) == DATA_BINARY)
+	       && dtype_get_len(dtype) > 0));
+}
+
+/*****************************************************************//**
+Set a column of the tuple. Make a copy using the tuple's heap.
+Validates the value against the column's type (length for INT,
+FLOAT, DOUBLE; padding for CHAR) and rejects writes to system columns.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+ib_err_t
+ib_col_set_value(
+/*=============*/
+	ib_tpl_t	ib_tpl,		/*!< in: tuple instance */
+	ib_ulint_t	col_no,		/*!< in: column index in tuple */
+	const void*	src,		/*!< in: data value */
+	ib_ulint_t	len)		/*!< in: data value len */
+{
+	const dtype_t*  dtype;
+	dfield_t*	dfield;
+	void*		dst = NULL;
+	ib_tuple_t*	tuple = (ib_tuple_t*) ib_tpl;
+
+	dfield = ib_col_get_dfield(tuple, col_no);
+
+	/* User wants to set the column to NULL. */
+	if (len == IB_SQL_NULL) {
+		dfield_set_null(dfield);
+		return(DB_SUCCESS);
+	}
+
+	dtype = dfield_get_type(dfield);
+
+	/* Not allowed to update system columns. */
+	if (dtype_get_mtype(dtype) == DATA_SYS) {
+		return(DB_DATA_MISMATCH);
+	}
+
+	dst = dfield_get_data(dfield);
+
+	/* Since TEXT/CLOB also map to DATA_VARCHAR we need to make an
+	exception. Perhaps we need to set the precise type and check
+	for that. */
+	if (ib_col_is_capped(dtype)) {
+
+		/* Clamp to the declared column length and allocate the
+		full declared length so CHAR padding below fits. */
+		len = ut_min(len, dtype_get_len(dtype));
+
+		if (dst == NULL || len > dfield_get_len(dfield)) {
+			dst = mem_heap_alloc(tuple->heap, dtype_get_len(dtype));
+			ut_a(dst != NULL);
+		}
+	} else if (dst == NULL || len > dfield_get_len(dfield)) {
+		dst = mem_heap_alloc(tuple->heap, len);
+	}
+
+	if (dst == NULL) {
+		return(DB_OUT_OF_MEMORY);
+	}
+
+	switch (dtype_get_mtype(dtype)) {
+	case DATA_INT: {
+
+		/* Integer length must match the column exactly. */
+		if (dtype_get_len(dtype) == len) {
+			ibool		usign;
+
+			usign = dtype_get_prtype(dtype) & DATA_UNSIGNED;
+			mach_write_int_type(static_cast<byte*>(dst),
+					    static_cast<const byte*>(src),
+					    len, usign);
+
+		} else {
+			return(DB_DATA_MISMATCH);
+		}
+		break;
+	}
+
+	case DATA_FLOAT:
+		if (len == sizeof(float)) {
+			mach_float_write(static_cast<byte*>(dst), *(float*)src);
+		} else {
+			return(DB_DATA_MISMATCH);
+		}
+		break;
+
+	case DATA_DOUBLE:
+		if (len == sizeof(double)) {
+			mach_double_write(static_cast<byte*>(dst),
+					  *(double*)src);
+		} else {
+			return(DB_DATA_MISMATCH);
+		}
+		break;
+
+	case DATA_SYS:
+		/* Already rejected above; unreachable. */
+		ut_error;
+		break;
+
+	case DATA_CHAR: {
+		ulint	pad_char = ULINT_UNDEFINED;
+
+		pad_char = dtype_get_pad_char(
+			dtype_get_mtype(dtype), dtype_get_prtype(dtype));
+
+		ut_a(pad_char != ULINT_UNDEFINED);
+
+		/* Space-pad the tail up to the fixed column length. */
+		memset((byte*) dst + len,
+		       pad_char,
+		       dtype_get_len(dtype) - len);
+
+		memcpy(dst, src, len);
+
+		len = dtype_get_len(dtype);
+		break;
+	}
+	case DATA_BLOB:
+	case DATA_BINARY:
+	case DATA_MYSQL:
+	case DATA_DECIMAL:
+	case DATA_VARCHAR:
+	case DATA_VARMYSQL:
+	case DATA_FIXBINARY:
+		memcpy(dst, src, len);
+		break;
+
+	default:
+		ut_error;
+	}
+
+	/* Point the dfield at the (possibly new) buffer. */
+	if (dst != dfield_get_data(dfield)) {
+		dfield_set_data(dfield, dst, len);
+	} else {
+		dfield_set_len(dfield, len);
+	}
+
+	return(DB_SUCCESS);
+}
+
+/*****************************************************************//**
+Get the size of the data available in a column of the tuple,
+translating InnoDB's internal NULL marker to the API's IB_SQL_NULL.
+@return bytes avail or IB_SQL_NULL */
+UNIV_INTERN
+ib_ulint_t
+ib_col_get_len(
+/*===========*/
+	ib_tpl_t	ib_tpl,		/*!< in: tuple instance */
+	ib_ulint_t	i)		/*!< in: column index in tuple */
+{
+	const dfield_t*	dfield;
+	ulint		data_len;
+	ib_tuple_t*	tuple = (ib_tuple_t*) ib_tpl;
+
+	dfield = ib_col_get_dfield(tuple, i);
+
+	data_len = dfield_get_len(dfield);
+
+	return(data_len == UNIV_SQL_NULL ? IB_SQL_NULL : data_len);
+}
+
+/*****************************************************************//**
+Copy a column value from the tuple into a caller-supplied buffer.
+DATA_INT columns are decoded via mach_read_int_type() and stored
+through a pointer of the matching signedness and width; FLOAT/DOUBLE
+are decoded when the lengths match (else 0 is returned); all other
+types are memcpy'd up to min(data_len, len).
+@return bytes copied or IB_SQL_NULL */
+UNIV_INLINE
+ib_ulint_t
+ib_col_copy_value_low(
+/*==================*/
+	ib_tpl_t	ib_tpl,		/*!< in: tuple instance */
+	ib_ulint_t	i,		/*!< in: column index in tuple */
+	void*		dst,		/*!< out: copied data value */
+	ib_ulint_t	len)		/*!< in: max data value len to copy */
+{
+	const void*	data;
+	const dfield_t*	dfield;
+	ulint		data_len;
+	ib_tuple_t*	tuple = (ib_tuple_t*) ib_tpl;
+
+	dfield = ib_col_get_dfield(tuple, i);
+
+	data = dfield_get_data(dfield);
+	data_len = dfield_get_len(dfield);
+
+	if (data_len != UNIV_SQL_NULL) {
+
+		const dtype_t*  dtype = dfield_get_type(dfield);
+
+		switch (dtype_get_mtype(dfield_get_type(dfield))) {
+		case DATA_INT: {
+			ibool		usign;
+			ullint		ret;
+
+			ut_a(data_len == len);
+
+			usign = dtype_get_prtype(dtype) & DATA_UNSIGNED;
+			ret = mach_read_int_type(static_cast<const byte*>(data),
+						 data_len, usign);
+
+			/* BUG FIX: the unsigned/signed store branches were
+			inverted -- an unsigned column was stored through a
+			signed pointer type and vice versa. Store through the
+			pointer type that matches the column's signedness. */
+			if (usign) {
+				if (len == 2) {
+					*(ib_u16_t*)dst = (ib_u16_t)ret;
+				} else if (len == 4) {
+					*(ib_u32_t*)dst = (ib_u32_t)ret;
+				} else {
+					*(ib_u64_t*)dst = (ib_u64_t)ret;
+				}
+			} else {
+				if (len == 2) {
+					*(ib_i16_t*)dst = (ib_i16_t)ret;
+				} else if (len == 4) {
+					*(ib_i32_t*)dst = (ib_i32_t)ret;
+				} else {
+					*(ib_i64_t*)dst = (ib_i64_t)ret;
+				}
+			}
+
+			break;
+		}
+		case DATA_FLOAT:
+			if (len == data_len) {
+				float	f;
+
+				ut_a(data_len == sizeof(f));
+				f = mach_float_read(static_cast<const byte*>(
+					data));
+				memcpy(dst, &f, sizeof(f));
+			} else {
+				data_len = 0;
+			}
+			break;
+		case DATA_DOUBLE:
+			if (len == data_len) {
+				double	d;
+
+				ut_a(data_len == sizeof(d));
+				d = mach_double_read(static_cast<const byte*>(
+					data));
+				memcpy(dst, &d, sizeof(d));
+			} else {
+				data_len = 0;
+			}
+			break;
+		default:
+			data_len = ut_min(data_len, len);
+			memcpy(dst, data, data_len);
+		}
+	} else {
+		data_len = IB_SQL_NULL;
+	}
+
+	return(data_len);
+}
+
+/*****************************************************************//**
+Copy a column value from the tuple.
+@return bytes copied or IB_SQL_NULL */
+UNIV_INTERN
+ib_ulint_t
+ib_col_copy_value(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: tuple instance */
+ ib_ulint_t i, /*!< in: column index in tuple */
+ void* dst, /*!< out: copied data value */
+ ib_ulint_t len) /*!< in: max data value len to copy */
+{
+ return(ib_col_copy_value_low(ib_tpl, i, dst, len));
+}
+
+/*****************************************************************//**
+Get the InnoDB column attribute from the internal column precise type.
+@return precise type in api format */
+UNIV_INLINE
+ib_col_attr_t
+ib_col_get_attr(
+/*============*/
+ ulint prtype) /*!< in: column definition */
+{
+ ib_col_attr_t attr = IB_COL_NONE;
+
+ if (prtype & DATA_UNSIGNED) {
+ attr = static_cast<ib_col_attr_t>(attr | IB_COL_UNSIGNED);
+ }
+
+ if (prtype & DATA_NOT_NULL) {
+ attr = static_cast<ib_col_attr_t>(attr | IB_COL_NOT_NULL);
+ }
+
+ return(attr);
+}
+
+/*****************************************************************//**
+Get a column name from the tuple.
+@return name of the column */
+UNIV_INTERN
+const char*
+ib_col_get_name(
+/*============*/
+ ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
+ ib_ulint_t i) /*!< in: column index in tuple */
+{
+ const char* name;
+ ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
+ dict_table_t* table = cursor->prebuilt->table;
+ dict_col_t* col = dict_table_get_nth_col(table, i);
+ ulint col_no = dict_col_get_no(col);
+
+ name = dict_table_get_col_name(table, col_no);
+
+ return(name);
+}
+
+/*****************************************************************//**
+Get an index field name from the cursor.
+@return name of the field */
+UNIV_INTERN
+const char*
+ib_get_idx_field_name(
+/*==================*/
+ ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
+ ib_ulint_t i) /*!< in: column index in tuple */
+{
+ ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
+ dict_index_t* index = cursor->prebuilt->index;
+ dict_field_t* field;
+
+ if (index) {
+ field = dict_index_get_nth_field(cursor->prebuilt->index, i);
+
+ if (field) {
+ return(field->name);
+ }
+ }
+
+ return(NULL);
+}
+
+/*****************************************************************//**
+Get a column type, length and attributes from the tuple.
+@return len of column data */
+UNIV_INLINE
+ib_ulint_t
+ib_col_get_meta_low(
+/*================*/
+ ib_tpl_t ib_tpl, /*!< in: tuple instance */
+ ib_ulint_t i, /*!< in: column index in tuple */
+ ib_col_meta_t* ib_col_meta) /*!< out: column meta data */
+{
+ ib_u16_t prtype;
+ const dfield_t* dfield;
+ ulint data_len;
+ ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl;
+
+ dfield = ib_col_get_dfield(tuple, i);
+
+ data_len = dfield_get_len(dfield);
+
+ /* We assume 1-1 mapping between the ENUM and internal type codes. */
+ ib_col_meta->type = static_cast<ib_col_type_t>(
+ dtype_get_mtype(dfield_get_type(dfield)));
+
+ ib_col_meta->type_len = dtype_get_len(dfield_get_type(dfield));
+
+ prtype = (ib_u16_t) dtype_get_prtype(dfield_get_type(dfield));
+
+ ib_col_meta->attr = ib_col_get_attr(prtype);
+ ib_col_meta->client_type = prtype & DATA_MYSQL_TYPE_MASK;
+
+ return(data_len);
+}
+
/*************************************************************//**
Check that a column is an integer of the expected size and signedness
before reading it from an InnoDB tuple.
@return DB_SUCCESS if the column matches, else an error code */
UNIV_INLINE
ib_err_t
ib_tuple_check_int(
/*===============*/
	ib_tpl_t	ib_tpl,		/*!< in: InnoDB tuple */
	ib_ulint_t	i,		/*!< in: column number */
	ib_bool_t	usign,		/*!< in: true if unsigned */
	ulint		size)		/*!< in: size of integer */
{
	ib_col_meta_t	ib_col_meta;

	ib_col_get_meta_low(ib_tpl, i, &ib_col_meta);

	if (ib_col_meta.type != IB_INT) {
		/* Not an integer column at all. */
		return(DB_DATA_MISMATCH);
	} else if (ib_col_meta.type_len == IB_SQL_NULL) {
		return(DB_UNDERFLOW);
	} else if (ib_col_meta.type_len != size) {
		/* Caller's integer width does not match the column width. */
		return(DB_DATA_MISMATCH);
	} else if ((ib_col_meta.attr & IB_COL_UNSIGNED) && !usign) {
		/* NOTE(review): only the unsigned-column / signed-read
		mismatch is rejected; the reverse direction is allowed. */
		return(DB_DATA_MISMATCH);
	}

	return(DB_SUCCESS);
}
+
+/*************************************************************//**
+Read a signed int 8 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_read_i8(
+/*=============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_i8_t* ival) /*!< out: integer value */
+{
+ ib_err_t err;
+
+ err = ib_tuple_check_int(ib_tpl, i, IB_FALSE, sizeof(*ival));
+
+ if (err == DB_SUCCESS) {
+ ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival));
+ }
+
+ return(err);
+}
+
+/*************************************************************//**
+Read an unsigned int 8 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_read_u8(
+/*=============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_u8_t* ival) /*!< out: integer value */
+{
+ ib_err_t err;
+
+ err = ib_tuple_check_int(ib_tpl, i, IB_TRUE, sizeof(*ival));
+
+ if (err == DB_SUCCESS) {
+ ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival));
+ }
+
+ return(err);
+}
+
+/*************************************************************//**
+Read a signed int 16 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_read_i16(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_i16_t* ival) /*!< out: integer value */
+{
+ ib_err_t err;
+
+ err = ib_tuple_check_int(ib_tpl, i, FALSE, sizeof(*ival));
+
+ if (err == DB_SUCCESS) {
+ ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival));
+ }
+
+ return(err);
+}
+
+/*************************************************************//**
+Read an unsigned int 16 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_read_u16(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_u16_t* ival) /*!< out: integer value */
+{
+ ib_err_t err;
+
+ err = ib_tuple_check_int(ib_tpl, i, IB_TRUE, sizeof(*ival));
+
+ if (err == DB_SUCCESS) {
+ ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival));
+ }
+
+ return(err);
+}
+
+/*************************************************************//**
+Read a signed int 32 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_read_i32(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_i32_t* ival) /*!< out: integer value */
+{
+ ib_err_t err;
+
+ err = ib_tuple_check_int(ib_tpl, i, FALSE, sizeof(*ival));
+
+ if (err == DB_SUCCESS) {
+ ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival));
+ }
+
+ return(err);
+}
+
+/*************************************************************//**
+Read an unsigned int 32 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_read_u32(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_u32_t* ival) /*!< out: integer value */
+{
+ ib_err_t err;
+
+ err = ib_tuple_check_int(ib_tpl, i, IB_TRUE, sizeof(*ival));
+
+ if (err == DB_SUCCESS) {
+ ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival));
+ }
+
+ return(err);
+}
+
+/*************************************************************//**
+Read a signed int 64 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_read_i64(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_i64_t* ival) /*!< out: integer value */
+{
+ ib_err_t err;
+
+ err = ib_tuple_check_int(ib_tpl, i, FALSE, sizeof(*ival));
+
+ if (err == DB_SUCCESS) {
+ ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival));
+ }
+
+ return(err);
+}
+
+/*************************************************************//**
+Read an unsigned int 64 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_read_u64(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_u64_t* ival) /*!< out: integer value */
+{
+ ib_err_t err;
+
+ err = ib_tuple_check_int(ib_tpl, i, IB_TRUE, sizeof(*ival));
+
+ if (err == DB_SUCCESS) {
+ ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival));
+ }
+
+ return(err);
+}
+
+/*****************************************************************//**
+Get a column value pointer from the tuple.
+@return NULL or pointer to buffer */
+UNIV_INTERN
+const void*
+ib_col_get_value(
+/*=============*/
+ ib_tpl_t ib_tpl, /*!< in: tuple instance */
+ ib_ulint_t i) /*!< in: column index in tuple */
+{
+ const void* data;
+ const dfield_t* dfield;
+ ulint data_len;
+ ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl;
+
+ dfield = ib_col_get_dfield(tuple, i);
+
+ data = dfield_get_data(dfield);
+ data_len = dfield_get_len(dfield);
+
+ return(data_len != UNIV_SQL_NULL ? data : NULL);
+}
+
+/*****************************************************************//**
+Get a column type, length and attributes from the tuple.
+@return len of column data */
+UNIV_INTERN
+ib_ulint_t
+ib_col_get_meta(
+/*============*/
+ ib_tpl_t ib_tpl, /*!< in: tuple instance */
+ ib_ulint_t i, /*!< in: column index in tuple */
+ ib_col_meta_t* ib_col_meta) /*!< out: column meta data */
+{
+ return(ib_col_get_meta_low(ib_tpl, i, ib_col_meta));
+}
+
/*****************************************************************//**
"Clear" or reset an InnoDB tuple. We free the heap and recreate the tuple.
@return new tuple, or NULL */
UNIV_INTERN
ib_tpl_t
ib_tuple_clear(
/*============*/
	ib_tpl_t	ib_tpl)		/*!< in,own: tuple (will be freed) */
{
	const dict_index_t*	index;
	ulint			n_cols;
	ib_tuple_t*		tuple	= (ib_tuple_t*) ib_tpl;
	ib_tuple_type_t		type	= tuple->type;
	mem_heap_t*		heap	= tuple->heap;

	/* Capture everything needed to rebuild the tuple BEFORE emptying
	the heap: the tuple struct itself lives on that heap, so after
	mem_heap_empty() it must not be dereferenced again. */
	index = tuple->index;
	n_cols = dtuple_get_n_fields(tuple->ptr);

	mem_heap_empty(heap);

	/* Recreate a tuple of the same kind on the now-empty heap. */
	if (type == TPL_TYPE_ROW) {
		return(ib_row_tuple_new_low(index, n_cols, heap));
	} else {
		return(ib_key_tuple_new_low(index, n_cols, heap));
	}
}
+
/*****************************************************************//**
Create a new cluster key search tuple and copy the contents of the
secondary index key tuple columns that refer to the cluster index record
to the cluster key. It does a deep copy of the column data.
@return DB_SUCCESS or error code */
UNIV_INTERN
ib_err_t
ib_tuple_get_cluster_key(
/*=====================*/
	ib_crsr_t	ib_crsr,	/*!< in: secondary index cursor */
	ib_tpl_t*	ib_dst_tpl,	/*!< out,own: destination tuple */
	const ib_tpl_t	ib_src_tpl)	/*!< in: source tuple */
{
	ulint		i;
	ulint		n_fields;
	ib_err_t	err = DB_SUCCESS;
	ib_tuple_t*	dst_tuple = NULL;
	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
	ib_tuple_t*	src_tuple = (ib_tuple_t*) ib_src_tpl;
	dict_index_t*	clust_index;

	/* The clustered index is always the first index of the table. */
	clust_index = dict_table_get_first_index(cursor->prebuilt->table);

	/* We need to ensure that the src tuple belongs to the same table
	as the open cursor and that it's not a tuple for a cluster index. */
	if (src_tuple->type != TPL_TYPE_KEY) {
		return(DB_ERROR);
	} else if (src_tuple->index->table != cursor->prebuilt->table) {
		return(DB_DATA_MISMATCH);
	} else if (src_tuple->index == clust_index) {
		return(DB_ERROR);
	}

	/* Create the cluster index key search tuple. On success the caller
	owns *ib_dst_tpl and must free it with ib_tuple_delete(). */
	*ib_dst_tpl = ib_clust_search_tuple_create(ib_crsr);

	if (!*ib_dst_tpl) {
		return(DB_OUT_OF_MEMORY);
	}

	dst_tuple = (ib_tuple_t*) *ib_dst_tpl;
	ut_a(dst_tuple->index == clust_index);

	n_fields = dict_index_get_n_unique(dst_tuple->index);

	/* Do a deep copy of the data fields: for each unique field of the
	clustered key, find its position in the secondary index tuple and
	duplicate the data onto the destination tuple's heap. */
	for (i = 0; i < n_fields; i++) {
		ulint		pos;
		dfield_t*	src_field;
		dfield_t*	dst_field;

		pos = dict_index_get_nth_field_pos(
			src_tuple->index, dst_tuple->index, i);

		/* Every clustered key column must be present in the
		secondary index key. */
		ut_a(pos != ULINT_UNDEFINED);

		src_field = dtuple_get_nth_field(src_tuple->ptr, pos);
		dst_field = dtuple_get_nth_field(dst_tuple->ptr, i);

		if (!dfield_is_null(src_field)) {
			UNIV_MEM_ASSERT_RW(src_field->data, src_field->len);

			/* Deep copy: the data must outlive the source
			tuple, so duplicate it onto the dst heap. */
			dst_field->data = mem_heap_dup(
				dst_tuple->heap,
				src_field->data,
				src_field->len);

			dst_field->len = src_field->len;
		} else {
			dfield_set_null(dst_field);
		}
	}

	return(err);
}
+
+/*****************************************************************//**
+Copy the contents of source tuple to destination tuple. The tuples
+must be of the same type and belong to the same table/index.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+ib_err_t
+ib_tuple_copy(
+/*==========*/
+ ib_tpl_t ib_dst_tpl, /*!< in: destination tuple */
+ const ib_tpl_t ib_src_tpl) /*!< in: source tuple */
+{
+ ulint i;
+ ulint n_fields;
+ ib_err_t err = DB_SUCCESS;
+ const ib_tuple_t*src_tuple = (const ib_tuple_t*) ib_src_tpl;
+ ib_tuple_t* dst_tuple = (ib_tuple_t*) ib_dst_tpl;
+
+ /* Make sure src and dst are not the same. */
+ ut_a(src_tuple != dst_tuple);
+
+ /* Make sure they are the same type and refer to the same index. */
+ if (src_tuple->type != dst_tuple->type
+ || src_tuple->index != dst_tuple->index) {
+
+ return(DB_DATA_MISMATCH);
+ }
+
+ n_fields = dtuple_get_n_fields(src_tuple->ptr);
+ ut_ad(n_fields == dtuple_get_n_fields(dst_tuple->ptr));
+
+ /* Do a deep copy of the data fields. */
+ for (i = 0; i < n_fields; ++i) {
+ dfield_t* src_field;
+ dfield_t* dst_field;
+
+ src_field = dtuple_get_nth_field(src_tuple->ptr, i);
+ dst_field = dtuple_get_nth_field(dst_tuple->ptr, i);
+
+ if (!dfield_is_null(src_field)) {
+ UNIV_MEM_ASSERT_RW(src_field->data, src_field->len);
+
+ dst_field->data = mem_heap_dup(
+ dst_tuple->heap,
+ src_field->data,
+ src_field->len);
+
+ dst_field->len = src_field->len;
+ } else {
+ dfield_set_null(dst_field);
+ }
+ }
+
+ return(err);
+}
+
+/*****************************************************************//**
+Create an InnoDB tuple used for index/table search.
+@return own: Tuple for current index */
+UNIV_INTERN
+ib_tpl_t
+ib_sec_search_tuple_create(
+/*=======================*/
+ ib_crsr_t ib_crsr) /*!< in: Cursor instance */
+{
+ ulint n_cols;
+ ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
+ dict_index_t* index = cursor->prebuilt->index;
+
+ n_cols = dict_index_get_n_unique_in_tree(index);
+ return(ib_key_tuple_new(index, n_cols));
+}
+
+/*****************************************************************//**
+Create an InnoDB tuple used for index/table search.
+@return own: Tuple for current index */
+UNIV_INTERN
+ib_tpl_t
+ib_sec_read_tuple_create(
+/*=====================*/
+ ib_crsr_t ib_crsr) /*!< in: Cursor instance */
+{
+ ulint n_cols;
+ ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
+ dict_index_t* index = cursor->prebuilt->index;
+
+ n_cols = dict_index_get_n_fields(index);
+ return(ib_row_tuple_new(index, n_cols));
+}
+
+/*****************************************************************//**
+Create an InnoDB tuple used for table key operations.
+@return own: Tuple for current table */
+UNIV_INTERN
+ib_tpl_t
+ib_clust_search_tuple_create(
+/*=========================*/
+ ib_crsr_t ib_crsr) /*!< in: Cursor instance */
+{
+ ulint n_cols;
+ ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
+ dict_index_t* index;
+
+ index = dict_table_get_first_index(cursor->prebuilt->table);
+
+ n_cols = dict_index_get_n_ordering_defined_by_user(index);
+ return(ib_key_tuple_new(index, n_cols));
+}
+
+/*****************************************************************//**
+Create an InnoDB tuple for table row operations.
+@return own: Tuple for current table */
+UNIV_INTERN
+ib_tpl_t
+ib_clust_read_tuple_create(
+/*=======================*/
+ ib_crsr_t ib_crsr) /*!< in: Cursor instance */
+{
+ ulint n_cols;
+ ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
+ dict_index_t* index;
+
+ index = dict_table_get_first_index(cursor->prebuilt->table);
+
+ n_cols = dict_table_get_n_cols(cursor->prebuilt->table);
+ return(ib_row_tuple_new(index, n_cols));
+}
+
+/*****************************************************************//**
+Return the number of user columns in the tuple definition.
+@return number of user columns */
+UNIV_INTERN
+ib_ulint_t
+ib_tuple_get_n_user_cols(
+/*=====================*/
+ const ib_tpl_t ib_tpl) /*!< in: Tuple for current table */
+{
+ const ib_tuple_t* tuple = (const ib_tuple_t*) ib_tpl;
+
+ if (tuple->type == TPL_TYPE_ROW) {
+ return(dict_table_get_n_user_cols(tuple->index->table));
+ }
+
+ return(dict_index_get_n_ordering_defined_by_user(tuple->index));
+}
+
+/*****************************************************************//**
+Return the number of columns in the tuple definition.
+@return number of columns */
+UNIV_INTERN
+ib_ulint_t
+ib_tuple_get_n_cols(
+/*================*/
+ const ib_tpl_t ib_tpl) /*!< in: Tuple for table/index */
+{
+ const ib_tuple_t* tuple = (const ib_tuple_t*) ib_tpl;
+
+ return(dtuple_get_n_fields(tuple->ptr));
+}
+
+/*****************************************************************//**
+Destroy an InnoDB tuple. */
+UNIV_INTERN
+void
+ib_tuple_delete(
+/*============*/
+ ib_tpl_t ib_tpl) /*!< in,own: Tuple instance to delete */
+{
+ ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl;
+
+ if (!ib_tpl) {
+ return;
+ }
+
+ mem_heap_free(tuple->heap);
+}
+
+/*****************************************************************//**
+Get a table id. This function will acquire the dictionary mutex.
+@return DB_SUCCESS if found */
+UNIV_INTERN
+ib_err_t
+ib_table_get_id(
+/*============*/
+ const char* table_name, /*!< in: table to find */
+ ib_id_u64_t* table_id) /*!< out: table id if found */
+{
+ ib_err_t err;
+
+ dict_mutex_enter_for_mysql();
+
+ err = ib_table_get_id_low(table_name, table_id);
+
+ dict_mutex_exit_for_mysql();
+
+ return(err);
+}
+
+/*****************************************************************//**
+Get an index id.
+@return DB_SUCCESS if found */
+UNIV_INTERN
+ib_err_t
+ib_index_get_id(
+/*============*/
+ const char* table_name, /*!< in: find index for this table */
+ const char* index_name, /*!< in: index to find */
+ ib_id_u64_t* index_id) /*!< out: index id if found */
+{
+ dict_table_t* table;
+ char* normalized_name;
+ ib_err_t err = DB_TABLE_NOT_FOUND;
+
+ *index_id = 0;
+
+ normalized_name = static_cast<char*>(
+ mem_alloc(ut_strlen(table_name) + 1));
+ ib_normalize_table_name(normalized_name, table_name);
+
+ table = ib_lookup_table_by_name(normalized_name);
+
+ mem_free(normalized_name);
+ normalized_name = NULL;
+
+ if (table != NULL) {
+ dict_index_t* index;
+
+ index = dict_table_get_index_on_name(table, index_name);
+
+ if (index != NULL) {
+ /* We only support 32 bit table and index ids. Because
+ we need to pack the table id into the index id. */
+
+ *index_id = (table->id);
+ *index_id <<= 32;
+ *index_id |= (index->id);
+
+ err = DB_SUCCESS;
+ }
+ }
+
+ return(err);
+}
+
+#ifdef __WIN__
+#define SRV_PATH_SEPARATOR '\\'
+#else
+#define SRV_PATH_SEPARATOR '/'
+#endif
+
+
+/*****************************************************************//**
+Check if cursor is positioned.
+@return IB_TRUE if positioned */
+UNIV_INTERN
+ib_bool_t
+ib_cursor_is_positioned(
+/*====================*/
+ const ib_crsr_t ib_crsr) /*!< in: InnoDB cursor instance */
+{
+ const ib_cursor_t* cursor = (const ib_cursor_t*) ib_crsr;
+ row_prebuilt_t* prebuilt = cursor->prebuilt;
+
+ return(ib_btr_cursor_is_positioned(&prebuilt->pcur));
+}
+
+
+/*****************************************************************//**
+Checks if the data dictionary is latched in exclusive mode.
+@return TRUE if exclusive latch */
+UNIV_INTERN
+ib_bool_t
+ib_schema_lock_is_exclusive(
+/*========================*/
+ const ib_trx_t ib_trx) /*!< in: transaction */
+{
+ const trx_t* trx = (const trx_t*) ib_trx;
+
+ return(trx->dict_operation_lock_mode == RW_X_LATCH);
+}
+
+/*****************************************************************//**
+Checks if the data dictionary is latched in shared mode.
+@return TRUE if shared latch */
+UNIV_INTERN
+ib_bool_t
+ib_schema_lock_is_shared(
+/*=====================*/
+ const ib_trx_t ib_trx) /*!< in: transaction */
+{
+ const trx_t* trx = (const trx_t*) ib_trx;
+
+ return(trx->dict_operation_lock_mode == RW_S_LATCH);
+}
+
+/*****************************************************************//**
+Set the Lock an InnoDB cursor/table.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_lock(
+/*===========*/
+ ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */
+ ib_lck_mode_t ib_lck_mode) /*!< in: InnoDB lock mode */
+{
+ ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
+ row_prebuilt_t* prebuilt = cursor->prebuilt;
+ trx_t* trx = prebuilt->trx;
+ dict_table_t* table = prebuilt->table;
+
+ return(ib_trx_lock_table_with_retry(
+ trx, table, (enum lock_mode) ib_lck_mode));
+}
+
/*****************************************************************//**
Set the Lock an InnoDB table using the table id.
@return DB_SUCCESS or error code */
UNIV_INTERN
ib_err_t
ib_table_lock(
/*==========*/
	ib_trx_t	ib_trx,		/*!< in/out: transaction */
	ib_id_u64_t	table_id,	/*!< in: table id */
	ib_lck_mode_t	ib_lck_mode)	/*!< in: InnoDB lock mode */
{
	ib_err_t	err;
	que_thr_t*	thr;
	mem_heap_t*	heap;
	dict_table_t*	table;
	ib_qry_proc_t	q_proc;
	trx_t*		trx = (trx_t*) ib_trx;

	/* The transaction must already be active. */
	ut_a(trx->state != TRX_STATE_NOT_STARTED);

	table = ib_open_table_by_id(table_id, FALSE);

	if (table == NULL) {
		return(DB_TABLE_NOT_FOUND);
	}

	ut_a(ib_lck_mode <= static_cast<ib_lck_mode_t>(LOCK_NUM));

	/* lock_table() needs a query thread; build a minimal select
	node + query graph on a temporary heap just for this call. */
	heap = mem_heap_create(128);

	q_proc.node.sel = sel_node_create(heap);

	thr = pars_complete_graph_for_exec(q_proc.node.sel, trx, heap);

	q_proc.grph.sel = static_cast<que_fork_t*>(que_node_get_parent(thr));
	q_proc.grph.sel->state = QUE_FORK_ACTIVE;

	trx->op_info = "setting table lock";

	/* Only intention locks are supported through this entry point. */
	ut_a(ib_lck_mode == IB_LOCK_IS || ib_lck_mode == IB_LOCK_IX);
	err = static_cast<ib_err_t>(
		lock_table(0, table, (enum lock_mode) ib_lck_mode, thr));

	trx->error_state = err;

	/* The whole query graph lives on this heap. */
	mem_heap_free(heap);

	return(err);
}
+
+/*****************************************************************//**
+Unlock an InnoDB table.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_unlock(
+/*=============*/
+ ib_crsr_t ib_crsr) /*!< in/out: InnoDB cursor */
+{
+ ib_err_t err = DB_SUCCESS;
+ ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
+ row_prebuilt_t* prebuilt = cursor->prebuilt;
+
+ if (prebuilt->trx->mysql_n_tables_locked > 0) {
+ --prebuilt->trx->mysql_n_tables_locked;
+ } else {
+ err = DB_ERROR;
+ }
+
+ return(err);
+}
+
+/*****************************************************************//**
+Set the Lock mode of the cursor.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_set_lock_mode(
+/*====================*/
+ ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */
+ ib_lck_mode_t ib_lck_mode) /*!< in: InnoDB lock mode */
+{
+ ib_err_t err = DB_SUCCESS;
+ ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
+ row_prebuilt_t* prebuilt = cursor->prebuilt;
+
+ ut_a(ib_lck_mode <= static_cast<ib_lck_mode_t>(LOCK_NUM));
+
+ if (ib_lck_mode == IB_LOCK_X) {
+ err = ib_cursor_lock(ib_crsr, IB_LOCK_IX);
+ } else if (ib_lck_mode == IB_LOCK_S) {
+ err = ib_cursor_lock(ib_crsr, IB_LOCK_IS);
+ }
+
+ if (err == DB_SUCCESS) {
+ prebuilt->select_lock_type = (enum lock_mode) ib_lck_mode;
+ ut_a(prebuilt->trx->state != TRX_STATE_NOT_STARTED);
+ }
+
+ return(err);
+}
+
+/*****************************************************************//**
+Set need to access clustered index record. */
+UNIV_INTERN
+void
+ib_cursor_set_cluster_access(
+/*=========================*/
+ ib_crsr_t ib_crsr) /*!< in/out: InnoDB cursor */
+{
+ ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
+ row_prebuilt_t* prebuilt = cursor->prebuilt;
+
+ prebuilt->need_to_access_clustered = TRUE;
+}
+
/*************************************************************//**
Convert and write an INT column value to an InnoDB tuple.
@return DB_SUCCESS or error */
UNIV_INLINE
ib_err_t
ib_tuple_write_int(
/*===============*/
	ib_tpl_t	ib_tpl,		/*!< in/out: tuple to write to */
	ulint		col_no,		/*!< in: column number */
	const void*	value,		/*!< in: integer value */
	ulint		value_len)	/*!< in: sizeof value type */
{
	const dfield_t*	dfield;
	ulint		data_len;
	ulint		type_len;
	ib_tuple_t*	tuple = (ib_tuple_t*) ib_tpl;

	ut_a(col_no < ib_tuple_get_n_cols(ib_tpl));

	dfield = ib_col_get_dfield(tuple, col_no);

	data_len = dfield_get_len(dfield);
	type_len = dtype_get_len(dfield_get_type(dfield));

	/* Reject non-integer columns and size mismatches.
	NOTE(review): the size check compares value_len against the
	CURRENT stored data length, not the declared type_len — for a
	field never set, data_len may not equal the column width;
	verify this is the intended contract. */
	if (dtype_get_mtype(dfield_get_type(dfield)) != DATA_INT
	    || value_len != data_len) {

		return(DB_DATA_MISMATCH);
	}

	/* The actual conversion/endian handling happens in
	ib_col_set_value(), using the declared column width. */
	return(ib_col_set_value(ib_tpl, col_no, value, type_len));
}
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_write_i8(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_i8_t val) /*!< in: value to write */
+{
+ return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val)));
+}
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_write_i16(
+/*===============*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_i16_t val) /*!< in: value to write */
+{
+ return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val)));
+}
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_write_i32(
+/*===============*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_i32_t val) /*!< in: value to write */
+{
+ return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val)));
+}
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_write_i64(
+/*===============*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_i64_t val) /*!< in: value to write */
+{
+ return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val)));
+}
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_write_u8(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_u8_t val) /*!< in: value to write */
+{
+ return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val)));
+}
+
/*****************************************************************//**
Write an integer value to a column. Integers are stored in big-endian
format and will need to be converted from the host format.
@return DB_SUCCESS or error */
UNIV_INTERN
ib_err_t
ib_tuple_write_u16(
/*===============*/
	ib_tpl_t	ib_tpl,		/*!< in/out: tuple to write to */
	int		col_no,		/*!< in: column number */
	ib_u16_t	val)		/*!< in: value to write */
{
	return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val)));
}
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_write_u32(
+/*===============*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_u32_t val) /*!< in: value to write */
+{
+ return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val)));
+}
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_write_u64(
+/*===============*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_u64_t val) /*!< in: value to write */
+{
+ return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val)));
+}
+
+/*****************************************************************//**
+Inform the cursor that it's the start of an SQL statement. */
+UNIV_INTERN
+void
+ib_cursor_stmt_begin(
+/*=================*/
+ ib_crsr_t ib_crsr) /*!< in: cursor */
+{
+ ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
+
+ cursor->prebuilt->sql_stat_start = TRUE;
+}
+
+/*****************************************************************//**
+Write a double value to a column.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_write_double(
+/*==================*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ double val) /*!< in: value to write */
+{
+ const dfield_t* dfield;
+ ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl;
+
+ dfield = ib_col_get_dfield(tuple, col_no);
+
+ if (dtype_get_mtype(dfield_get_type(dfield)) == DATA_DOUBLE) {
+ return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val)));
+ } else {
+ return(DB_DATA_MISMATCH);
+ }
+}
+
+/*************************************************************//**
+Read a double column value from an InnoDB tuple.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_read_double(
+/*=================*/
+	ib_tpl_t	ib_tpl,		/*!< in: InnoDB tuple */
+	ib_ulint_t	col_no,		/*!< in: column number */
+	double*		dval)		/*!< out: double value */
+{
+	ib_err_t	err;
+	const dfield_t*	dfield;
+	ib_tuple_t*	tuple = (ib_tuple_t*) ib_tpl;
+
+	dfield = ib_col_get_dfield(tuple, col_no);
+
+	/* Only copy the value out if the column's main type is DOUBLE;
+	on a type mismatch *dval is left untouched. */
+	if (dtype_get_mtype(dfield_get_type(dfield)) == DATA_DOUBLE) {
+		ib_col_copy_value_low(ib_tpl, col_no, dval, sizeof(*dval));
+		err = DB_SUCCESS;
+	} else {
+		err = DB_DATA_MISMATCH;
+	}
+
+	return(err);
+}
+
+/*****************************************************************//**
+Write a float value to a column.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_write_float(
+/*=================*/
+	ib_tpl_t	ib_tpl,		/*!< in/out: tuple to write to */
+	int		col_no,		/*!< in: column number */
+	float		val)		/*!< in: value to write */
+{
+	const dfield_t*	dfield;
+	ib_tuple_t*	tuple = (ib_tuple_t*) ib_tpl;
+
+	dfield = ib_col_get_dfield(tuple, col_no);
+
+	/* Refuse the write unless the column's main type really is
+	FLOAT; mirrors the type check in ib_tuple_write_double(). */
+	if (dtype_get_mtype(dfield_get_type(dfield)) == DATA_FLOAT) {
+		return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val)));
+	} else {
+		return(DB_DATA_MISMATCH);
+	}
+}
+
+/*************************************************************//**
+Read a float value from an InnoDB tuple.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+ib_err_t
+ib_tuple_read_float(
+/*================*/
+	ib_tpl_t	ib_tpl,		/*!< in: InnoDB tuple */
+	ib_ulint_t	col_no,		/*!< in: column number */
+	float*		fval)		/*!< out: float value */
+{
+	ib_err_t	err;
+	const dfield_t*	dfield;
+	ib_tuple_t*	tuple = (ib_tuple_t*) ib_tpl;
+
+	dfield = ib_col_get_dfield(tuple, col_no);
+
+	/* Only copy the value out if the column's main type is FLOAT;
+	on a type mismatch *fval is left untouched. */
+	if (dtype_get_mtype(dfield_get_type(dfield)) == DATA_FLOAT) {
+		ib_col_copy_value_low(ib_tpl, col_no, fval, sizeof(*fval));
+		err = DB_SUCCESS;
+	} else {
+		err = DB_DATA_MISMATCH;
+	}
+
+	return(err);
+}
+
+/*****************************************************************//**
+Truncate a table. The cursor handle will be closed and set to NULL
+on success.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+ib_err_t
+ib_cursor_truncate(
+/*===============*/
+	ib_crsr_t*	ib_crsr,	/*!< in/out: cursor for table
+					to truncate */
+	ib_id_u64_t*	table_id)	/*!< out: new table id */
+{
+	ib_err_t	err;
+	ib_cursor_t*	cursor = *(ib_cursor_t**) ib_crsr;
+	row_prebuilt_t*	prebuilt = cursor->prebuilt;
+
+	/* Default the out-parameter so the caller sees 0 on failure. */
+	*table_id = 0;
+
+	/* Truncation requires an exclusive table lock. */
+	err = ib_cursor_lock(*ib_crsr, IB_LOCK_X);
+
+	if (err == DB_SUCCESS) {
+		trx_t*		trx;
+		dict_table_t*	table = prebuilt->table;
+
+		/* We are going to free the cursor and the prebuilt. Store
+		the transaction handle locally. */
+		trx = prebuilt->trx;
+		err = ib_cursor_close(*ib_crsr);
+		ut_a(err == DB_SUCCESS);
+
+		/* The cursor is gone; make sure the caller cannot use
+		the stale handle. */
+		*ib_crsr = NULL;
+
+		/* A temp go around for assertion in trx_start_for_ddl_low
+		we already start the trx */
+		if (trx->state == TRX_STATE_ACTIVE) {
+#ifdef UNIV_DEBUG
+			trx->start_file = 0;
+#endif /* UNIV_DEBUG */
+			trx->dict_operation = TRX_DICT_OP_TABLE;
+		}
+
+		/* This function currently commits the transaction
+		on success. */
+		err = static_cast<ib_err_t>(
+			row_truncate_table_for_mysql(table, trx));
+
+		if (err == DB_SUCCESS) {
+			/* Truncation assigns the table a new id. */
+			*table_id = (table->id);
+		}
+	}
+
+	return(err);
+}
+
+/*****************************************************************//**
+Truncate a table.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+ib_err_t
+ib_table_truncate(
+/*==============*/
+	const char*	table_name,	/*!< in: table name */
+	ib_id_u64_t*	table_id)	/*!< out: new table id */
+{
+	ib_err_t	err;
+	dict_table_t*	table;
+	ib_err_t	trunc_err;
+	ib_trx_t	ib_trx = NULL;
+	ib_crsr_t	ib_crsr = NULL;
+
+	ib_trx = ib_trx_begin(IB_TRX_SERIALIZABLE);
+
+	/* The dictionary mutex protects the table lookup and the
+	cursor creation against concurrent DDL. */
+	dict_mutex_enter_for_mysql();
+
+	table = dict_table_open_on_name(table_name, TRUE, FALSE,
+					DICT_ERR_IGNORE_NONE);
+
+	if (table != NULL && dict_table_get_first_index(table)) {
+		err = ib_create_cursor_with_index_id(&ib_crsr, table, 0,
+						     (trx_t*) ib_trx);
+	} else {
+		err = DB_TABLE_NOT_FOUND;
+	}
+
+	dict_mutex_exit_for_mysql();
+
+	if (err == DB_SUCCESS) {
+		trunc_err = ib_cursor_truncate(&ib_crsr, table_id);
+		/* NOTE(review): this assertion is a tautology -- "err"
+		was just tested to equal DB_SUCCESS and is not modified
+		by ib_cursor_truncate().  A failure of the truncate
+		itself is carried in "trunc_err" and handled below. */
+		ut_a(err == DB_SUCCESS);
+	} else {
+		trunc_err = err;
+	}
+
+	/* On success ib_cursor_truncate() closed the cursor and set
+	ib_crsr to NULL; only a failed/early-out path leaves it open. */
+	if (ib_crsr != NULL) {
+		err = ib_cursor_close(ib_crsr);
+		ut_a(err == DB_SUCCESS);
+	}
+
+	if (trunc_err == DB_SUCCESS) {
+		/* The truncate commits internally, so the transaction
+		must be back in the NOT_STARTED state here. */
+		ut_a(ib_trx_state(ib_trx) == static_cast<ib_trx_state_t>(
+			TRX_STATE_NOT_STARTED));
+
+		err = ib_trx_release(ib_trx);
+		ut_a(err == DB_SUCCESS);
+	} else {
+		err = ib_trx_rollback(ib_trx);
+		ut_a(err == DB_SUCCESS);
+	}
+
+	/* Report the truncate outcome, not the cleanup outcome. */
+	return(trunc_err);
+}
+
+/*****************************************************************//**
+Frees a possible InnoDB trx object associated with the current THD.
+@return 0 or error number */
+UNIV_INTERN
+ib_err_t
+ib_close_thd(
+/*=========*/
+	void*		thd)	/*!< in: handle to the MySQL thread of the user
+				whose resources should be free'd */
+{
+	/* Thin wrapper over the handler-layer helper; always reports
+	success because innobase_close_thd() returns no status. */
+	innobase_close_thd(static_cast<THD*>(thd));
+
+	return(DB_SUCCESS);
+}
+
+/*****************************************************************//**
+Return isolation configuration set by "innodb_api_trx_level"
+@return trx isolation level*/
+UNIV_INTERN
+ib_trx_state_t
+ib_cfg_trx_level()
+/*==============*/
+{
+	/* Expose the innodb_api_trx_level setting as the API enum. */
+	return(static_cast<ib_trx_state_t>(ib_trx_level_setting));
+}
+
+/*****************************************************************//**
+Return configure value for background commit interval (in seconds)
+@return background commit interval (in seconds) */
+UNIV_INTERN
+ib_ulint_t
+ib_cfg_bk_commit_interval()
+/*=======================*/
+{
+	/* Expose the background commit interval setting (seconds). */
+	return(static_cast<ib_ulint_t>(ib_bk_commit_interval));
+}
+
+/*****************************************************************//**
+Get generic configure status
+@return configure status*/
+UNIV_INTERN
+int
+ib_cfg_get_cfg()
+/*============*/
+{
+	int	cfg_status;
+
+	/* Pack the API-relevant configuration switches into a
+	bit mask of IB_CFG_* flags. */
+	cfg_status = (ib_binlog_enabled) ? IB_CFG_BINLOG_ENABLED : 0;
+
+	if (ib_mdl_enabled) {
+		cfg_status |= IB_CFG_MDL_ENABLED;
+	}
+
+	if (ib_disable_row_lock) {
+		cfg_status |= IB_CFG_DISABLE_ROWLOCK;
+	}
+
+	return(cfg_status);
+}
diff --git a/storage/innobase/api/api0misc.cc b/storage/innobase/api/api0misc.cc
new file mode 100644
index 00000000000..b2370105938
--- /dev/null
+++ b/storage/innobase/api/api0misc.cc
@@ -0,0 +1,206 @@
+/*****************************************************************************
+
+Copyright (c) 2008, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file api/api0misc.cc
+InnoDB Native API
+
+2008-08-01 Created by Sunny Bains
+3/20/2011 Jimmy Yang extracted from Embedded InnoDB
+*******************************************************/
+
+#include <errno.h>
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif /* HAVE_UNISTD_H */
+
+#include "api0misc.h"
+#include "trx0roll.h"
+#include "srv0srv.h"
+#include "dict0mem.h"
+#include "dict0dict.h"
+#include "pars0pars.h"
+#include "row0sel.h"
+#include "lock0lock.h"
+#include "ha_prototypes.h"
+#include <m_ctype.h>
+#include <mysys_err.h>
+#include <mysql/plugin.h>
+
+/*********************************************************************//**
+Sets a lock on a table.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+ib_trx_lock_table_with_retry(
+/*=========================*/
+	trx_t*		trx,	/*!< in/out: transaction */
+	dict_table_t*	table,	/*!< in: table to lock */
+	enum lock_mode	mode)	/*!< in: LOCK_X or LOCK_S */
+{
+	que_thr_t*	thr;
+	dberr_t		err;
+	mem_heap_t*	heap;
+	sel_node_t*	node;
+
+	heap = mem_heap_create(512);
+
+	trx->op_info = "setting table lock";
+
+	/* Build a dummy SELECT query graph: the lock module requires
+	a query thread even though no rows are fetched. */
+	node = sel_node_create(heap);
+	thr = pars_complete_graph_for_exec(node, trx, heap);
+	thr->graph->state = QUE_FORK_ACTIVE;
+
+	/* We use the select query graph as the dummy graph needed
+	in the lock module call */
+
+	thr = que_fork_get_first_thr(static_cast<que_fork_t*>(
+		que_node_get_parent(thr)));
+	que_thr_move_to_run_state_for_mysql(thr, trx);
+
+run_again:
+	thr->run_node = thr;
+	thr->prev_node = thr->common.parent;
+
+	err = lock_table(0, table, mode, thr);
+
+	trx->error_state = err;
+
+	if (UNIV_LIKELY(err == DB_SUCCESS)) {
+		que_thr_stop_for_mysql_no_error(thr, trx);
+	} else {
+		que_thr_stop_for_mysql(thr);
+
+		if (err != DB_QUE_THR_SUSPENDED) {
+			ibool	was_lock_wait;
+
+			/* If the failure was a lock wait that has since
+			been resolved, retry the lock request. */
+			was_lock_wait = ib_handle_errors(&err, trx, thr, NULL);
+
+			if (was_lock_wait) {
+				goto run_again;
+			}
+		} else {
+			que_thr_t*	run_thr;
+			que_node_t*	parent;
+
+			parent = que_node_get_parent(thr);
+			run_thr = que_fork_start_command(
+				static_cast<que_fork_t*>(parent));
+
+			ut_a(run_thr == thr);
+
+			/* There was a lock wait but the thread was not
+			in a ready to run or running state. */
+			trx->error_state = DB_LOCK_WAIT;
+
+			goto run_again;
+		}
+	}
+
+	/* Free the dummy graph (this also releases "heap", which the
+	graph nodes were allocated from -- TODO confirm). */
+	que_graph_free(thr->graph);
+	trx->op_info = "";
+
+	return(err);
+}
+/****************************************************************//**
+Handles user errors and lock waits detected by the database engine.
+@return TRUE if it was a lock wait and we should continue running
+the query thread */
+UNIV_INTERN
+ibool
+ib_handle_errors(
+/*=============*/
+	dberr_t*	new_err,/*!< out: possible new error encountered in
+				lock wait, or if no new error, the value
+				of trx->error_state at the entry of this
+				function */
+	trx_t*		trx,	/*!< in: transaction */
+	que_thr_t*	thr,	/*!< in: query thread */
+	trx_savept_t*	savept)	/*!< in: savepoint or NULL */
+{
+	dberr_t	err;
+handle_new_error:
+	err = trx->error_state;
+
+	/* Callers must only invoke this when an error is pending. */
+	ut_a(err != DB_SUCCESS);
+
+	trx->error_state = DB_SUCCESS;
+
+	switch (err) {
+	case DB_LOCK_WAIT_TIMEOUT:
+		/* Roll back the whole transaction on a lock wait
+		timeout.  (A stray "fall through" comment after the
+		break was removed; the break makes fall-through
+		impossible.) */
+		trx_rollback_for_mysql(trx);
+		break;
+	case DB_DUPLICATE_KEY:
+	case DB_FOREIGN_DUPLICATE_KEY:
+	case DB_TOO_BIG_RECORD:
+	case DB_ROW_IS_REFERENCED:
+	case DB_NO_REFERENCED_ROW:
+	case DB_CANNOT_ADD_CONSTRAINT:
+	case DB_TOO_MANY_CONCURRENT_TRXS:
+	case DB_OUT_OF_FILE_SPACE:
+		if (savept) {
+			/* Roll back the latest, possibly incomplete
+			insertion or update */
+
+			trx_rollback_to_savepoint(trx, savept);
+		}
+		break;
+	case DB_LOCK_WAIT:
+		/* Suspend this thread until the lock is granted or the
+		wait itself fails; on a new error, process it above. */
+		lock_wait_suspend_thread(thr);
+
+		if (trx->error_state != DB_SUCCESS) {
+			que_thr_stop_for_mysql(thr);
+
+			goto handle_new_error;
+		}
+
+		*new_err = err;
+
+		return(TRUE); /* Operation needs to be retried. */
+
+	case DB_DEADLOCK:
+	case DB_LOCK_TABLE_FULL:
+		/* Roll back the whole transaction; this resolution was added
+		to version 3.23.43 */
+
+		trx_rollback_for_mysql(trx);
+		break;
+
+	case DB_MUST_GET_MORE_FILE_SPACE:
+
+		/* Fatal: there is no way to continue without more
+		tablespace; this terminates the whole process. */
+		exit(1);
+
+	case DB_CORRUPTION:
+	case DB_FOREIGN_EXCEED_MAX_CASCADE:
+		break;
+	default:
+		ut_error;
+	}
+
+	/* Report any error raised during the handling above in
+	preference to the original one. */
+	if (trx->error_state != DB_SUCCESS) {
+		*new_err = trx->error_state;
+	} else {
+		*new_err = err;
+	}
+
+	trx->error_state = DB_SUCCESS;
+
+	return(FALSE);
+}
diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc
index 8b7a19777ab..e3e127c3ace 100644
--- a/storage/innobase/btr/btr0btr.cc
+++ b/storage/innobase/btr/btr0btr.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -697,14 +698,16 @@ btr_root_fseg_validate(
#endif /* UNIV_BTR_DEBUG */
/**************************************************************//**
-Gets the root node of a tree and x-latches it.
-@return root page, x-latched */
+Gets the root node of a tree and x- or s-latches it.
+@return root page, x- or s-latched */
static
buf_block_t*
btr_root_block_get(
/*===============*/
- dict_index_t* index, /*!< in: index tree */
- mtr_t* mtr) /*!< in: mtr */
+ const dict_index_t* index, /*!< in: index tree */
+ ulint mode, /*!< in: either RW_S_LATCH
+ or RW_X_LATCH */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint space;
ulint zip_size;
@@ -715,8 +718,7 @@ btr_root_block_get(
zip_size = dict_table_zip_size(index->table);
root_page_no = dict_index_get_page(index);
- block = btr_block_get(space, zip_size, root_page_no, RW_X_LATCH,
- index, mtr);
+ block = btr_block_get(space, zip_size, root_page_no, mode, index, mtr);
btr_assert_not_corrupted(block, index);
#ifdef UNIV_BTR_DEBUG
if (!dict_index_is_ibuf(index)) {
@@ -739,10 +741,162 @@ UNIV_INTERN
page_t*
btr_root_get(
/*=========*/
+ const dict_index_t* index, /*!< in: index tree */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ return(buf_block_get_frame(btr_root_block_get(index, RW_X_LATCH,
+ mtr)));
+}
+
+/**************************************************************//**
+Gets the height of the B-tree (the level of the root, when the leaf
+level is assumed to be 0). The caller must hold an S or X latch on
+the index.
+@return tree height (level of the root) */
+UNIV_INTERN
+ulint
+btr_height_get(
+/*===========*/
dict_index_t* index, /*!< in: index tree */
- mtr_t* mtr) /*!< in: mtr */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+{
+ ulint height;
+ buf_block_t* root_block;
+
+ ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
+ MTR_MEMO_S_LOCK)
+ || mtr_memo_contains(mtr, dict_index_get_lock(index),
+ MTR_MEMO_X_LOCK));
+
+ /* S latches the page */
+ root_block = btr_root_block_get(index, RW_S_LATCH, mtr);
+
+ height = btr_page_get_level(buf_block_get_frame(root_block), mtr);
+
+ /* Release the S latch on the root page. */
+ mtr_memo_release(mtr, root_block, MTR_MEMO_PAGE_S_FIX);
+#ifdef UNIV_SYNC_DEBUG
+ sync_thread_reset_level(&root_block->lock);
+#endif /* UNIV_SYNC_DEBUG */
+
+ return(height);
+}
+
+/**************************************************************//**
+Checks a file segment header within a B-tree root page and updates
+the segment header space id.
+@return TRUE if valid */
+static
+bool
+btr_root_fseg_adjust_on_import(
+/*===========================*/
+ fseg_header_t* seg_header, /*!< in/out: segment header */
+ page_zip_des_t* page_zip, /*!< in/out: compressed page,
+ or NULL */
+ ulint space, /*!< in: tablespace identifier */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
- return(buf_block_get_frame(btr_root_block_get(index, mtr)));
+ ulint offset = mach_read_from_2(seg_header + FSEG_HDR_OFFSET);
+
+ if (offset < FIL_PAGE_DATA
+ || offset > UNIV_PAGE_SIZE - FIL_PAGE_DATA_END) {
+
+ return(FALSE);
+
+ } else if (page_zip) {
+ mach_write_to_4(seg_header + FSEG_HDR_SPACE, space);
+ page_zip_write_header(page_zip, seg_header + FSEG_HDR_SPACE,
+ 4, mtr);
+ } else {
+ mlog_write_ulint(seg_header + FSEG_HDR_SPACE,
+ space, MLOG_4BYTES, mtr);
+ }
+
+ return(TRUE);
+}
+
+/**************************************************************//**
+Checks and adjusts the root node of a tree during IMPORT TABLESPACE.
+@return error code, or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+btr_root_adjust_on_import(
+/*======================*/
+ const dict_index_t* index) /*!< in: index tree */
+{
+ dberr_t err;
+ mtr_t mtr;
+ page_t* page;
+ buf_block_t* block;
+ page_zip_des_t* page_zip;
+ dict_table_t* table = index->table;
+ ulint space_id = dict_index_get_space(index);
+ ulint zip_size = dict_table_zip_size(table);
+ ulint root_page_no = dict_index_get_page(index);
+
+ mtr_start(&mtr);
+
+ mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
+
+ DBUG_EXECUTE_IF("ib_import_trigger_corruption_3",
+ return(DB_CORRUPTION););
+
+ block = btr_block_get(
+ space_id, zip_size, root_page_no, RW_X_LATCH, index, &mtr);
+
+ page = buf_block_get_frame(block);
+ page_zip = buf_block_get_page_zip(block);
+
+ /* Check that this is a B-tree page and both the PREV and NEXT
+ pointers are FIL_NULL, because the root page does not have any
+ siblings. */
+ if (fil_page_get_type(page) != FIL_PAGE_INDEX
+ || fil_page_get_prev(page) != FIL_NULL
+ || fil_page_get_next(page) != FIL_NULL) {
+
+ err = DB_CORRUPTION;
+
+ } else if (dict_index_is_clust(index)) {
+ bool page_is_compact_format;
+
+ page_is_compact_format = page_is_comp(page) > 0;
+
+ /* Check if the page format and table format agree. */
+ if (page_is_compact_format != dict_table_is_comp(table)) {
+ err = DB_CORRUPTION;
+ } else {
+
+ /* Check that the table flags and the tablespace
+ flags match. */
+ ulint flags = fil_space_get_flags(table->space);
+
+ if (flags
+ && flags != dict_tf_to_fsp_flags(table->flags)) {
+
+ err = DB_CORRUPTION;
+ } else {
+ err = DB_SUCCESS;
+ }
+ }
+ } else {
+ err = DB_SUCCESS;
+ }
+
+ /* Check and adjust the file segment headers, if all OK so far. */
+ if (err == DB_SUCCESS
+ && (!btr_root_fseg_adjust_on_import(
+ FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
+ + page, page_zip, space_id, &mtr)
+ || !btr_root_fseg_adjust_on_import(
+ FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
+ + page, page_zip, space_id, &mtr))) {
+
+ err = DB_CORRUPTION;
+ }
+
+ mtr_commit(&mtr);
+
+ return(err);
}
/*************************************************************//**
@@ -1033,8 +1187,7 @@ btr_get_size(
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
MTR_MEMO_S_LOCK));
- if (index->page == FIL_NULL
- || index->to_be_dropped
+ if (index->page == FIL_NULL || dict_index_is_online_ddl(index)
|| *index->name == TEMP_INDEX_PREFIX) {
return(ULINT_UNDEFINED);
}
@@ -1584,6 +1737,8 @@ btr_page_reorganize_low(
there cannot exist locks on the
page, and a hash index should not be
dropped: it cannot exist */
+ ulint compression_level,/*!< in: compression level to be used
+ if dealing with compressed page */
buf_block_t* block, /*!< in: page to be reorganized */
dict_index_t* index, /*!< in: record descriptor */
mtr_t* mtr) /*!< in: mtr */
@@ -1601,6 +1756,8 @@ btr_page_reorganize_low(
ulint max_ins_size1;
ulint max_ins_size2;
ibool success = FALSE;
+ byte type;
+ byte* log_ptr;
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
btr_assert_not_corrupted(block, index);
@@ -1612,9 +1769,23 @@ btr_page_reorganize_low(
#ifndef UNIV_HOTBACKUP
/* Write the log record */
- mlog_open_and_write_index(mtr, page, index, page_is_comp(page)
- ? MLOG_COMP_PAGE_REORGANIZE
- : MLOG_PAGE_REORGANIZE, 0);
+ if (page_zip) {
+ type = MLOG_ZIP_PAGE_REORGANIZE;
+ } else if (page_is_comp(page)) {
+ type = MLOG_COMP_PAGE_REORGANIZE;
+ } else {
+ type = MLOG_PAGE_REORGANIZE;
+ }
+
+ log_ptr = mlog_open_and_write_index(
+ mtr, page, index, type, page_zip ? 1 : 0);
+
+ /* For compressed pages write the compression level. */
+ if (log_ptr && page_zip) {
+ mach_write_to_1(log_ptr, compression_level);
+ mlog_close(mtr, log_ptr + 1);
+ }
+
#endif /* !UNIV_HOTBACKUP */
/* Turn logging off */
@@ -1662,7 +1833,9 @@ btr_page_reorganize_low(
ut_ad(max_trx_id != 0 || recovery);
}
- if (page_zip && !page_zip_compress(page_zip, page, index, NULL)) {
+ if (page_zip
+ && !page_zip_compress(page_zip, page, index,
+ compression_level, NULL)) {
/* Restore the old page and exit. */
btr_blob_dbg_restore(page, temp_page, index,
@@ -1750,7 +1923,8 @@ btr_page_reorganize(
dict_index_t* index, /*!< in: record descriptor */
mtr_t* mtr) /*!< in: mtr */
{
- return(btr_page_reorganize_low(FALSE, block, index, mtr));
+ return(btr_page_reorganize_low(FALSE, page_compression_level,
+ block, index, mtr));
}
#endif /* !UNIV_HOTBACKUP */
@@ -1762,18 +1936,32 @@ byte*
btr_parse_page_reorganize(
/*======================*/
byte* ptr, /*!< in: buffer */
- byte* end_ptr __attribute__((unused)),
- /*!< in: buffer end */
+ byte* end_ptr,/*!< in: buffer end */
dict_index_t* index, /*!< in: record descriptor */
+ bool compressed,/*!< in: true if compressed page */
buf_block_t* block, /*!< in: page to be reorganized, or NULL */
mtr_t* mtr) /*!< in: mtr or NULL */
{
+ ulint level = page_compression_level;
+
ut_ad(ptr && end_ptr);
- /* The record is empty, except for the record initial part */
+ /* If dealing with a compressed page the record has the
+ compression level used during original compression written in
+ one byte. Otherwise record is empty. */
+ if (compressed) {
+ if (ptr == end_ptr) {
+ return(NULL);
+ }
+
+ level = (ulint)mach_read_from_1(ptr);
+
+ ut_a(level <= 9);
+ ++ptr;
+ }
if (block != NULL) {
- btr_page_reorganize_low(TRUE, block, index, mtr);
+ btr_page_reorganize_low(TRUE, level, block, index, mtr);
}
return(ptr);
@@ -1827,10 +2015,13 @@ UNIV_INTERN
rec_t*
btr_root_raise_and_insert(
/*======================*/
+ ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor at which to insert: must be
on the root page; when the function returns,
the cursor is positioned on the predecessor
of the inserted record */
+ ulint** offsets,/*!< out: offsets on inserted record */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
const dtuple_t* tuple, /*!< in: tuple to insert */
ulint n_ext, /*!< in: number of externally stored columns */
mtr_t* mtr) /*!< in: mtr */
@@ -1840,7 +2031,6 @@ btr_root_raise_and_insert(
page_t* new_page;
ulint new_page_no;
rec_t* rec;
- mem_heap_t* heap;
dtuple_t* node_ptr;
ulint level;
rec_t* node_ptr_rec;
@@ -1926,7 +2116,9 @@ btr_root_raise_and_insert(
lock_update_root_raise(new_block, root_block);
/* Create a memory heap where the node pointer is stored */
- heap = mem_heap_create(100);
+ if (!*heap) {
+ *heap = mem_heap_create(1000);
+ }
rec = page_rec_get_next(page_get_infimum_rec(new_page));
new_page_no = buf_block_get_page_no(new_block);
@@ -1934,8 +2126,8 @@ btr_root_raise_and_insert(
/* Build the node pointer (= node key and page address) for the
child */
- node_ptr = dict_index_build_node_ptr(index, rec, new_page_no, heap,
- level);
+ node_ptr = dict_index_build_node_ptr(
+ index, rec, new_page_no, *heap, level);
/* The node pointer must be marked as the predefined minimum record,
as there is no lower alphabetical limit to records in the leftmost
node of a level: */
@@ -1961,15 +2153,12 @@ btr_root_raise_and_insert(
page_cur_set_before_first(root_block, page_cursor);
node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr,
- index, 0, mtr);
+ index, offsets, heap, 0, mtr);
/* The root page should only contain the node pointer
to new_page at this point. Thus, the data should fit. */
ut_a(node_ptr_rec);
- /* Free the memory heap */
- mem_heap_free(heap);
-
/* We play safe and reset the free bits for the new page */
#if 0
@@ -1985,7 +2174,8 @@ btr_root_raise_and_insert(
PAGE_CUR_LE, page_cursor);
/* Split the child and insert tuple */
- return(btr_page_split_and_insert(cursor, tuple, n_ext, mtr));
+ return(btr_page_split_and_insert(flags, cursor, offsets, heap,
+ tuple, n_ext, mtr));
}
/*************************************************************//**
@@ -2213,9 +2403,9 @@ func_exit:
/*************************************************************//**
Returns TRUE if the insert fits on the appropriate half-page with the
chosen split_rec.
-@return TRUE if fits */
-static
-ibool
+@return true if fits */
+static __attribute__((nonnull(1,3,4,6), warn_unused_result))
+bool
btr_page_insert_fits(
/*=================*/
btr_cur_t* cursor, /*!< in: cursor at which insert
@@ -2223,11 +2413,11 @@ btr_page_insert_fits(
const rec_t* split_rec,/*!< in: suggestion for first record
on upper half-page, or NULL if
tuple to be inserted should be first */
- const ulint* offsets,/*!< in: rec_get_offsets(
- split_rec, cursor->index) */
+ ulint** offsets,/*!< in: rec_get_offsets(
+ split_rec, cursor->index); out: garbage */
const dtuple_t* tuple, /*!< in: tuple to insert */
ulint n_ext, /*!< in: number of externally stored columns */
- mem_heap_t* heap) /*!< in: temporary memory heap */
+ mem_heap_t** heap) /*!< in: temporary memory heap */
{
page_t* page;
ulint insert_size;
@@ -2236,15 +2426,13 @@ btr_page_insert_fits(
ulint total_n_recs;
const rec_t* rec;
const rec_t* end_rec;
- ulint* offs;
page = btr_cur_get_page(cursor);
- ut_ad(!split_rec == !offsets);
- ut_ad(!offsets
- || !page_is_comp(page) == !rec_offs_comp(offsets));
- ut_ad(!offsets
- || rec_offs_validate(split_rec, cursor->index, offsets));
+ ut_ad(!split_rec
+ || !page_is_comp(page) == !rec_offs_comp(*offsets));
+ ut_ad(!split_rec
+ || rec_offs_validate(split_rec, cursor->index, *offsets));
insert_size = rec_get_converted_size(cursor->index, tuple, n_ext);
free_space = page_get_free_space_of_empty(page_is_comp(page));
@@ -2262,7 +2450,7 @@ btr_page_insert_fits(
rec = page_rec_get_next(page_get_infimum_rec(page));
end_rec = page_rec_get_next(btr_cur_get_rec(cursor));
- } else if (cmp_dtuple_rec(tuple, split_rec, offsets) >= 0) {
+ } else if (cmp_dtuple_rec(tuple, split_rec, *offsets) >= 0) {
rec = page_rec_get_next(page_get_infimum_rec(page));
end_rec = split_rec;
@@ -2277,19 +2465,17 @@ btr_page_insert_fits(
/* Ok, there will be enough available space on the
half page where the tuple is inserted */
- return(TRUE);
+ return(true);
}
- offs = NULL;
-
while (rec != end_rec) {
/* In this loop we calculate the amount of reserved
space after rec is removed from page. */
- offs = rec_get_offsets(rec, cursor->index, offs,
- ULINT_UNDEFINED, &heap);
+ *offsets = rec_get_offsets(rec, cursor->index, *offsets,
+ ULINT_UNDEFINED, heap);
- total_data -= rec_offs_size(offs);
+ total_data -= rec_offs_size(*offsets);
total_n_recs--;
if (total_data + page_dir_calc_reserved_space(total_n_recs)
@@ -2298,13 +2484,13 @@ btr_page_insert_fits(
/* Ok, there will be enough available space on the
half page where the tuple is inserted */
- return(TRUE);
+ return(true);
}
rec = page_rec_get_next_const(rec);
}
- return(FALSE);
+ return(false);
}
/*******************************************************//**
@@ -2314,6 +2500,7 @@ UNIV_INTERN
void
btr_insert_on_non_leaf_level_func(
/*==============================*/
+ ulint flags, /*!< in: undo logging and locking flags */
dict_index_t* index, /*!< in: index */
ulint level, /*!< in: level, must be > 0 */
dtuple_t* tuple, /*!< in: the record to be inserted */
@@ -2323,8 +2510,10 @@ btr_insert_on_non_leaf_level_func(
{
big_rec_t* dummy_big_rec;
btr_cur_t cursor;
- ulint err;
+ dberr_t err;
rec_t* rec;
+ ulint* offsets = NULL;
+ mem_heap_t* heap = NULL;
ut_ad(level > 0);
@@ -2335,26 +2524,35 @@ btr_insert_on_non_leaf_level_func(
ut_ad(cursor.flag == BTR_CUR_BINARY);
err = btr_cur_optimistic_insert(
- BTR_NO_LOCKING_FLAG | BTR_KEEP_SYS_FLAG
- | BTR_NO_UNDO_LOG_FLAG, &cursor, tuple, &rec,
- &dummy_big_rec, 0, NULL, mtr);
+ flags
+ | BTR_NO_LOCKING_FLAG
+ | BTR_KEEP_SYS_FLAG
+ | BTR_NO_UNDO_LOG_FLAG,
+ &cursor, &offsets, &heap,
+ tuple, &rec, &dummy_big_rec, 0, NULL, mtr);
if (err == DB_FAIL) {
- err = btr_cur_pessimistic_insert(
- BTR_NO_LOCKING_FLAG | BTR_KEEP_SYS_FLAG
- | BTR_NO_UNDO_LOG_FLAG,
- &cursor, tuple, &rec, &dummy_big_rec, 0, NULL, mtr);
+ err = btr_cur_pessimistic_insert(flags
+ | BTR_NO_LOCKING_FLAG
+ | BTR_KEEP_SYS_FLAG
+ | BTR_NO_UNDO_LOG_FLAG,
+ &cursor, &offsets, &heap,
+ tuple, &rec,
+ &dummy_big_rec, 0, NULL, mtr);
ut_a(err == DB_SUCCESS);
}
+ mem_heap_free(heap);
}
/**************************************************************//**
Attaches the halves of an index page on the appropriate level in an
index tree. */
-static
+static __attribute__((nonnull))
void
btr_attach_half_pages(
/*==================*/
+ ulint flags, /*!< in: undo logging and
+ locking flags */
dict_index_t* index, /*!< in: the index tree */
buf_block_t* block, /*!< in/out: page to be split */
const rec_t* split_rec, /*!< in: first record on upper
@@ -2432,7 +2630,8 @@ btr_attach_half_pages(
/* Insert it next to the pointer to the lower half. Note that this
may generate recursion leading to a split on the higher level. */
- btr_insert_on_non_leaf_level(index, level + 1, node_ptr_upper, mtr);
+ btr_insert_on_non_leaf_level(flags, index, level + 1,
+ node_ptr_upper, mtr);
/* Free the memory heap */
mem_heap_free(heap);
@@ -2484,13 +2683,13 @@ btr_attach_half_pages(
/*************************************************************//**
Determine if a tuple is smaller than any record on the page.
@return TRUE if smaller */
-static
-ibool
+static __attribute__((nonnull, warn_unused_result))
+bool
btr_page_tuple_smaller(
/*===================*/
btr_cur_t* cursor, /*!< in: b-tree cursor */
const dtuple_t* tuple, /*!< in: tuple to consider */
- ulint* offsets,/*!< in/out: temporary storage */
+ ulint** offsets,/*!< in/out: temporary storage */
ulint n_uniq, /*!< in: number of unique fields
in the index page records */
mem_heap_t** heap) /*!< in/out: heap for offsets */
@@ -2505,11 +2704,11 @@ btr_page_tuple_smaller(
page_cur_move_to_next(&pcur);
first_rec = page_cur_get_rec(&pcur);
- offsets = rec_get_offsets(
- first_rec, cursor->index, offsets,
+ *offsets = rec_get_offsets(
+ first_rec, cursor->index, *offsets,
n_uniq, heap);
- return(cmp_dtuple_rec(tuple, first_rec, offsets) < 0);
+ return(cmp_dtuple_rec(tuple, first_rec, *offsets) < 0);
}
/*************************************************************//**
@@ -2525,9 +2724,12 @@ UNIV_INTERN
rec_t*
btr_page_split_and_insert(
/*======================*/
+ ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor at which to insert; when the
function returns, the cursor is positioned
on the predecessor of the inserted record */
+ ulint** offsets,/*!< out: offsets on inserted record */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
const dtuple_t* tuple, /*!< in: tuple to insert */
ulint n_ext, /*!< in: number of externally stored columns */
mtr_t* mtr) /*!< in: mtr */
@@ -2553,18 +2755,21 @@ btr_page_split_and_insert(
ibool insert_left;
ulint n_iterations = 0;
rec_t* rec;
- mem_heap_t* heap;
ulint n_uniq;
- ulint* offsets;
- heap = mem_heap_create(1024);
+ if (!*heap) {
+ *heap = mem_heap_create(1024);
+ }
n_uniq = dict_index_get_n_unique_in_tree(cursor->index);
func_start:
- mem_heap_empty(heap);
- offsets = NULL;
+ mem_heap_empty(*heap);
+ *offsets = NULL;
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index),
MTR_MEMO_X_LOCK));
+ ut_ad(!dict_index_is_online_ddl(cursor->index)
+ || (flags & BTR_CREATE_FLAG)
+ || dict_index_is_clust(cursor->index));
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(dict_index_get_lock(cursor->index), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
@@ -2590,7 +2795,7 @@ func_start:
if (split_rec == NULL) {
insert_left = btr_page_tuple_smaller(
- cursor, tuple, offsets, n_uniq, &heap);
+ cursor, tuple, offsets, n_uniq, heap);
}
} else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) {
direction = FSP_UP;
@@ -2612,7 +2817,7 @@ func_start:
if (page_get_n_recs(page) > 1) {
split_rec = page_get_middle_rec(page);
} else if (btr_page_tuple_smaller(cursor, tuple,
- offsets, n_uniq, &heap)) {
+ offsets, n_uniq, heap)) {
split_rec = page_rec_get_next(
page_get_infimum_rec(page));
} else {
@@ -2635,10 +2840,10 @@ func_start:
if (split_rec) {
first_rec = move_limit = split_rec;
- offsets = rec_get_offsets(split_rec, cursor->index, offsets,
- n_uniq, &heap);
+ *offsets = rec_get_offsets(split_rec, cursor->index, *offsets,
+ n_uniq, heap);
- insert_left = cmp_dtuple_rec(tuple, split_rec, offsets) < 0;
+ insert_left = cmp_dtuple_rec(tuple, split_rec, *offsets) < 0;
if (!insert_left && new_page_zip && n_iterations > 0) {
/* If a compressed page has already been split,
@@ -2665,7 +2870,7 @@ insert_empty:
/* 4. Do first the modifications in the tree structure */
- btr_attach_half_pages(cursor->index, block,
+ btr_attach_half_pages(flags, cursor->index, block,
first_rec, new_block, direction, mtr);
/* If the split is made on the leaf level and the insert will fit
@@ -2685,10 +2890,11 @@ insert_empty:
insert_will_fit = !new_page_zip
&& btr_page_insert_fits(cursor, NULL,
- NULL, tuple, n_ext, heap);
+ offsets, tuple, n_ext, heap);
}
- if (insert_will_fit && page_is_leaf(page)) {
+ if (insert_will_fit && page_is_leaf(page)
+ && !dict_index_is_online_ddl(cursor->index)) {
mtr_memo_release(mtr, dict_index_get_lock(cursor->index),
MTR_MEMO_X_LOCK);
@@ -2805,8 +3011,8 @@ insert_empty:
page_cur_search(insert_block, cursor->index, tuple,
PAGE_CUR_LE, page_cursor);
- rec = page_cur_tuple_insert(page_cursor, tuple,
- cursor->index, n_ext, mtr);
+ rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index,
+ offsets, heap, n_ext, mtr);
#ifdef UNIV_ZIP_DEBUG
{
@@ -2837,7 +3043,7 @@ insert_empty:
page_cur_search(insert_block, cursor->index, tuple,
PAGE_CUR_LE, page_cursor);
rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index,
- n_ext, mtr);
+ offsets, heap, n_ext, mtr);
if (rec == NULL) {
/* The insert did not fit on the page: loop back to the
@@ -2878,7 +3084,7 @@ func_exit:
ut_ad(page_validate(buf_block_get_frame(left_block), cursor->index));
ut_ad(page_validate(buf_block_get_frame(right_block), cursor->index));
- mem_heap_free(heap);
+ ut_ad(!rec || rec_offs_validate(rec, cursor->index, *offsets));
return(rec);
}
@@ -3058,15 +3264,15 @@ btr_node_ptr_delete(
{
btr_cur_t cursor;
ibool compressed;
- ulint err;
+ dberr_t err;
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
/* Delete node pointer on father page */
btr_page_get_father(index, block, mtr, &cursor);
- compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, RB_NONE,
- mtr);
+ compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor,
+ BTR_CREATE_FLAG, RB_NONE, mtr);
ut_a(err == DB_SUCCESS);
if (!compressed) {
@@ -3098,6 +3304,8 @@ btr_lift_page_up(
buf_block_t* blocks[BTR_MAX_LEVELS];
ulint n_blocks; /*!< last used index in blocks[] */
ulint i;
+ bool lift_father_up;
+ buf_block_t* block_orig = block;
ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
@@ -3108,11 +3316,13 @@ btr_lift_page_up(
{
btr_cur_t cursor;
- mem_heap_t* heap = mem_heap_create(100);
- ulint* offsets;
+ ulint* offsets = NULL;
+ mem_heap_t* heap = mem_heap_create(
+ sizeof(*offsets)
+ * (REC_OFFS_HEADER_SIZE + 1 + 1 + index->n_fields));
buf_block_t* b;
- offsets = btr_page_get_father_block(NULL, heap, index,
+ offsets = btr_page_get_father_block(offsets, heap, index,
block, mtr, &cursor);
father_block = btr_cur_get_block(&cursor);
father_page_zip = buf_block_get_page_zip(father_block);
@@ -3136,6 +3346,29 @@ btr_lift_page_up(
blocks[n_blocks++] = b = btr_cur_get_block(&cursor);
}
+ lift_father_up = (n_blocks && page_level == 0);
+ if (lift_father_up) {
+ /* The father page also should be the only on its level (not
+ root). We should lift up the father page at first.
+ Because the leaf page should be lifted up only for root page.
+ The freeing page is based on page_level (==0 or !=0)
+ to choose segment. If the page_level is changed ==0 from !=0,
+ later freeing of the page doesn't find the page allocation
+ to be freed.*/
+
+ block = father_block;
+ page = buf_block_get_frame(block);
+ page_level = btr_page_get_level(page, mtr);
+
+ ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
+ ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+
+ father_block = blocks[0];
+ father_page_zip = buf_block_get_page_zip(father_block);
+ father_page = buf_block_get_frame(father_block);
+ }
+
mem_heap_free(heap);
}
@@ -3143,6 +3376,7 @@ btr_lift_page_up(
/* Make the father empty */
btr_page_empty(father_block, father_page_zip, index, page_level, mtr);
+ page_level++;
/* Copy the records to the father page one by one. */
if (0
@@ -3174,7 +3408,7 @@ btr_lift_page_up(
lock_update_copy_and_discard(father_block, block);
/* Go upward to root page, decrementing levels by one. */
- for (i = 0; i < n_blocks; i++, page_level++) {
+ for (i = lift_father_up ? 1 : 0; i < n_blocks; i++, page_level++) {
page_t* page = buf_block_get_frame(blocks[i]);
page_zip_des_t* page_zip= buf_block_get_page_zip(blocks[i]);
@@ -3196,7 +3430,7 @@ btr_lift_page_up(
ut_ad(page_validate(father_page, index));
ut_ad(btr_check_node_ptr(index, father_block, mtr));
- return(father_block);
+ return(lift_father_up ? block_orig : father_block);
}
/*************************************************************//**
@@ -3267,6 +3501,7 @@ btr_compress(
if (adjust) {
nth_rec = page_rec_get_n_recs_before(btr_cur_get_rec(cursor));
+ ut_ad(nth_rec > 0);
}
/* Decide the page to which we try to merge and which will inherit
@@ -3323,6 +3558,16 @@ err_exit:
return(FALSE);
}
+ /* If compression padding tells us that merging will result in
+ too packed up page i.e.: which is likely to cause compression
+ failure then don't merge the pages. */
+ if (zip_size && page_is_leaf(merge_page)
+ && (page_get_data_size(merge_page) + data_size
+ >= dict_index_zip_pad_optimal_page_size(index))) {
+
+ goto err_exit;
+ }
+
ut_ad(page_validate(merge_page, index));
max_ins_size = page_get_max_insert_size(merge_page, n_recs);
@@ -3502,6 +3747,7 @@ func_exit:
mem_heap_free(heap);
if (adjust) {
+ ut_ad(nth_rec > 0);
btr_cur_position(
index,
page_rec_get_nth(merge_block->frame, nth_rec),
@@ -3818,7 +4064,7 @@ btr_print_index(
mtr_start(&mtr);
- root = btr_root_block_get(index, &mtr);
+ root = btr_root_block_get(index, RW_X_LATCH, &mtr);
btr_print_recursive(index, root, width, &heap, &offsets, &mtr);
if (heap) {
@@ -3827,7 +4073,7 @@ btr_print_index(
mtr_commit(&mtr);
- btr_validate_index(index, NULL);
+ btr_validate_index(index, 0);
}
#endif /* UNIV_BTR_PRINT */
@@ -4013,8 +4259,22 @@ btr_index_page_validate(
{
page_cur_t cur;
ibool ret = TRUE;
+#ifndef DBUG_OFF
+ ulint nth = 1;
+#endif /* !DBUG_OFF */
page_cur_set_before_first(block, &cur);
+
+ /* Directory slot 0 should only contain the infimum record. */
+ DBUG_EXECUTE_IF("check_table_rec_next",
+ ut_a(page_rec_get_nth_const(
+ page_cur_get_page(&cur), 0)
+ == cur.rec);
+ ut_a(page_dir_slot_get_n_owned(
+ page_dir_get_nth_slot(
+ page_cur_get_page(&cur), 0))
+ == 1););
+
page_cur_move_to_next(&cur);
for (;;) {
@@ -4028,6 +4288,16 @@ btr_index_page_validate(
return(FALSE);
}
+ /* Verify that page_rec_get_nth_const() is correctly
+ retrieving each record. */
+ DBUG_EXECUTE_IF("check_table_rec_next",
+ ut_a(cur.rec == page_rec_get_nth_const(
+ page_cur_get_page(&cur),
+ page_rec_get_n_recs_before(
+ cur.rec)));
+ ut_a(nth++ == page_rec_get_n_recs_before(
+ cur.rec)););
+
page_cur_move_to_next(&cur);
}
@@ -4078,14 +4348,15 @@ btr_validate_report2(
Validates index tree level.
@return TRUE if ok */
static
-ibool
+bool
btr_validate_level(
/*===============*/
dict_index_t* index, /*!< in: index tree */
- trx_t* trx, /*!< in: transaction or NULL */
+ const trx_t* trx, /*!< in: transaction or NULL */
ulint level) /*!< in: level number */
{
ulint space;
+ ulint space_flags;
ulint zip_size;
buf_block_t* block;
page_t* page;
@@ -4099,9 +4370,10 @@ btr_validate_level(
ulint left_page_no;
page_cur_t cursor;
dtuple_t* node_ptr_tuple;
- ibool ret = TRUE;
+ bool ret = true;
mtr_t mtr;
mem_heap_t* heap = mem_heap_create(256);
+ fseg_header_t* seg;
ulint* offsets = NULL;
ulint* offsets2= NULL;
#ifdef UNIV_ZIP_DEBUG
@@ -4112,15 +4384,39 @@ btr_validate_level(
mtr_x_lock(dict_index_get_lock(index), &mtr);
- block = btr_root_block_get(index, &mtr);
+ block = btr_root_block_get(index, RW_X_LATCH, &mtr);
page = buf_block_get_frame(block);
+ seg = page + PAGE_HEADER + PAGE_BTR_SEG_TOP;
space = dict_index_get_space(index);
zip_size = dict_table_zip_size(index->table);
+ fil_space_get_latch(space, &space_flags);
+
+ if (zip_size != dict_tf_get_zip_size(space_flags)) {
+
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Flags mismatch: table=%lu, tablespace=%lu",
+ (ulint) index->table->flags, (ulint) space_flags);
+
+ mtr_commit(&mtr);
+
+ return(false);
+ }
+
while (level != btr_page_get_level(page, &mtr)) {
const rec_t* node_ptr;
+ if (fseg_page_is_free(seg,
+ block->page.space, block->page.offset)) {
+
+ btr_validate_report1(index, level, block);
+
+ ib_logf(IB_LOG_LEVEL_WARN, "page is free");
+
+ ret = false;
+ }
+
ut_a(space == buf_block_get_space(block));
ut_a(space == page_get_space_id(page));
#ifdef UNIV_ZIP_DEBUG
@@ -4141,12 +4437,13 @@ btr_validate_level(
/* Now we are on the desired level. Loop through the pages on that
level. */
-loop:
- if (trx_is_interrupted(trx)) {
- mtr_commit(&mtr);
- mem_heap_free(heap);
- return(ret);
+
+ if (level == 0) {
+ /* Leaf pages are managed in their own file segment. */
+ seg -= PAGE_BTR_SEG_TOP - PAGE_BTR_SEG_LEAF;
}
+
+loop:
mem_heap_empty(heap);
offsets = offsets2 = NULL;
mtr_x_lock(dict_index_get_lock(index), &mtr);
@@ -4156,20 +4453,35 @@ loop:
ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
- /* Check ordering etc. of records */
+ ut_a(block->page.space == space);
+
+ if (fseg_page_is_free(seg, block->page.space, block->page.offset)) {
+
+ btr_validate_report1(index, level, block);
+
+ ib_logf(IB_LOG_LEVEL_WARN, "Page is marked as free");
+ ret = false;
+
+ } else if (btr_page_get_index_id(page) != index->id) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Page index id " IB_ID_FMT " != data dictionary "
+ "index id " IB_ID_FMT,
+ btr_page_get_index_id(page), index->id);
+
+ ret = false;
+
+ } else if (!page_validate(page, index)) {
- if (!page_validate(page, index)) {
btr_validate_report1(index, level, block);
+ ret = false;
+
+ } else if (level == 0 && !btr_index_page_validate(block, index)) {
- ret = FALSE;
- } else if (level == 0) {
/* We are on level 0. Check that the records have the right
number of fields, and field lengths are right. */
- if (!btr_index_page_validate(block, index)) {
-
- ret = FALSE;
- }
+ ret = false;
}
ut_a(btr_page_get_level(page, &mtr) == level);
@@ -4195,7 +4507,7 @@ loop:
buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH);
buf_page_print(right_page, 0, BUF_PAGE_PRINT_NO_CRASH);
- ret = FALSE;
+ ret = false;
}
if (page_is_comp(right_page) != page_is_comp(page)) {
@@ -4204,7 +4516,7 @@ loop:
buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH);
buf_page_print(right_page, 0, BUF_PAGE_PRINT_NO_CRASH);
- ret = FALSE;
+ ret = false;
goto node_ptr_fails;
}
@@ -4237,7 +4549,7 @@ loop:
rec_print(stderr, rec, index);
putc('\n', stderr);
- ret = FALSE;
+ ret = false;
}
}
@@ -4288,7 +4600,7 @@ loop:
fputs("InnoDB: record on page ", stderr);
rec_print_new(stderr, rec, offsets);
putc('\n', stderr);
- ret = FALSE;
+ ret = false;
goto node_ptr_fails;
}
@@ -4318,7 +4630,7 @@ loop:
fputs("InnoDB: first rec ", stderr);
rec_print(stderr, first_rec, index);
putc('\n', stderr);
- ret = FALSE;
+ ret = false;
goto node_ptr_fails;
}
@@ -4346,7 +4658,7 @@ loop:
if (btr_cur_get_rec(&right_node_cur)
!= right_node_ptr) {
- ret = FALSE;
+ ret = false;
fputs("InnoDB: node pointer to"
" the right page is wrong\n",
stderr);
@@ -4372,7 +4684,7 @@ loop:
!= page_rec_get_next(
page_get_infimum_rec(
right_father_page))) {
- ret = FALSE;
+ ret = false;
fputs("InnoDB: node pointer 2 to"
" the right page is wrong\n",
stderr);
@@ -4397,7 +4709,7 @@ loop:
if (page_get_page_no(right_father_page)
!= btr_page_get_next(father_page, &mtr)) {
- ret = FALSE;
+ ret = false;
fputs("InnoDB: node pointer 3 to"
" the right page is wrong\n",
stderr);
@@ -4428,17 +4740,23 @@ node_ptr_fails:
on the next loop. The page has already been checked. */
mtr_commit(&mtr);
- if (right_page_no != FIL_NULL) {
+ if (trx_is_interrupted(trx)) {
+ /* On interrupt, return the current status. */
+ } else if (right_page_no != FIL_NULL) {
+
mtr_start(&mtr);
- block = btr_block_get(space, zip_size, right_page_no,
- RW_X_LATCH, index, &mtr);
+ block = btr_block_get(
+ space, zip_size, right_page_no,
+ RW_X_LATCH, index, &mtr);
+
page = buf_block_get_frame(block);
goto loop;
}
mem_heap_free(heap);
+
return(ret);
}
@@ -4446,40 +4764,39 @@ node_ptr_fails:
Checks the consistency of an index tree.
@return TRUE if ok */
UNIV_INTERN
-ibool
+bool
btr_validate_index(
/*===============*/
dict_index_t* index, /*!< in: index */
- trx_t* trx) /*!< in: transaction or NULL */
+ const trx_t* trx) /*!< in: transaction or NULL */
{
- mtr_t mtr;
- page_t* root;
- ulint i;
- ulint n;
-
/* Full Text index are implemented by auxiliary tables,
not the B-tree */
- if (index->type & DICT_FTS) {
- return(TRUE);
+ if (dict_index_is_online_ddl(index) || (index->type & DICT_FTS)) {
+ return(true);
}
+ mtr_t mtr;
+
mtr_start(&mtr);
- mtr_x_lock(dict_index_get_lock(index), &mtr);
- root = btr_root_get(index, &mtr);
- n = btr_page_get_level(root, &mtr);
+ mtr_x_lock(dict_index_get_lock(index), &mtr);
- for (i = 0; i <= n && !trx_is_interrupted(trx); i++) {
- if (!btr_validate_level(index, trx, n - i)) {
+ bool ok = true;
+ page_t* root = btr_root_get(index, &mtr);
+ ulint n = btr_page_get_level(root, &mtr);
- mtr_commit(&mtr);
+ for (ulint i = 0; i <= n; ++i) {
- return(FALSE);
+ if (!btr_validate_level(index, trx, n - i)) {
+ ok = false;
+ break;
}
}
mtr_commit(&mtr);
- return(TRUE);
+ return(ok);
}
+
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
index aeb16200f80..913b2088f24 100644
--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
@@ -2,6 +2,7 @@
Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
+Copyright (c) 2012, Facebook Inc.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -57,6 +58,7 @@ Created 10/16/1994 Heikki Tuuri
#include "buf0lru.h"
#include "btr0btr.h"
#include "btr0sea.h"
+#include "row0log.h"
#include "row0purge.h"
#include "row0upd.h"
#include "trx0rec.h"
@@ -69,13 +71,13 @@ Created 10/16/1994 Heikki Tuuri
#include "zlib.h"
/** Buffered B-tree operation types, introduced as part of delete buffering. */
-typedef enum btr_op_enum {
+enum btr_op_t {
BTR_NO_OP = 0, /*!< Not buffered */
BTR_INSERT_OP, /*!< Insert, do not ignore UNIQUE */
BTR_INSERT_IGNORE_UNIQUE_OP, /*!< Insert, ignoring UNIQUE */
BTR_DELETE_OP, /*!< Purge a delete-marked record */
BTR_DELMARK_OP /*!< Mark a record for deletion */
-} btr_op_t;
+};
#ifdef UNIV_DEBUG
/** If the following is set to TRUE, this module prints a lot of
@@ -97,6 +99,11 @@ srv_refresh_innodb_monitor_stats(). Referenced by
srv_printf_innodb_monitor(). */
UNIV_INTERN ulint btr_cur_n_sea_old = 0;
+#ifdef UNIV_DEBUG
+/* Flag to limit optimistic insert records */
+UNIV_INTERN uint btr_cur_limit_optimistic_insert_debug = 0;
+#endif /* UNIV_DEBUG */
+
/** In the optimistic insert, if the insert does not fit, but this much space
can be released by page reorganize, then it is reorganized */
#define BTR_CUR_PAGE_REORGANIZE_LIMIT (UNIV_PAGE_SIZE / 32)
@@ -425,6 +432,14 @@ btr_cur_search_to_nth_level(
cursor->low_match = ULINT_UNDEFINED;
#endif
+ ibool s_latch_by_caller;
+
+ s_latch_by_caller = latch_mode & BTR_ALREADY_S_LATCHED;
+
+ ut_ad(!s_latch_by_caller
+ || mtr_memo_contains(mtr, dict_index_get_lock(index),
+ MTR_MEMO_S_LOCK));
+
/* These flags are mutually exclusive, they are lumped together
with the latch mode for historical reasons. It's possible for
none of the flags to be set. */
@@ -460,11 +475,11 @@ btr_cur_search_to_nth_level(
estimate = latch_mode & BTR_ESTIMATE;
/* Turn the flags unrelated to the latch mode off. */
- latch_mode &= ~(BTR_INSERT
- | BTR_DELETE_MARK
- | BTR_DELETE
- | BTR_ESTIMATE
- | BTR_IGNORE_SEC_UNIQUE);
+ latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode);
+
+ ut_ad(!s_latch_by_caller
+ || latch_mode == BTR_SEARCH_LEAF
+ || latch_mode == BTR_MODIFY_LEAF);
cursor->flag = BTR_CUR_BINARY;
cursor->index = index;
@@ -478,16 +493,16 @@ btr_cur_search_to_nth_level(
#ifdef BTR_CUR_HASH_ADAPT
-#ifdef UNIV_SEARCH_PERF_STAT
+# ifdef UNIV_SEARCH_PERF_STAT
info->n_searches++;
-#endif
+# endif
if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_NOT_LOCKED
&& latch_mode <= BTR_MODIFY_LEAF
&& info->last_hash_succ
&& !estimate
-#ifdef PAGE_CUR_LE_OR_EXTENDS
+# ifdef PAGE_CUR_LE_OR_EXTENDS
&& mode != PAGE_CUR_LE_OR_EXTENDS
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
+# endif /* PAGE_CUR_LE_OR_EXTENDS */
/* If !has_search_latch, we do a dirty read of
btr_search_enabled below, and btr_search_guess_on_hash()
will have to check it again. */
@@ -508,7 +523,7 @@ btr_cur_search_to_nth_level(
return;
}
-#endif /* BTR_CUR_HASH_ADAPT */
+# endif /* BTR_CUR_HASH_ADAPT */
#endif /* BTR_CUR_ADAPT */
btr_cur_n_non_sea++;
@@ -525,15 +540,19 @@ btr_cur_search_to_nth_level(
savepoint = mtr_set_savepoint(mtr);
- if (latch_mode == BTR_MODIFY_TREE) {
+ switch (latch_mode) {
+ case BTR_MODIFY_TREE:
mtr_x_lock(dict_index_get_lock(index), mtr);
-
- } else if (latch_mode == BTR_CONT_MODIFY_TREE) {
+ break;
+ case BTR_CONT_MODIFY_TREE:
/* Do nothing */
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
MTR_MEMO_X_LOCK));
- } else {
- mtr_s_lock(dict_index_get_lock(index), mtr);
+ break;
+ default:
+ if (!s_latch_by_caller) {
+ mtr_s_lock(dict_index_get_lock(index), mtr);
+ }
}
page_cursor = btr_cur_get_page_cur(cursor);
@@ -687,6 +706,7 @@ retry_page_get:
? SYNC_IBUF_TREE_NODE : SYNC_TREE_NODE);
}
+ ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
ut_ad(index->id == btr_page_get_index_id(page));
if (UNIV_UNLIKELY(height == ULINT_UNDEFINED)) {
@@ -711,13 +731,17 @@ retry_page_get:
cursor, mtr);
}
- if (latch_mode != BTR_MODIFY_TREE
- && latch_mode != BTR_CONT_MODIFY_TREE) {
-
- /* Release the tree s-latch */
-
- mtr_release_s_latch_at_savepoint(
- mtr, savepoint, dict_index_get_lock(index));
+ switch (latch_mode) {
+ case BTR_MODIFY_TREE:
+ case BTR_CONT_MODIFY_TREE:
+ break;
+ default:
+ if (!s_latch_by_caller) {
+ /* Release the tree s-latch */
+ mtr_release_s_latch_at_savepoint(
+ mtr, savepoint,
+ dict_index_get_lock(index));
+ }
}
page_mode = mode;
@@ -784,8 +808,7 @@ retry_page_get:
will properly check btr_search_enabled again in
btr_search_build_page_hash_index() before building a
page hash index, while holding btr_search_latch. */
- if (UNIV_LIKELY(btr_search_enabled)) {
-
+ if (btr_search_enabled) {
btr_search_info_update(index, cursor);
}
#endif
@@ -815,14 +838,16 @@ UNIV_INTERN
void
btr_cur_open_at_index_side_func(
/*============================*/
- ibool from_left, /*!< in: TRUE if open to the low end,
- FALSE if to the high end */
+ bool from_left, /*!< in: true if open to the low end,
+ false if to the high end */
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: latch mode */
- btr_cur_t* cursor, /*!< in: cursor */
+ btr_cur_t* cursor, /*!< in/out: cursor */
+ ulint level, /*!< in: level to search for
+ (0=leaf). */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
- mtr_t* mtr) /*!< in: mtr */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
page_cur_t* page_cursor;
ulint page_no;
@@ -839,16 +864,27 @@ btr_cur_open_at_index_side_func(
rec_offs_init(offsets_);
estimate = latch_mode & BTR_ESTIMATE;
- latch_mode = latch_mode & ~BTR_ESTIMATE;
+ latch_mode &= ~BTR_ESTIMATE;
+
+ ut_ad(level != ULINT_UNDEFINED);
/* Store the position of the tree latch we push to mtr so that we
know how to release it when we have latched the leaf node */
savepoint = mtr_set_savepoint(mtr);
- if (latch_mode == BTR_MODIFY_TREE) {
+ switch (latch_mode) {
+ case BTR_CONT_MODIFY_TREE:
+ break;
+ case BTR_MODIFY_TREE:
mtr_x_lock(dict_index_get_lock(index), mtr);
- } else {
+ break;
+ case BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED:
+ case BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED:
+ ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
+ MTR_MEMO_S_LOCK));
+ break;
+ default:
mtr_s_lock(dict_index_get_lock(index), mtr);
}
@@ -868,6 +904,7 @@ btr_cur_open_at_index_side_func(
RW_NO_LATCH, NULL, BUF_GET,
file, line, mtr);
page = buf_block_get_frame(block);
+ ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
ut_ad(index->id == btr_page_get_index_id(page));
block->check_index_page_at_flush = TRUE;
@@ -877,26 +914,40 @@ btr_cur_open_at_index_side_func(
height = btr_page_get_level(page, mtr);
root_height = height;
+ ut_a(height >= level);
+ } else {
+ /* TODO: flag the index corrupted if this fails */
+ ut_ad(height == btr_page_get_level(page, mtr));
}
- if (height == 0) {
- btr_cur_latch_leaves(page, space, zip_size, page_no,
- latch_mode, cursor, mtr);
-
- /* In versions <= 3.23.52 we had forgotten to
- release the tree latch here. If in an index scan
- we had to scan far to find a record visible to the
- current transaction, that could starve others
- waiting for the tree latch. */
-
- if ((latch_mode != BTR_MODIFY_TREE)
- && (latch_mode != BTR_CONT_MODIFY_TREE)) {
+ if (height == level) {
+ btr_cur_latch_leaves(
+ page, space, zip_size, page_no,
+ latch_mode & ~BTR_ALREADY_S_LATCHED,
+ cursor, mtr);
- /* Release the tree s-latch */
+ if (height == 0) {
+ /* In versions <= 3.23.52 we had
+ forgotten to release the tree latch
+ here. If in an index scan we had to
+ scan far to find a record visible to
+ the current transaction, that could
+ starve others waiting for the tree
+ latch. */
+
+ switch (latch_mode) {
+ case BTR_MODIFY_TREE:
+ case BTR_CONT_MODIFY_TREE:
+ case BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED:
+ case BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED:
+ break;
+ default:
+ /* Release the tree s-latch */
- mtr_release_s_latch_at_savepoint(
- mtr, savepoint,
- dict_index_get_lock(index));
+ mtr_release_s_latch_at_savepoint(
+ mtr, savepoint,
+ dict_index_get_lock(index));
+ }
}
}
@@ -906,7 +957,7 @@ btr_cur_open_at_index_side_func(
page_cur_set_after_last(block, page_cursor);
}
- if (height == 0) {
+ if (height == level) {
if (estimate) {
btr_cur_add_path_info(cursor, height,
root_height);
@@ -965,9 +1016,12 @@ btr_cur_open_at_rnd_pos_func(
ulint* offsets = offsets_;
rec_offs_init(offsets_);
- if (latch_mode == BTR_MODIFY_TREE) {
+ switch (latch_mode) {
+ case BTR_MODIFY_TREE:
mtr_x_lock(dict_index_get_lock(index), mtr);
- } else {
+ break;
+ default:
+ ut_ad(latch_mode != BTR_CONT_MODIFY_TREE);
mtr_s_lock(dict_index_get_lock(index), mtr);
}
@@ -988,6 +1042,7 @@ btr_cur_open_at_rnd_pos_func(
RW_NO_LATCH, NULL, BUF_GET,
file, line, mtr);
page = buf_block_get_frame(block);
+ ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
ut_ad(index->id == btr_page_get_index_id(page));
if (height == ULINT_UNDEFINED) {
@@ -1032,7 +1087,7 @@ be freed by reorganizing. Differs from btr_cur_optimistic_insert because
no heuristics is applied to whether it pays to use CPU time for
reorganizing the page or not.
@return pointer to inserted record if succeed, else NULL */
-static
+static __attribute__((nonnull, warn_unused_result))
rec_t*
btr_cur_insert_if_possible(
/*=======================*/
@@ -1040,6 +1095,8 @@ btr_cur_insert_if_possible(
cursor stays valid */
const dtuple_t* tuple, /*!< in: tuple to insert; the size info need not
have been stored to tuple */
+ ulint** offsets,/*!< out: offsets on *rec */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
ulint n_ext, /*!< in: number of externally stored columns */
mtr_t* mtr) /*!< in: mtr */
{
@@ -1055,8 +1112,8 @@ btr_cur_insert_if_possible(
page_cursor = btr_cur_get_page_cur(cursor);
/* Now, try the insert */
- rec = page_cur_tuple_insert(page_cursor, tuple,
- cursor->index, n_ext, mtr);
+ rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index,
+ offsets, heap, n_ext, mtr);
if (UNIV_UNLIKELY(!rec)) {
/* If record did not fit, reorganize */
@@ -1066,19 +1123,21 @@ btr_cur_insert_if_possible(
page_cur_search(block, cursor->index, tuple,
PAGE_CUR_LE, page_cursor);
- rec = page_cur_tuple_insert(page_cursor, tuple,
- cursor->index, n_ext, mtr);
+ rec = page_cur_tuple_insert(
+ page_cursor, tuple, cursor->index,
+ offsets, heap, n_ext, mtr);
}
}
+ ut_ad(!rec || rec_offs_validate(rec, cursor->index, *offsets));
return(rec);
}
/*************************************************************//**
For an insert, checks the locks and does the undo logging if desired.
@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
-UNIV_INLINE
-ulint
+UNIV_INLINE __attribute__((warn_unused_result, nonnull(2,3,5,6)))
+dberr_t
btr_cur_ins_lock_and_undo(
/*======================*/
ulint flags, /*!< in: undo logging and locking flags: if
@@ -1093,7 +1152,7 @@ btr_cur_ins_lock_and_undo(
successor record */
{
dict_index_t* index;
- ulint err;
+ dberr_t err;
rec_t* rec;
roll_ptr_t roll_ptr;
@@ -1103,6 +1162,10 @@ btr_cur_ins_lock_and_undo(
rec = btr_cur_get_rec(cursor);
index = cursor->index;
+ ut_ad(!dict_index_is_online_ddl(index)
+ || dict_index_is_clust(index)
+ || (flags & BTR_CREATE_FLAG));
+
err = lock_rec_insert_check_and_lock(flags, rec,
btr_cur_get_block(cursor),
index, thr, mtr, inherit);
@@ -1115,7 +1178,7 @@ btr_cur_ins_lock_and_undo(
err = trx_undo_report_row_operation(flags, TRX_UNDO_INSERT_OP,
thr, index, entry,
- NULL, 0, NULL,
+ NULL, 0, NULL, NULL,
&roll_ptr);
if (err != DB_SUCCESS) {
@@ -1140,13 +1203,13 @@ static
void
btr_cur_trx_report(
/*===============*/
- trx_t* trx, /*!< in: transaction */
+ trx_id_t trx_id, /*!< in: transaction id */
const dict_index_t* index, /*!< in: index */
const char* op) /*!< in: operation */
{
- fprintf(stderr, "Trx with id " TRX_ID_FMT " going to ", trx->id);
+ fprintf(stderr, "Trx with id " TRX_ID_FMT " going to ", trx_id);
fputs(op, stderr);
- dict_index_name_print(stderr, trx, index);
+ dict_index_name_print(stderr, NULL, index);
putc('\n', stderr);
}
#endif /* UNIV_DEBUG */
@@ -1159,7 +1222,7 @@ one record on the page, the insert will always succeed; this is to
prevent trying to split a page with just one record.
@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_optimistic_insert(
/*======================*/
ulint flags, /*!< in: undo logging and locking flags: if not
@@ -1167,6 +1230,8 @@ btr_cur_optimistic_insert(
specified */
btr_cur_t* cursor, /*!< in: cursor on page after which to insert;
cursor stays valid */
+ ulint** offsets,/*!< out: offsets on *rec */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
dtuple_t* entry, /*!< in/out: entry to insert */
rec_t** rec, /*!< out: pointer to inserted record if
succeed */
@@ -1193,13 +1258,16 @@ btr_cur_optimistic_insert(
ibool inherit;
ulint zip_size;
ulint rec_size;
- ulint err;
+ dberr_t err;
*big_rec = NULL;
block = btr_cur_get_block(cursor);
page = buf_block_get_frame(block);
index = cursor->index;
+ ut_ad(!dict_index_is_online_ddl(index)
+ || dict_index_is_clust(index)
+ || (flags & BTR_CREATE_FLAG));
zip_size = buf_block_get_zip_size(block);
#ifdef UNIV_DEBUG_VALGRIND
if (zip_size) {
@@ -1214,7 +1282,7 @@ btr_cur_optimistic_insert(
}
#ifdef UNIV_DEBUG
if (btr_cur_print_record_ops && thr) {
- btr_cur_trx_report(thr_get_trx(thr), index, "insert into ");
+ btr_cur_trx_report(thr_get_trx(thr)->id, index, "insert ");
dtuple_print(stderr, entry);
}
#endif /* UNIV_DEBUG */
@@ -1276,6 +1344,9 @@ btr_cur_optimistic_insert(
}
}
+ LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page),
+ goto fail);
+
/* If there have been many consecutive inserts, and we are on the leaf
level, check if we have to split the page to reserve enough free space
for future updates of records. */
@@ -1305,6 +1376,15 @@ fail_err:
goto fail;
}
+ /* If compression padding tells us that insertion will result in
+ too packed up page i.e.: which is likely to cause compression
+ failure then don't do an optimistic insertion. */
+ if (zip_size && leaf
+ && (page_get_data_size(page) + rec_size
+ >= dict_index_zip_pad_optimal_page_size(index))) {
+
+ goto fail;
+ }
/* Check locks and write to the undo log, if specified */
err = btr_cur_ins_lock_and_undo(flags, cursor, entry,
thr, mtr, &inherit);
@@ -1321,7 +1401,7 @@ fail_err:
{
const rec_t* page_cursor_rec = page_cur_get_rec(page_cursor);
*rec = page_cur_tuple_insert(page_cursor, entry, index,
- n_ext, mtr);
+ offsets, heap, n_ext, mtr);
reorg = page_cursor_rec != page_cur_get_rec(page_cursor);
if (UNIV_UNLIKELY(reorg)) {
@@ -1351,7 +1431,7 @@ fail_err:
page_cur_search(block, index, entry, PAGE_CUR_LE, page_cursor);
*rec = page_cur_tuple_insert(page_cursor, entry, index,
- n_ext, mtr);
+ offsets, heap, n_ext, mtr);
if (UNIV_UNLIKELY(!*rec)) {
if (zip_size != 0) {
@@ -1426,7 +1506,7 @@ made on the leaf level, to avoid deadlocks, mtr must also own x-latches
to brothers of page, if those brothers exist.
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_pessimistic_insert(
/*=======================*/
ulint flags, /*!< in: undo logging and locking flags: if not
@@ -1437,6 +1517,9 @@ btr_cur_pessimistic_insert(
insertion will certainly succeed */
btr_cur_t* cursor, /*!< in: cursor after which to insert;
cursor stays valid */
+ ulint** offsets,/*!< out: offsets on *rec */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap
+ that can be emptied, or NULL */
dtuple_t* entry, /*!< in/out: entry to insert */
rec_t** rec, /*!< out: pointer to inserted record if
succeed */
@@ -1450,8 +1533,7 @@ btr_cur_pessimistic_insert(
dict_index_t* index = cursor->index;
ulint zip_size = dict_table_zip_size(index->table);
big_rec_t* big_rec_vec = NULL;
- mem_heap_t* heap = NULL;
- ulint err;
+ dberr_t err;
ibool dummy_inh;
ibool success;
ulint n_extents = 0;
@@ -1466,6 +1548,9 @@ btr_cur_pessimistic_insert(
MTR_MEMO_X_LOCK));
ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
MTR_MEMO_PAGE_X_FIX));
+ ut_ad(!dict_index_is_online_ddl(index)
+ || dict_index_is_clust(index)
+ || (flags & BTR_CREATE_FLAG));
cursor->flag = BTR_CUR_BINARY;
@@ -1523,13 +1608,11 @@ btr_cur_pessimistic_insert(
== buf_block_get_page_no(btr_cur_get_block(cursor))) {
/* The page is the root page */
- *rec = btr_root_raise_and_insert(cursor, entry, n_ext, mtr);
+ *rec = btr_root_raise_and_insert(
+ flags, cursor, offsets, heap, entry, n_ext, mtr);
} else {
- *rec = btr_page_split_and_insert(cursor, entry, n_ext, mtr);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
+ *rec = btr_page_split_and_insert(
+ flags, cursor, offsets, heap, entry, n_ext, mtr);
}
ut_ad(page_rec_get_next(btr_cur_get_rec(cursor)) == *rec);
@@ -1556,29 +1639,36 @@ btr_cur_pessimistic_insert(
/*************************************************************//**
For an update, checks the locks and does the undo logging.
@return DB_SUCCESS, DB_WAIT_LOCK, or error number */
-UNIV_INLINE
-ulint
+UNIV_INLINE __attribute__((warn_unused_result, nonnull(2,3,6,7)))
+dberr_t
btr_cur_upd_lock_and_undo(
/*======================*/
ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor on record to update */
+ const ulint* offsets,/*!< in: rec_get_offsets() on cursor */
const upd_t* update, /*!< in: update vector */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
- que_thr_t* thr, /*!< in: query thread */
+ que_thr_t* thr, /*!< in: query thread
+ (can be NULL if BTR_NO_LOCKING_FLAG) */
mtr_t* mtr, /*!< in/out: mini-transaction */
roll_ptr_t* roll_ptr)/*!< out: roll pointer */
{
dict_index_t* index;
- rec_t* rec;
- ulint err;
+ const rec_t* rec;
+ dberr_t err;
- ut_ad(cursor && update && thr && roll_ptr);
+ ut_ad(thr || (flags & BTR_NO_LOCKING_FLAG));
rec = btr_cur_get_rec(cursor);
index = cursor->index;
+ ut_ad(rec_offs_validate(rec, index, offsets));
+
if (!dict_index_is_clust(index)) {
+ ut_ad(dict_index_is_online_ddl(index)
+ == !!(flags & BTR_CREATE_FLAG));
+
/* We do undo logging only when we update a clustered index
record */
return(lock_sec_rec_modify_check_and_lock(
@@ -1589,50 +1679,39 @@ btr_cur_upd_lock_and_undo(
/* Check if we have to wait for a lock: enqueue an explicit lock
request if yes */
- err = DB_SUCCESS;
-
if (!(flags & BTR_NO_LOCKING_FLAG)) {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- rec_offs_init(offsets_);
-
err = lock_clust_rec_modify_check_and_lock(
flags, btr_cur_get_block(cursor), rec, index,
- rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap), thr);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
+ offsets, thr);
if (err != DB_SUCCESS) {
-
return(err);
}
}
/* Append the info about the update in the undo log */
- err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr,
- index, NULL, update,
- cmpl_info, rec, roll_ptr);
- return(err);
+ return(trx_undo_report_row_operation(
+ flags, TRX_UNDO_MODIFY_OP, thr,
+ index, NULL, update,
+ cmpl_info, rec, offsets, roll_ptr));
}
/***********************************************************//**
Writes a redo log record of updating a record in-place. */
-UNIV_INLINE
+UNIV_INLINE __attribute__((nonnull))
void
btr_cur_update_in_place_log(
/*========================*/
ulint flags, /*!< in: flags */
- rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in: index where cursor positioned */
+ const rec_t* rec, /*!< in: record */
+ dict_index_t* index, /*!< in: index of the record */
const upd_t* update, /*!< in: update vector */
- trx_t* trx, /*!< in: transaction */
+ trx_id_t trx_id, /*!< in: transaction id */
roll_ptr_t roll_ptr, /*!< in: roll ptr */
mtr_t* mtr) /*!< in: mtr */
{
- byte* log_ptr;
- page_t* page = page_align(rec);
+ byte* log_ptr;
+ const page_t* page = page_align(rec);
ut_ad(flags < 256);
ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
@@ -1657,8 +1736,8 @@ btr_cur_update_in_place_log(
mach_write_to_1(log_ptr, flags);
log_ptr++;
- log_ptr = row_upd_write_sys_vals_to_log(index, trx, roll_ptr, log_ptr,
- mtr);
+ log_ptr = row_upd_write_sys_vals_to_log(
+ index, trx_id, roll_ptr, log_ptr, mtr);
mach_write_to_2(log_ptr, page_offset(rec));
log_ptr += 2;
@@ -1761,6 +1840,13 @@ btr_cur_update_alloc_zip(
FALSE=update-in-place */
mtr_t* mtr) /*!< in: mini-transaction */
{
+
+ /* Have a local copy of the variables as these can change
+ dynamically. */
+ bool log_compressed = page_log_compressed_pages;
+ ulint compression_level = page_compression_level;
+ page_t* page = buf_block_get_frame(block);
+
ut_a(page_zip == buf_block_get_page_zip(block));
ut_ad(page_zip);
ut_ad(!dict_index_is_ibuf(index));
@@ -1776,12 +1862,27 @@ btr_cur_update_alloc_zip(
return(FALSE);
}
- if (!page_zip_compress(page_zip, buf_block_get_frame(block),
- index, mtr)) {
+ page = buf_block_get_frame(block);
+
+ if (create && page_is_leaf(page)
+ && (length + page_get_data_size(page)
+ >= dict_index_zip_pad_optimal_page_size(index))) {
+
+ return(FALSE);
+ }
+
+ if (!page_zip_compress(
+ page_zip, page, index, compression_level,
+ log_compressed ? mtr : NULL)) {
/* Unable to compress the page */
return(FALSE);
}
+ if (mtr && !log_compressed) {
+ page_zip_compress_write_log_no_data(
+ compression_level, page, index, mtr);
+ }
+
/* After recompressing a page, we must make sure that the free
bits in the insert buffer bitmap will not exceed the free
space on the page. Because this function will not attempt
@@ -1795,8 +1896,7 @@ btr_cur_update_alloc_zip(
if (!page_zip_available(page_zip, dict_index_is_clust(index),
length, create)) {
/* Out of space: reset the free bits. */
- if (!dict_index_is_clust(index)
- && page_is_leaf(buf_block_get_frame(block))) {
+ if (!dict_index_is_clust(index) && page_is_leaf(page)) {
ibuf_reset_free_bits(block);
}
return(FALSE);
@@ -1810,45 +1910,50 @@ Updates a record when the update causes no size changes in its fields.
We assume here that the ordering fields of the record do not change.
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_update_in_place(
/*====================*/
ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor on the record to update;
cursor stays valid and positioned on the
same record */
+ const ulint* offsets,/*!< in: offsets on cursor->page_cur.rec */
const upd_t* update, /*!< in: update vector */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
- que_thr_t* thr, /*!< in: query thread */
+ que_thr_t* thr, /*!< in: query thread, or NULL if
+ appropriate flags are set */
+ trx_id_t trx_id, /*!< in: transaction id */
mtr_t* mtr) /*!< in: mtr; must be committed before
latching any further pages */
{
dict_index_t* index;
buf_block_t* block;
page_zip_des_t* page_zip;
- ulint err;
+ dberr_t err;
rec_t* rec;
roll_ptr_t roll_ptr = 0;
- trx_t* trx;
ulint was_delete_marked;
ibool is_hashed;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
rec = btr_cur_get_rec(cursor);
index = cursor->index;
+ ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
/* The insert buffer tree should never be updated in place. */
ut_ad(!dict_index_is_ibuf(index));
+ ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG)
+ || dict_index_is_clust(index));
+ ut_ad(!thr || thr_get_trx(thr)->id == trx_id);
+ ut_ad(thr || (flags & ~BTR_KEEP_POS_FLAG)
+ == (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG
+ | BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG));
+ ut_ad(fil_page_get_type(btr_cur_get_page(cursor)) == FIL_PAGE_INDEX);
+ ut_ad(btr_page_get_index_id(btr_cur_get_page(cursor)) == index->id);
- trx = thr_get_trx(thr);
- offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
#ifdef UNIV_DEBUG
- if (btr_cur_print_record_ops && thr) {
- btr_cur_trx_report(trx, index, "update ");
+ if (btr_cur_print_record_ops) {
+ btr_cur_trx_report(trx_id, index, "update ");
rec_print_new(stderr, rec, offsets);
}
#endif /* UNIV_DEBUG */
@@ -1864,19 +1969,17 @@ btr_cur_update_in_place(
}
/* Do lock checking and undo logging */
- err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
+ err = btr_cur_upd_lock_and_undo(flags, cursor, offsets,
+ update, cmpl_info,
thr, mtr, &roll_ptr);
if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
return(err);
}
if (!(flags & BTR_KEEP_SYS_FLAG)) {
- row_upd_rec_sys_fields(rec, NULL,
- index, offsets, trx, roll_ptr);
+ row_upd_rec_sys_fields(rec, NULL, index, offsets,
+ thr_get_trx(thr), roll_ptr);
}
was_delete_marked = rec_get_deleted_flag(
@@ -1917,7 +2020,7 @@ btr_cur_update_in_place(
}
btr_cur_update_in_place_log(flags, rec, index, update,
- trx, roll_ptr, mtr);
+ trx_id, roll_ptr, mtr);
if (was_delete_marked
&& !rec_get_deleted_flag(
@@ -1929,9 +2032,6 @@ btr_cur_update_in_place(
rec, index, offsets, mtr);
}
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
return(DB_SUCCESS);
}
@@ -1945,24 +2045,28 @@ fields of the record do not change.
DB_UNDERFLOW if the page would become too empty, or DB_ZIP_OVERFLOW if
there is not enough space left on the compressed page */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_optimistic_update(
/*======================*/
ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor on the record to update;
cursor stays valid and positioned on the
same record */
+ ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
const upd_t* update, /*!< in: update vector; this must also
contain trx id and roll ptr fields */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
- que_thr_t* thr, /*!< in: query thread */
+ que_thr_t* thr, /*!< in: query thread, or NULL if
+ appropriate flags are set */
+ trx_id_t trx_id, /*!< in: transaction id */
mtr_t* mtr) /*!< in: mtr; must be committed before
latching any further pages */
{
dict_index_t* index;
page_cur_t* page_cursor;
- ulint err;
+ dberr_t err;
buf_block_t* block;
page_t* page;
page_zip_des_t* page_zip;
@@ -1972,10 +2076,8 @@ btr_cur_optimistic_update(
ulint old_rec_size;
dtuple_t* new_entry;
roll_ptr_t roll_ptr;
- mem_heap_t* heap;
ulint i;
ulint n_ext;
- ulint* offsets;
block = btr_cur_get_block(cursor);
page = buf_block_get_frame(block);
@@ -1985,39 +2087,46 @@ btr_cur_optimistic_update(
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
/* The insert buffer tree should never be updated in place. */
ut_ad(!dict_index_is_ibuf(index));
-
- heap = mem_heap_create(1024);
- offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
+ ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG)
+ || dict_index_is_clust(index));
+ ut_ad(!thr || thr_get_trx(thr)->id == trx_id);
+ ut_ad(thr || (flags & ~BTR_KEEP_POS_FLAG)
+ == (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG
+ | BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG));
+ ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
+ ut_ad(btr_page_get_index_id(page) == index->id);
+
+ *offsets = rec_get_offsets(rec, index, *offsets,
+ ULINT_UNDEFINED, heap);
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
- ut_a(!rec_offs_any_null_extern(rec, offsets)
+ ut_a(!rec_offs_any_null_extern(rec, *offsets)
|| trx_is_recv(thr_get_trx(thr)));
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
#ifdef UNIV_DEBUG
- if (btr_cur_print_record_ops && thr) {
- btr_cur_trx_report(thr_get_trx(thr), index, "update ");
- rec_print_new(stderr, rec, offsets);
+ if (btr_cur_print_record_ops) {
+ btr_cur_trx_report(trx_id, index, "update ");
+ rec_print_new(stderr, rec, *offsets);
}
#endif /* UNIV_DEBUG */
- if (!row_upd_changes_field_size_or_external(index, offsets, update)) {
+ if (!row_upd_changes_field_size_or_external(index, *offsets, update)) {
/* The simplest and the most common case: the update does not
change the size of any field and none of the updated fields is
externally stored in rec or update, and there is enough space
on the compressed page to log the update. */
- mem_heap_free(heap);
- return(btr_cur_update_in_place(flags, cursor, update,
- cmpl_info, thr, mtr));
+ return(btr_cur_update_in_place(
+ flags, cursor, *offsets, update,
+ cmpl_info, thr, trx_id, mtr));
}
- if (rec_offs_any_extern(offsets)) {
+ if (rec_offs_any_extern(*offsets)) {
any_extern:
/* Externally stored fields are treated in pessimistic
update */
- mem_heap_free(heap);
return(DB_OVERFLOW);
}
@@ -2030,8 +2139,14 @@ any_extern:
page_cursor = btr_cur_get_page_cur(cursor);
- new_entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index, offsets,
- &n_ext, heap);
+ if (!*heap) {
+ *heap = mem_heap_create(
+ rec_offs_size(*offsets)
+ + DTUPLE_EST_ALLOC(rec_offs_n_fields(*offsets)));
+ }
+
+ new_entry = row_rec_to_index_entry(rec, index, *offsets,
+ &n_ext, *heap);
/* We checked above that there are no externally stored fields. */
ut_a(!n_ext);
@@ -2039,8 +2154,8 @@ any_extern:
corresponding to new_entry is latched in mtr.
Thus the following call is safe. */
row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
- FALSE, heap);
- old_rec_size = rec_offs_size(offsets);
+ FALSE, *heap);
+ old_rec_size = rec_offs_size(*offsets);
new_rec_size = rec_get_converted_size(index, new_entry, 0);
page_zip = buf_block_get_page_zip(block);
@@ -2051,16 +2166,14 @@ any_extern:
if (page_zip
&& !btr_cur_update_alloc_zip(page_zip, block, index,
new_rec_size, TRUE, mtr)) {
- err = DB_ZIP_OVERFLOW;
- goto err_exit;
+ return(DB_ZIP_OVERFLOW);
}
if (UNIV_UNLIKELY(new_rec_size
>= (page_get_free_space_of_empty(page_is_comp(page))
/ 2))) {
- err = DB_OVERFLOW;
- goto err_exit;
+ return(DB_OVERFLOW);
}
if (UNIV_UNLIKELY(page_get_data_size(page)
@@ -2069,8 +2182,7 @@ any_extern:
/* The page would become too empty */
- err = DB_UNDERFLOW;
- goto err_exit;
+ return(DB_UNDERFLOW);
}
/* We do not attempt to reorganize if the page is compressed.
@@ -2088,16 +2200,16 @@ any_extern:
reorganize: for simplicity, we decide what to do assuming a
reorganization is needed, though it might not be necessary */
- err = DB_OVERFLOW;
- goto err_exit;
+ return(DB_OVERFLOW);
}
/* Do lock checking and undo logging */
- err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
+ err = btr_cur_upd_lock_and_undo(flags, cursor, *offsets,
+ update, cmpl_info,
thr, mtr, &roll_ptr);
if (err != DB_SUCCESS) {
- goto err_exit;
+ return(err);
}
/* Ok, we may do the replacement. Store on the page infimum the
@@ -2108,13 +2220,7 @@ any_extern:
btr_search_update_hash_on_delete(cursor);
- /* The call to row_rec_to_index_entry(ROW_COPY_DATA, ...) above
- invokes rec_offs_make_valid() to point to the copied record that
- the fields of new_entry point to. We have to undo it here. */
- ut_ad(rec_offs_validate(NULL, index, offsets));
- rec_offs_make_valid(page_cur_get_rec(page_cursor), index, offsets);
-
- page_cur_delete_rec(page_cursor, index, offsets, mtr);
+ page_cur_delete_rec(page_cursor, index, *offsets, mtr);
page_cur_move_to_prev(page_cursor);
@@ -2122,11 +2228,12 @@ any_extern:
row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
roll_ptr);
row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
- thr_get_trx(thr)->id);
+ trx_id);
}
/* There are no externally stored columns in new_entry */
- rec = btr_cur_insert_if_possible(cursor, new_entry, 0/*n_ext*/, mtr);
+ rec = btr_cur_insert_if_possible(
+ cursor, new_entry, offsets, heap, 0/*n_ext*/, mtr);
ut_a(rec); /* <- We calculated above the insert would fit */
if (page_zip && !dict_index_is_clust(index)
@@ -2141,10 +2248,7 @@ any_extern:
page_cur_move_to_next(page_cursor);
- err = DB_SUCCESS;
-err_exit:
- mem_heap_free(heap);
- return(err);
+ return(DB_SUCCESS);
}
/*************************************************************//**
@@ -2203,7 +2307,7 @@ own x-latches to brothers of page, if those brothers exist. We assume
here that the ordering fields of the record do not change.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_pessimistic_update(
/*=======================*/
ulint flags, /*!< in: undo logging, locking, and rollback
@@ -2211,7 +2315,13 @@ btr_cur_pessimistic_update(
btr_cur_t* cursor, /*!< in/out: cursor on the record to update;
cursor may become invalid if *big_rec == NULL
|| !(flags & BTR_KEEP_POS_FLAG) */
- mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
+ ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */
+ mem_heap_t** offsets_heap,
+ /*!< in/out: pointer to memory heap
+ that can be emptied, or NULL */
+ mem_heap_t* entry_heap,
+ /*!< in/out: memory heap for allocating
+ big_rec and the index tuple */
big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
be stored externally by the caller, or NULL */
const upd_t* update, /*!< in: update vector; this is allowed also
@@ -2219,7 +2329,9 @@ btr_cur_pessimistic_update(
the values in update vector have no effect */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
- que_thr_t* thr, /*!< in: query thread */
+ que_thr_t* thr, /*!< in: query thread, or NULL if
+ appropriate flags are set */
+ trx_id_t trx_id, /*!< in: transaction id */
mtr_t* mtr) /*!< in: mtr; must be committed before
latching any further pages */
{
@@ -2231,17 +2343,15 @@ btr_cur_pessimistic_update(
page_zip_des_t* page_zip;
rec_t* rec;
page_cur_t* page_cursor;
- dtuple_t* new_entry;
- ulint err;
- ulint optim_err;
+ dberr_t err;
+ dberr_t optim_err;
roll_ptr_t roll_ptr;
- trx_t* trx;
ibool was_first;
ulint n_extents = 0;
ulint n_reserved;
ulint n_ext;
- ulint* offsets = NULL;
+ *offsets = NULL;
*big_rec = NULL;
block = btr_cur_get_block(cursor);
@@ -2258,9 +2368,16 @@ btr_cur_pessimistic_update(
#endif /* UNIV_ZIP_DEBUG */
/* The insert buffer tree should never be updated in place. */
ut_ad(!dict_index_is_ibuf(index));
+ ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG)
+ || dict_index_is_clust(index));
+ ut_ad(!thr || thr_get_trx(thr)->id == trx_id);
+ ut_ad(thr || (flags & ~BTR_KEEP_POS_FLAG)
+ == (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG
+ | BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG));
- optim_err = btr_cur_optimistic_update(flags, cursor, update,
- cmpl_info, thr, mtr);
+ optim_err = btr_cur_optimistic_update(
+ flags, cursor, offsets, offsets_heap, update,
+ cmpl_info, thr, trx_id, mtr);
switch (optim_err) {
case DB_UNDERFLOW:
@@ -2272,7 +2389,8 @@ btr_cur_pessimistic_update(
}
/* Do lock checking and undo logging */
- err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info,
+ err = btr_cur_upd_lock_and_undo(flags, cursor, *offsets,
+ update, cmpl_info,
thr, mtr, &roll_ptr);
if (err != DB_SUCCESS) {
@@ -2300,20 +2418,11 @@ btr_cur_pessimistic_update(
}
}
- if (!*heap) {
- *heap = mem_heap_create(1024);
- }
- offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, heap);
-
- trx = thr_get_trx(thr);
+ *offsets = rec_get_offsets(
+ rec, index, *offsets, ULINT_UNDEFINED, offsets_heap);
- new_entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index, offsets,
- &n_ext, *heap);
- /* The call to row_rec_to_index_entry(ROW_COPY_DATA, ...) above
- invokes rec_offs_make_valid() to point to the copied record that
- the fields of new_entry point to. We have to undo it here. */
- ut_ad(rec_offs_validate(NULL, index, offsets));
- rec_offs_make_valid(rec, index, offsets);
+ dtuple_t* new_entry = row_rec_to_index_entry(
+ rec, index, *offsets, &n_ext, entry_heap);
/* The page containing the clustered index record
corresponding to new_entry is latched in mtr. If the
@@ -2322,15 +2431,15 @@ btr_cur_pessimistic_update(
purge would also have removed the clustered index record
itself. Thus the following call is safe. */
row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
- FALSE, *heap);
+ FALSE, entry_heap);
if (!(flags & BTR_KEEP_SYS_FLAG)) {
row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
roll_ptr);
row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
- trx->id);
+ trx_id);
}
- if ((flags & BTR_NO_UNDO_LOG_FLAG) && rec_offs_any_extern(offsets)) {
+ if ((flags & BTR_NO_UNDO_LOG_FLAG) && rec_offs_any_extern(*offsets)) {
/* We are in a transaction rollback undoing a row
update: we must free possible externally stored fields
which got new values in the update, if they are not
@@ -2341,16 +2450,17 @@ btr_cur_pessimistic_update(
ut_ad(big_rec_vec == NULL);
btr_rec_free_updated_extern_fields(
- index, rec, page_zip, offsets, update,
- trx_is_recv(trx) ? RB_RECOVERY : RB_NORMAL, mtr);
+ index, rec, page_zip, *offsets, update,
+ trx_is_recv(thr_get_trx(thr))
+ ? RB_RECOVERY : RB_NORMAL, mtr);
}
/* We have to set appropriate extern storage bits in the new
record to be inserted: we have to remember which fields were such */
ut_ad(!page_is_comp(page) || !rec_get_node_ptr_flag(rec));
- ut_ad(rec_offs_validate(rec, index, offsets));
- n_ext += btr_push_update_extern_fields(new_entry, update, *heap);
+ ut_ad(rec_offs_validate(rec, index, *offsets));
+ n_ext += btr_push_update_extern_fields(new_entry, update, entry_heap);
if (page_zip) {
ut_ad(page_is_comp(page));
@@ -2396,11 +2506,12 @@ make_external:
#endif /* UNIV_ZIP_DEBUG */
page_cursor = btr_cur_get_page_cur(cursor);
- page_cur_delete_rec(page_cursor, index, offsets, mtr);
+ page_cur_delete_rec(page_cursor, index, *offsets, mtr);
page_cur_move_to_prev(page_cursor);
- rec = btr_cur_insert_if_possible(cursor, new_entry, n_ext, mtr);
+ rec = btr_cur_insert_if_possible(cursor, new_entry,
+ offsets, offsets_heap, n_ext, mtr);
if (rec) {
page_cursor->rec = rec;
@@ -2408,20 +2519,19 @@ make_external:
lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor),
rec, block);
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, heap);
-
- if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) {
+ if (!rec_get_deleted_flag(rec, rec_offs_comp(*offsets))) {
/* The new inserted record owns its possible externally
stored fields */
- btr_cur_unmark_extern_fields(page_zip,
- rec, index, offsets, mtr);
+ btr_cur_unmark_extern_fields(
+ page_zip, rec, index, *offsets, mtr);
}
- btr_cur_compress_if_useful(
- cursor,
- big_rec_vec != NULL && (flags & BTR_KEEP_POS_FLAG),
- mtr);
+ bool adjust = big_rec_vec && (flags & BTR_KEEP_POS_FLAG);
+
+ if (btr_cur_compress_if_useful(cursor, adjust, mtr)
+ && adjust) {
+ rec_offs_make_valid(page_cursor->rec, index, *offsets);
+ }
if (page_zip && !dict_index_is_clust(index)
&& page_is_leaf(page)) {
@@ -2440,8 +2550,7 @@ make_external:
ut_a(page_zip || optim_err != DB_UNDERFLOW);
/* Out of space: reset the free bits. */
- if (!dict_index_is_clust(index)
- && page_is_leaf(page)) {
+ if (!dict_index_is_clust(index) && page_is_leaf(page)) {
ibuf_reset_free_bits(block);
}
}
@@ -2473,11 +2582,13 @@ make_external:
err = btr_cur_pessimistic_insert(BTR_NO_UNDO_LOG_FLAG
| BTR_NO_LOCKING_FLAG
| BTR_KEEP_SYS_FLAG,
- cursor, new_entry, &rec,
+ cursor, offsets, offsets_heap,
+ new_entry, &rec,
&dummy_big_rec, n_ext, NULL, mtr);
ut_a(rec);
ut_a(err == DB_SUCCESS);
ut_a(dummy_big_rec == NULL);
+ ut_ad(rec_offs_validate(rec, cursor->index, *offsets));
page_cursor->rec = rec;
if (dict_index_is_sec_or_ibuf(index)) {
@@ -2490,10 +2601,10 @@ make_external:
page_update_max_trx_id(rec_block,
buf_block_get_page_zip(rec_block),
- trx->id, mtr);
+ trx_id, mtr);
}
- if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) {
+ if (!rec_get_deleted_flag(rec, rec_offs_comp(*offsets))) {
/* The new inserted record owns its possible externally
stored fields */
buf_block_t* rec_block = btr_cur_get_block(cursor);
@@ -2504,10 +2615,8 @@ make_external:
#endif /* UNIV_ZIP_DEBUG */
page_zip = buf_block_get_page_zip(rec_block);
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, heap);
btr_cur_unmark_extern_fields(page_zip,
- rec, index, offsets, mtr);
+ rec, index, *offsets, mtr);
}
lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor),
@@ -2546,17 +2655,13 @@ UNIV_INLINE
void
btr_cur_del_mark_set_clust_rec_log(
/*===============================*/
- ulint flags, /*!< in: flags */
rec_t* rec, /*!< in: record */
dict_index_t* index, /*!< in: index of the record */
- ibool val, /*!< in: value to set */
- trx_t* trx, /*!< in: deleting transaction */
+ trx_id_t trx_id, /*!< in: transaction id */
roll_ptr_t roll_ptr,/*!< in: roll ptr to the undo log record */
mtr_t* mtr) /*!< in: mtr */
{
byte* log_ptr;
- ut_ad(flags < 256);
- ut_ad(val <= 1);
ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
@@ -2572,13 +2677,11 @@ btr_cur_del_mark_set_clust_rec_log(
return;
}
- mach_write_to_1(log_ptr, flags);
- log_ptr++;
- mach_write_to_1(log_ptr, val);
- log_ptr++;
+ *log_ptr++ = 0;
+ *log_ptr++ = 1;
- log_ptr = row_upd_write_sys_vals_to_log(index, trx, roll_ptr, log_ptr,
- mtr);
+ log_ptr = row_upd_write_sys_vals_to_log(
+ index, trx_id, roll_ptr, log_ptr, mtr);
mach_write_to_2(log_ptr, page_offset(rec));
log_ptr += 2;
@@ -2675,20 +2778,18 @@ of the deleting transaction, and in the roll ptr field pointer to the
undo log record created.
@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_del_mark_set_clust_rec(
/*===========================*/
- ulint flags, /*!< in: undo logging and locking flags */
buf_block_t* block, /*!< in/out: buffer block of the record */
rec_t* rec, /*!< in/out: record */
dict_index_t* index, /*!< in: clustered index of the record */
const ulint* offsets,/*!< in: rec_get_offsets(rec) */
- ibool val, /*!< in: value to set */
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr) /*!< in: mtr */
{
roll_ptr_t roll_ptr;
- ulint err;
+ dberr_t err;
page_zip_des_t* page_zip;
trx_t* trx;
@@ -2700,7 +2801,7 @@ btr_cur_del_mark_set_clust_rec(
#ifdef UNIV_DEBUG
if (btr_cur_print_record_ops && thr) {
- btr_cur_trx_report(thr_get_trx(thr), index, "del mark ");
+ btr_cur_trx_report(thr_get_trx(thr)->id, index, "del mark ");
rec_print_new(stderr, rec, offsets);
}
#endif /* UNIV_DEBUG */
@@ -2708,7 +2809,7 @@ btr_cur_del_mark_set_clust_rec(
ut_ad(dict_index_is_clust(index));
ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
- err = lock_clust_rec_modify_check_and_lock(flags, block,
+ err = lock_clust_rec_modify_check_and_lock(BTR_NO_LOCKING_FLAG, block,
rec, index, offsets, thr);
if (err != DB_SUCCESS) {
@@ -2716,8 +2817,8 @@ btr_cur_del_mark_set_clust_rec(
return(err);
}
- err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr,
- index, NULL, NULL, 0, rec,
+ err = trx_undo_report_row_operation(0, TRX_UNDO_MODIFY_OP, thr,
+ index, NULL, NULL, 0, rec, offsets,
&roll_ptr);
if (err != DB_SUCCESS) {
@@ -2730,17 +2831,21 @@ btr_cur_del_mark_set_clust_rec(
page_zip = buf_block_get_page_zip(block);
- btr_blob_dbg_set_deleted_flag(rec, index, offsets, val);
- btr_rec_set_deleted_flag(rec, page_zip, val);
+ btr_blob_dbg_set_deleted_flag(rec, index, offsets, TRUE);
+ btr_rec_set_deleted_flag(rec, page_zip, TRUE);
trx = thr_get_trx(thr);
- if (!(flags & BTR_KEEP_SYS_FLAG)) {
- row_upd_rec_sys_fields(rec, page_zip,
- index, offsets, trx, roll_ptr);
+ if (dict_index_is_online_ddl(index)) {
+ row_log_table_delete(
+ rec, index, offsets,
+ trx_read_trx_id(row_get_trx_id_offset(index, offsets)
+ + rec));
}
- btr_cur_del_mark_set_clust_rec_log(flags, rec, index, val, trx,
+ row_upd_rec_sys_fields(rec, page_zip, index, offsets, trx, roll_ptr);
+
+ btr_cur_del_mark_set_clust_rec_log(rec, index, trx->id,
roll_ptr, mtr);
return(err);
@@ -2829,7 +2934,7 @@ btr_cur_parse_del_mark_set_sec_rec(
Sets a secondary index record delete mark to TRUE or FALSE.
@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_del_mark_set_sec_rec(
/*=========================*/
ulint flags, /*!< in: locking flag */
@@ -2840,14 +2945,14 @@ btr_cur_del_mark_set_sec_rec(
{
buf_block_t* block;
rec_t* rec;
- ulint err;
+ dberr_t err;
block = btr_cur_get_block(cursor);
rec = btr_cur_get_rec(cursor);
#ifdef UNIV_DEBUG
if (btr_cur_print_record_ops && thr) {
- btr_cur_trx_report(thr_get_trx(thr), cursor->index,
+ btr_cur_trx_report(thr_get_trx(thr)->id, cursor->index,
"del mark ");
rec_print(stderr, rec, cursor->index);
}
@@ -2937,12 +3042,15 @@ positioned, but no latch on the whole tree.
@return TRUE if success, i.e., the page did not become too empty */
UNIV_INTERN
ibool
-btr_cur_optimistic_delete(
-/*======================*/
+btr_cur_optimistic_delete_func(
+/*===========================*/
btr_cur_t* cursor, /*!< in: cursor on leaf page, on the record to
delete; cursor stays valid: if deletion
succeeds, on function exit it points to the
successor of the deleted record */
+#ifdef UNIV_DEBUG
+ ulint flags, /*!< in: BTR_CREATE_FLAG or 0 */
+#endif /* UNIV_DEBUG */
mtr_t* mtr) /*!< in: mtr; if this function returns
TRUE on a leaf page of a secondary
index, the mtr must be committed
@@ -2956,6 +3064,7 @@ btr_cur_optimistic_delete(
ibool no_compress_needed;
rec_offs_init(offsets_);
+ ut_ad(flags == 0 || flags == BTR_CREATE_FLAG);
ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
MTR_MEMO_PAGE_X_FIX));
/* This is intended only for leaf page deletions */
@@ -2963,6 +3072,9 @@ btr_cur_optimistic_delete(
block = btr_cur_get_block(cursor);
ut_ad(page_is_leaf(buf_block_get_frame(block)));
+ ut_ad(!dict_index_is_online_ddl(cursor->index)
+ || dict_index_is_clust(cursor->index)
+ || (flags & BTR_CREATE_FLAG));
rec = btr_cur_get_rec(cursor);
offsets = rec_get_offsets(rec, cursor->index, offsets,
@@ -3030,7 +3142,7 @@ UNIV_INTERN
ibool
btr_cur_pessimistic_delete(
/*=======================*/
- ulint* err, /*!< out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
+ dberr_t* err, /*!< out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
the latter may occur because we may have
to update node pointers on upper levels,
and in the case of variable length keys
@@ -3043,6 +3155,7 @@ btr_cur_pessimistic_delete(
if compression does not occur, the cursor
stays valid: it points to successor of
deleted record on function exit */
+ ulint flags, /*!< in: BTR_CREATE_FLAG or 0 */
enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
mtr_t* mtr) /*!< in: mtr */
{
@@ -3051,7 +3164,6 @@ btr_cur_pessimistic_delete(
page_zip_des_t* page_zip;
dict_index_t* index;
rec_t* rec;
- dtuple_t* node_ptr;
ulint n_extents = 0;
ulint n_reserved;
ibool success;
@@ -3064,6 +3176,10 @@ btr_cur_pessimistic_delete(
page = buf_block_get_frame(block);
index = btr_cur_get_index(cursor);
+ ut_ad(flags == 0 || flags == BTR_CREATE_FLAG);
+ ut_ad(!dict_index_is_online_ddl(index)
+ || dict_index_is_clust(index)
+ || (flags & BTR_CREATE_FLAG));
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
MTR_MEMO_X_LOCK));
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
@@ -3112,13 +3228,15 @@ btr_cur_pessimistic_delete(
btr_discard_page(cursor, mtr);
- *err = DB_SUCCESS;
ret = TRUE;
goto return_after_reservations;
}
- lock_update_delete(block, rec);
+ if (flags == 0) {
+ lock_update_delete(block, rec);
+ }
+
level = btr_page_get_level(page, mtr);
if (level > 0
@@ -3147,12 +3265,12 @@ btr_cur_pessimistic_delete(
btr_node_ptr_delete(index, block, mtr);
- node_ptr = dict_index_build_node_ptr(
+ dtuple_t* node_ptr = dict_index_build_node_ptr(
index, next_rec, buf_block_get_page_no(block),
heap, level);
- btr_insert_on_non_leaf_level(index,
- level + 1, node_ptr, mtr);
+ btr_insert_on_non_leaf_level(
+ flags, index, level + 1, node_ptr, mtr);
}
}
@@ -3165,9 +3283,9 @@ btr_cur_pessimistic_delete(
ut_ad(btr_check_node_ptr(index, block, mtr));
+return_after_reservations:
*err = DB_SUCCESS;
-return_after_reservations:
mem_heap_free(heap);
if (ret == FALSE) {
@@ -3194,8 +3312,8 @@ btr_cur_add_path_info(
ulint root_height) /*!< in: root node height in tree */
{
btr_path_t* slot;
- rec_t* rec;
- page_t* page;
+ const rec_t* rec;
+ const page_t* page;
ut_a(cursor->path_arr);
@@ -3407,6 +3525,9 @@ btr_estimate_n_rows_in_range(
ibool is_n_rows_exact;
ulint i;
mtr_t mtr;
+ ib_int64_t table_n_rows;
+
+ table_n_rows = dict_table_get_n_rows(index->table);
mtr_start(&mtr);
@@ -3419,9 +3540,9 @@ btr_estimate_n_rows_in_range(
&cursor, 0,
__FILE__, __LINE__, &mtr);
} else {
- btr_cur_open_at_index_side(TRUE, index,
+ btr_cur_open_at_index_side(true, index,
BTR_SEARCH_LEAF | BTR_ESTIMATE,
- &cursor, &mtr);
+ &cursor, 0, &mtr);
}
mtr_commit(&mtr);
@@ -3437,9 +3558,9 @@ btr_estimate_n_rows_in_range(
&cursor, 0,
__FILE__, __LINE__, &mtr);
} else {
- btr_cur_open_at_index_side(FALSE, index,
+ btr_cur_open_at_index_side(false, index,
BTR_SEARCH_LEAF | BTR_ESTIMATE,
- &cursor, &mtr);
+ &cursor, 0, &mtr);
}
mtr_commit(&mtr);
@@ -3471,20 +3592,21 @@ btr_estimate_n_rows_in_range(
n_rows = n_rows * 2;
}
+ DBUG_EXECUTE_IF("bug14007649", return(n_rows););
+
/* Do not estimate the number of rows in the range
to over 1 / 2 of the estimated rows in the whole
table */
- if (n_rows > index->table->stat_n_rows / 2
- && !is_n_rows_exact) {
+ if (n_rows > table_n_rows / 2 && !is_n_rows_exact) {
- n_rows = index->table->stat_n_rows / 2;
+ n_rows = table_n_rows / 2;
/* If there are just 0 or 1 rows in the table,
then we estimate all rows are in the range */
if (n_rows == 0) {
- n_rows = index->table->stat_n_rows;
+ n_rows = table_n_rows;
}
}
@@ -3544,9 +3666,9 @@ btr_estimate_n_rows_in_range(
/*******************************************************************//**
Record the number of non_null key values in a given index for
-each n-column prefix of the index where n < dict_index_get_n_unique(index).
+each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index).
The estimates are eventually stored in the array:
-index->stat_n_non_null_key_vals. */
+index->stat_n_non_null_key_vals[], which is indexed from 0 to n-1. */
static
void
btr_record_not_null_field_in_rec(
@@ -3557,7 +3679,7 @@ btr_record_not_null_field_in_rec(
const ulint* offsets, /*!< in: rec_get_offsets(rec, index),
its size could be for all fields or
that of "n_unique" */
- ib_int64_t* n_not_null) /*!< in/out: array to record number of
+ ib_uint64_t* n_not_null) /*!< in/out: array to record number of
not null rows for n-column prefix */
{
ulint i;
@@ -3579,11 +3701,12 @@ btr_record_not_null_field_in_rec(
/*******************************************************************//**
Estimates the number of different key values in a given index, for
-each n-column prefix of the index where n <= dict_index_get_n_unique(index).
-The estimates are stored in the array index->stat_n_diff_key_vals[] and
-the number of pages that were sampled is saved in index->stat_n_sample_sizes[].
-If innodb_stats_method is "nulls_ignored", we also record the number of
-non-null values for each prefix and store the estimates in
+each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index).
+The estimates are stored in the array index->stat_n_diff_key_vals[] (indexed
+0..n_uniq-1) and the number of pages that were sampled is saved in
+index->stat_n_sample_sizes[].
+If innodb_stats_method is nulls_ignored, we also record the number of
+non-null values for each prefix and stored the estimates in
array index->stat_n_non_null_key_vals. */
UNIV_INTERN
void
@@ -3597,8 +3720,8 @@ btr_estimate_number_of_different_key_vals(
ulint n_cols;
ulint matched_fields;
ulint matched_bytes;
- ib_int64_t* n_diff;
- ib_int64_t* n_not_null;
+ ib_uint64_t* n_diff;
+ ib_uint64_t* n_not_null;
ibool stats_null_not_equal;
ullint n_sample_pages; /* number of pages to sample */
ulint not_empty_flag = 0;
@@ -3614,13 +3737,13 @@ btr_estimate_number_of_different_key_vals(
n_cols = dict_index_get_n_unique(index);
heap = mem_heap_create((sizeof *n_diff + sizeof *n_not_null)
- * (n_cols + 1)
+ * n_cols
+ dict_index_get_n_fields(index)
* (sizeof *offsets_rec
+ sizeof *offsets_next_rec));
- n_diff = (ib_int64_t*) mem_heap_zalloc(heap, (n_cols + 1)
- * sizeof(ib_int64_t));
+ n_diff = (ib_uint64_t*) mem_heap_zalloc(
+ heap, n_cols * sizeof(ib_int64_t));
n_not_null = NULL;
@@ -3629,8 +3752,8 @@ btr_estimate_number_of_different_key_vals(
considered equal (by setting stats_null_not_equal value) */
switch (srv_innodb_stats_method) {
case SRV_STATS_NULLS_IGNORED:
- n_not_null = (ib_int64_t*) mem_heap_zalloc(heap, (n_cols + 1)
- * sizeof *n_not_null);
+ n_not_null = (ib_uint64_t*) mem_heap_zalloc(
+ heap, n_cols * sizeof *n_not_null);
/* fall through */
case SRV_STATS_NULLS_UNEQUAL:
@@ -3681,7 +3804,7 @@ btr_estimate_number_of_different_key_vals(
offsets_rec = rec_get_offsets(rec, index, offsets_rec,
ULINT_UNDEFINED, &heap);
- if (n_not_null) {
+ if (n_not_null != NULL) {
btr_record_not_null_field_in_rec(
n_cols, offsets_rec, n_not_null);
}
@@ -3709,14 +3832,14 @@ btr_estimate_number_of_different_key_vals(
&matched_fields,
&matched_bytes);
- for (j = matched_fields + 1; j <= n_cols; j++) {
+ for (j = matched_fields; j < n_cols; j++) {
/* We add one if this index record has
a different prefix from the previous */
n_diff[j]++;
}
- if (n_not_null) {
+ if (n_not_null != NULL) {
btr_record_not_null_field_in_rec(
n_cols, offsets_next_rec, n_not_null);
}
@@ -3751,7 +3874,7 @@ btr_estimate_number_of_different_key_vals(
if (btr_page_get_prev(page, &mtr) != FIL_NULL
|| btr_page_get_next(page, &mtr) != FIL_NULL) {
- n_diff[n_cols]++;
+ n_diff[n_cols - 1]++;
}
}
@@ -3766,7 +3889,7 @@ btr_estimate_number_of_different_key_vals(
also the pages used for external storage of fields (those pages are
included in index->stat_n_leaf_pages) */
- for (j = 0; j <= n_cols; j++) {
+ for (j = 0; j < n_cols; j++) {
index->stat_n_diff_key_vals[j]
= BTR_TABLE_STATS_FROM_SAMPLE(
n_diff[j], index, n_sample_pages,
@@ -3796,7 +3919,7 @@ btr_estimate_number_of_different_key_vals(
sampled result. stat_n_non_null_key_vals[] is created
and initialized to zero in dict_index_add_to_cache(),
along with stat_n_diff_key_vals[] array */
- if (n_not_null != NULL && (j < n_cols)) {
+ if (n_not_null != NULL) {
index->stat_n_non_null_key_vals[j] =
BTR_TABLE_STATS_FROM_SAMPLE(
n_not_null[j], index, n_sample_pages,
@@ -4146,7 +4269,7 @@ The fields are stored on pages allocated from leaf node
file segment of the index tree.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
UNIV_INTERN
-enum db_err
+dberr_t
btr_store_big_rec_extern_fields(
/*============================*/
dict_index_t* index, /*!< in: index of rec; the index tree
@@ -4180,7 +4303,7 @@ btr_store_big_rec_extern_fields(
z_stream c_stream;
buf_block_t** freed_pages = NULL;
ulint n_freed_pages = 0;
- enum db_err error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(rec_offs_any_extern(offsets));
@@ -4211,7 +4334,7 @@ btr_store_big_rec_extern_fields(
heap = mem_heap_create(250000);
page_zip_set_alloc(&c_stream, heap);
- err = deflateInit2(&c_stream, Z_DEFAULT_COMPRESSION,
+ err = deflateInit2(&c_stream, page_compression_level,
Z_DEFLATED, 15, 7, Z_DEFAULT_STRATEGY);
ut_a(err == Z_OK);
}
@@ -5083,6 +5206,7 @@ btr_copy_zblob_prefix(
" page %lu space %lu\n",
(ulong) fil_page_get_type(bpage->zip.data),
(ulong) page_no, (ulong) space_id);
+ ut_ad(0);
goto end_of_blob;
}
diff --git a/storage/innobase/btr/btr0pcur.cc b/storage/innobase/btr/btr0pcur.cc
index 5a67afc7e69..aceb6bd1d41 100644
--- a/storage/innobase/btr/btr0pcur.cc
+++ b/storage/innobase/btr/btr0pcur.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -133,6 +133,8 @@ btr_pcur_store_position(
ut_a(btr_page_get_next(page, mtr) == FIL_NULL);
ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
+ ut_ad(page_is_leaf(page));
+ ut_ad(page_get_page_no(page) == index->page);
cursor->old_stored = BTR_PCUR_OLD_STORED;
@@ -258,7 +260,8 @@ btr_pcur_restore_position_func(
btr_cur_open_at_index_side(
cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE,
- index, latch_mode, btr_pcur_get_btr_cur(cursor), mtr);
+ index, latch_mode,
+ btr_pcur_get_btr_cur(cursor), 0, mtr);
cursor->latch_mode = latch_mode;
cursor->pos_state = BTR_PCUR_IS_POSITIONED;
@@ -326,13 +329,19 @@ btr_pcur_restore_position_func(
/* Save the old search mode of the cursor */
old_mode = cursor->search_mode;
- if (UNIV_LIKELY(cursor->rel_pos == BTR_PCUR_ON)) {
+ switch (cursor->rel_pos) {
+ case BTR_PCUR_ON:
mode = PAGE_CUR_LE;
- } else if (cursor->rel_pos == BTR_PCUR_AFTER) {
+ break;
+ case BTR_PCUR_AFTER:
mode = PAGE_CUR_G;
- } else {
- ut_ad(cursor->rel_pos == BTR_PCUR_BEFORE);
+ break;
+ case BTR_PCUR_BEFORE:
mode = PAGE_CUR_L;
+ break;
+ default:
+ ut_error;
+ mode = 0;
}
btr_pcur_open_with_no_init_func(index, tuple, mode, latch_mode,
@@ -341,25 +350,39 @@ btr_pcur_restore_position_func(
/* Restore the old search mode */
cursor->search_mode = old_mode;
- if (cursor->rel_pos == BTR_PCUR_ON
- && btr_pcur_is_on_user_rec(cursor)
- && 0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor),
- rec_get_offsets(
- btr_pcur_get_rec(cursor), index,
- NULL, ULINT_UNDEFINED, &heap))) {
-
- /* We have to store the NEW value for the modify clock, since
- the cursor can now be on a different page! But we can retain
- the value of old_rec */
-
- cursor->block_when_stored = btr_pcur_get_block(cursor);
- cursor->modify_clock = buf_block_get_modify_clock(
- cursor->block_when_stored);
- cursor->old_stored = BTR_PCUR_OLD_STORED;
-
- mem_heap_free(heap);
-
- return(TRUE);
+ switch (cursor->rel_pos) {
+ case BTR_PCUR_ON:
+ if (btr_pcur_is_on_user_rec(cursor)
+ && !cmp_dtuple_rec(
+ tuple, btr_pcur_get_rec(cursor),
+ rec_get_offsets(btr_pcur_get_rec(cursor),
+ index, NULL,
+ ULINT_UNDEFINED, &heap))) {
+
+ /* We have to store the NEW value for
+ the modify clock, since the cursor can
+ now be on a different page! But we can
+ retain the value of old_rec */
+
+ cursor->block_when_stored =
+ btr_pcur_get_block(cursor);
+ cursor->modify_clock =
+ buf_block_get_modify_clock(
+ cursor->block_when_stored);
+ cursor->old_stored = BTR_PCUR_OLD_STORED;
+
+ mem_heap_free(heap);
+
+ return(TRUE);
+ }
+#ifdef UNIV_DEBUG
+ /* fall through */
+ case BTR_PCUR_BEFORE:
+ case BTR_PCUR_AFTER:
+ break;
+ default:
+ ut_error;
+#endif /* UNIV_DEBUG */
}
mem_heap_free(heap);
diff --git a/storage/innobase/btr/btr0sea.cc b/storage/innobase/btr/btr0sea.cc
index 7e6e2ef1cb1..432fef05dd5 100644
--- a/storage/innobase/btr/btr0sea.cc
+++ b/storage/innobase/btr/btr0sea.cc
@@ -42,7 +42,6 @@ Created 2/17/1996 Heikki Tuuri
#include "btr0pcur.h"
#include "btr0btr.h"
#include "ha0ha.h"
-#include "srv0mon.h"
/** Flag: has the search system been enabled?
Protected by btr_search_latch. */
@@ -1077,6 +1076,7 @@ btr_search_drop_page_hash_index(
mem_heap_t* heap;
const dict_index_t* index;
ulint* offsets;
+ btr_search_t* info;
#ifdef UNIV_SYNC_DEBUG
ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
@@ -1102,6 +1102,27 @@ retry:
}
ut_a(!dict_index_is_ibuf(index));
+#ifdef UNIV_DEBUG
+ switch (dict_index_get_online_status(index)) {
+ case ONLINE_INDEX_CREATION:
+ /* The index is being created (bulk loaded). */
+ case ONLINE_INDEX_COMPLETE:
+ /* The index has been published. */
+ case ONLINE_INDEX_ABORTED:
+ /* Either the index creation was aborted due to an
+ error observed by InnoDB (in which case there should
+ not be any adaptive hash index entries), or it was
+ completed and then flagged aborted in
+ rollback_inplace_alter_table(). */
+ break;
+ case ONLINE_INDEX_ABORTED_DROPPED:
+ /* The index should have been dropped from the tablespace
+ already, and the adaptive hash index entries should have
+ been dropped as well. */
+ ut_error;
+ }
+#endif /* UNIV_DEBUG */
+
table = btr_search_sys->hash_index;
#ifdef UNIV_SYNC_DEBUG
@@ -1196,8 +1217,9 @@ next_rec:
ha_remove_all_nodes_to_page(table, folds[i], page);
}
- ut_a(index->search_info->ref_count > 0);
- index->search_info->ref_count--;
+ info = btr_search_get_info(block->index);
+ ut_a(info->ref_count > 0);
+ info->ref_count--;
block->index = NULL;
diff --git a/storage/innobase/buf/buf0buddy.cc b/storage/innobase/buf/buf0buddy.cc
index b6774aede8e..e34216dbc8f 100644
--- a/storage/innobase/buf/buf0buddy.cc
+++ b/storage/innobase/buf/buf0buddy.cc
@@ -335,7 +335,7 @@ buf_buddy_relocate(
{
buf_page_t* bpage;
const ulint size = BUF_BUDDY_LOW << i;
- mutex_t* mutex;
+ ib_mutex_t* mutex;
ulint space;
ulint page_no;
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index 96821478e60..6efa14e6791 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -53,7 +53,6 @@ Created 11/5/1995 Heikki Tuuri
#include "page0zip.h"
#include "srv0mon.h"
#include "buf0checksum.h"
-#include "buf0dblwr.h"
/*
IMPLEMENTATION OF THE BUFFER POOL
@@ -372,10 +371,6 @@ buf_get_total_list_len(
buf_pool = buf_pool_from_array(i);
- if (!buf_pool) {
- continue;
- }
-
*LRU_len += UT_LIST_GET_LEN(buf_pool->LRU);
*free_len += UT_LIST_GET_LEN(buf_pool->free);
*flush_list_len += UT_LIST_GET_LEN(buf_pool->flush_list);
@@ -383,6 +378,32 @@ buf_get_total_list_len(
}
/********************************************************************//**
+Get total list size in bytes from all buffer pools. */
+UNIV_INTERN
+void
+buf_get_total_list_size_in_bytes(
+/*=============================*/
+ buf_pools_list_size_t* buf_pools_list_size) /*!< out: list sizes
+ in all buffer pools */
+{
+ ut_ad(buf_pools_list_size);
+ memset(buf_pools_list_size, 0, sizeof(*buf_pools_list_size));
+
+ for (ulint i = 0; i < srv_buf_pool_instances; i++) {
+ buf_pool_t* buf_pool;
+
+ buf_pool = buf_pool_from_array(i);
+ /* We don't need mutex protection since this is
+ for statistics purpose */
+ buf_pools_list_size->LRU_bytes += buf_pool->stat.LRU_bytes;
+ buf_pools_list_size->unzip_LRU_bytes +=
+ UT_LIST_GET_LEN(buf_pool->unzip_LRU) * UNIV_PAGE_SIZE;
+ buf_pools_list_size->flush_list_bytes +=
+ buf_pool->stat.flush_list_bytes;
+ }
+}
+
+/********************************************************************//**
Get total buffer pool statistics. */
UNIV_INTERN
void
@@ -400,10 +421,6 @@ buf_get_total_stat(
buf_pool = buf_pool_from_array(i);
- if (!buf_pool) {
- continue;
- }
-
buf_stat = &buf_pool->stat;
tot_stat->n_page_gets += buf_stat->n_page_gets;
tot_stat->n_pages_read += buf_stat->n_pages_read;
@@ -456,6 +473,8 @@ UNIV_INTERN
ibool
buf_page_is_corrupted(
/*==================*/
+ bool check_lsn, /*!< in: true if we need to check
+ and complain about the LSN */
const byte* read_buf, /*!< in: a database page */
ulint zip_size) /*!< in: size of compressed page;
0 for uncompressed pages */
@@ -480,14 +499,17 @@ buf_page_is_corrupted(
if (recv_lsn_checks_on) {
lsn_t current_lsn;
- if (log_peek_lsn(&current_lsn)
- && UNIV_UNLIKELY
- (current_lsn
- < mach_read_from_8(read_buf + FIL_PAGE_LSN))) {
+ /* Since we are going to reset the page LSN during the import
+ phase it makes no sense to spam the log with error messages. */
+
+ if (check_lsn
+ && log_peek_lsn(&current_lsn)
+ && current_lsn
+ < mach_read_from_8(read_buf + FIL_PAGE_LSN)) {
ut_print_timestamp(stderr);
fprintf(stderr,
- " InnoDB: Error: page %lu log sequence number"
+ " InnoDB: Error: page %lu log sequence number"
" " LSN_PF "\n"
"InnoDB: is in the future! Current system "
"log sequence number " LSN_PF ".\n"
@@ -673,6 +695,8 @@ buf_page_is_corrupted(
is added and not handled here */
}
+ DBUG_EXECUTE_IF("buf_page_is_corrupt_failure", return(TRUE); );
+
return(FALSE);
}
@@ -885,7 +909,7 @@ pfs_register_buffer_block(
PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER);
for (i = 0; i < num_to_register; i++) {
- mutex_t* mutex;
+ ib_mutex_t* mutex;
rw_lock_t* rwlock;
# ifdef UNIV_PFS_MUTEX
@@ -1267,7 +1291,7 @@ buf_pool_init_instance(
SYNC_BUF_FLUSH_LIST);
for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
- buf_pool->no_flush[i] = os_event_create(NULL);
+ buf_pool->no_flush[i] = os_event_create();
}
buf_pool->watch = (buf_page_t*) mem_zalloc(
@@ -1334,7 +1358,7 @@ buf_pool_free_instance(
Creates the buffer pool.
@return DB_SUCCESS if success, DB_ERROR if not enough memory or error */
UNIV_INTERN
-ulint
+dberr_t
buf_pool_init(
/*==========*/
ulint total_size, /*!< in: size of the total pool in bytes */
@@ -1731,7 +1755,7 @@ buf_pool_watch_unset(
ut_a(bpage);
if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) {
- mutex_t* mutex = buf_page_get_mutex(bpage);
+ ib_mutex_t* mutex = buf_page_get_mutex(bpage);
mutex_enter(mutex);
ut_a(bpage->buf_fix_count > 0);
@@ -1802,34 +1826,24 @@ buf_page_make_young(
}
/********************************************************************//**
-Sets the time of the first access of a page and moves a page to the
-start of the buffer pool LRU list if it is too old. This high-level
-function can be used to prevent an important page from slipping
-out of the buffer pool. */
+Moves a page to the start of the buffer pool LRU list if it is too old.
+This high-level function can be used to prevent an important page from
+slipping out of the buffer pool. */
static
void
-buf_page_set_accessed_make_young(
-/*=============================*/
- buf_page_t* bpage, /*!< in/out: buffer block of a
+buf_page_make_young_if_needed(
+/*==========================*/
+ buf_page_t* bpage) /*!< in/out: buffer block of a
file page */
- unsigned access_time) /*!< in: bpage->access_time
- read under mutex protection,
- or 0 if unknown */
{
+#ifdef UNIV_DEBUG
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
-
ut_ad(!buf_pool_mutex_own(buf_pool));
+#endif /* UNIV_DEBUG */
ut_a(buf_page_in_file(bpage));
if (buf_page_peek_if_too_old(bpage)) {
- buf_pool_mutex_enter(buf_pool);
- buf_LRU_make_block_young(bpage);
- buf_pool_mutex_exit(buf_pool);
- } else if (!access_time) {
- ulint time_ms = ut_time_ms();
- buf_pool_mutex_enter(buf_pool);
- buf_page_set_accessed(bpage, time_ms);
- buf_pool_mutex_exit(buf_pool);
+ buf_page_make_young(bpage);
}
}
@@ -1880,7 +1894,7 @@ buf_page_set_file_page_was_freed(
&hash_lock);
if (bpage) {
- mutex_t* block_mutex = buf_page_get_mutex(bpage);
+ ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
mutex_enter(block_mutex);
rw_lock_s_unlock(hash_lock);
@@ -1913,7 +1927,7 @@ buf_page_reset_file_page_was_freed(
bpage = buf_page_hash_get_s_locked(buf_pool, space, offset,
&hash_lock);
if (bpage) {
- mutex_t* block_mutex = buf_page_get_mutex(bpage);
+ ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
mutex_enter(block_mutex);
rw_lock_s_unlock(hash_lock);
@@ -1974,11 +1988,10 @@ buf_page_get_zip(
ulint offset) /*!< in: page number */
{
buf_page_t* bpage;
- mutex_t* block_mutex;
+ ib_mutex_t* block_mutex;
rw_lock_t* hash_lock;
ibool discard_attempted = FALSE;
ibool must_read;
- unsigned access_time;
buf_pool_t* buf_pool = buf_pool_get(space, offset);
buf_pool->stat.n_page_gets++;
@@ -2051,15 +2064,17 @@ err_exit:
got_block:
must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
- access_time = buf_page_is_accessed(bpage);
rw_lock_s_unlock(hash_lock);
#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
ut_a(!bpage->file_page_was_freed);
#endif
+
+ buf_page_set_accessed(bpage);
+
mutex_exit(block_mutex);
- buf_page_set_accessed_make_young(bpage, access_time);
+ buf_page_make_young_if_needed(bpage);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 5771 || buf_validate());
@@ -2372,6 +2387,28 @@ buf_block_is_uncompressed(
return(buf_pointer_is_block_field_instance(buf_pool, (void*) block));
}
+#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+/********************************************************************//**
+Return true if probe is enabled.
+@return true if probe enabled. */
+static
+bool
+buf_debug_execute_is_force_flush()
+/*==============================*/
+{
+ DBUG_EXECUTE_IF("ib_buf_force_flush", return(true); );
+
+ /* This is used during queisce testing, we want to ensure maximum
+ buffering by the change buffer. */
+
+ if (srv_ibuf_disable_background_merge) {
+ return(true);
+ }
+
+ return(false);
+}
+#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+
/********************************************************************//**
This is the general function used to get access to a database page.
@return pointer to the block or NULL */
@@ -2398,7 +2435,7 @@ buf_page_get_gen(
ulint fix_type;
ibool must_read;
rw_lock_t* hash_lock;
- mutex_t* block_mutex;
+ ib_mutex_t* block_mutex;
buf_page_t* hash_bpage;
ulint retries = 0;
buf_pool_t* buf_pool = buf_pool_get(space, offset);
@@ -2666,27 +2703,38 @@ wait_until_unfixed:
block->page.buf_fix_count = 1;
buf_block_set_io_fix(block, BUF_IO_READ);
- rw_lock_x_lock_func(&block->lock, 0, file, line);
+ rw_lock_x_lock_inline(&block->lock, 0, file, line);
UNIV_MEM_INVALID(bpage, sizeof *bpage);
rw_lock_x_unlock(hash_lock);
- mutex_exit(&block->mutex);
- mutex_exit(&buf_pool->zip_mutex);
- buf_pool->n_pend_unzip++;
+ buf_pool->n_pend_unzip++;
buf_pool_mutex_exit(buf_pool);
+ access_time = buf_page_is_accessed(&block->page);
+ mutex_exit(&block->mutex);
+ mutex_exit(&buf_pool->zip_mutex);
+
buf_page_free_descriptor(bpage);
- /* Decompress the page and apply buffered operations
- while not holding buf_pool->mutex or block->mutex. */
+ /* Decompress the page while not holding
+ buf_pool->mutex or block->mutex. */
- ut_a(buf_zip_decompress(block, TRUE));
+ /* Page checksum verification is already done when
+ the page is read from disk. Hence page checksum
+ verification is not necessary when decompressing the page. */
+ ut_a(buf_zip_decompress(block, FALSE));
if (UNIV_LIKELY(!recv_no_ibuf_operations)) {
- ibuf_merge_or_delete_for_page(block, space, offset,
- zip_size, TRUE);
+ if (access_time) {
+#ifdef UNIV_IBUF_COUNT_DEBUG
+ ut_a(ibuf_count_get(space, offset) == 0);
+#endif /* UNIV_IBUF_COUNT_DEBUG */
+ } else {
+ ibuf_merge_or_delete_for_page(
+ block, space, offset, zip_size, TRUE);
+ }
}
/* Unfix and unlatch the block. */
@@ -2723,8 +2771,9 @@ wait_until_unfixed:
UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);
#endif
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+
if ((mode == BUF_GET_IF_IN_POOL || mode == BUF_GET_IF_IN_POOL_OR_WATCH)
- && ibuf_debug) {
+ && (ibuf_debug || buf_debug_execute_is_force_flush())) {
/* Try to evict the block from the buffer pool, to use the
insert buffer (change buffer) as much as possible. */
@@ -2759,19 +2808,18 @@ wait_until_unfixed:
buf_pool, space, offset, fold);
}
- if (UNIV_LIKELY_NULL(block)) {
- block_mutex = buf_page_get_mutex(
- &block->page);
- /* The page entered the buffer
- pool for some reason. Try to
- evict it again. */
- mutex_enter(block_mutex);
- rw_lock_x_unlock(hash_lock);
+ rw_lock_x_unlock(hash_lock);
- goto got_block;
+ if (UNIV_LIKELY_NULL(block)) {
+ /* Either the page has been read in or
+ a watch was set on that in the window
+ where we released the buf_pool::mutex
+ and before we acquire the hash_lock
+ above. Try again. */
+ guess = block;
+ goto loop;
}
- rw_lock_x_unlock(hash_lock);
fprintf(stderr,
"innodb_change_buffering_debug evict %u %u\n",
(unsigned) space, (unsigned) offset);
@@ -2799,14 +2847,15 @@ wait_until_unfixed:
ut_a(mode == BUF_GET_POSSIBLY_FREED
|| !block->page.file_page_was_freed);
#endif
- mutex_exit(&block->mutex);
-
/* Check if this is the first access to the page */
-
access_time = buf_page_is_accessed(&block->page);
- if (UNIV_LIKELY(mode != BUF_PEEK_IF_IN_POOL)) {
- buf_page_set_accessed_make_young(&block->page, access_time);
+ buf_page_set_accessed(&block->page);
+
+ mutex_exit(&block->mutex);
+
+ if (mode != BUF_PEEK_IF_IN_POOL) {
+ buf_page_make_young_if_needed(&block->page);
}
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
@@ -2842,14 +2891,14 @@ wait_until_unfixed:
break;
case RW_S_LATCH:
- rw_lock_s_lock_func(&(block->lock), 0, file, line);
+ rw_lock_s_lock_inline(&(block->lock), 0, file, line);
fix_type = MTR_MEMO_PAGE_S_FIX;
break;
default:
ut_ad(rw_latch == RW_X_LATCH);
- rw_lock_x_lock_func(&(block->lock), 0, file, line);
+ rw_lock_x_lock_inline(&(block->lock), 0, file, line);
fix_type = MTR_MEMO_PAGE_X_FIX;
break;
@@ -2857,7 +2906,7 @@ wait_until_unfixed:
mtr_memo_push(mtr, block, fix_type);
- if (UNIV_LIKELY(mode != BUF_PEEK_IF_IN_POOL) && !access_time) {
+ if (mode != BUF_PEEK_IF_IN_POOL && !access_time) {
/* In the case of a first access, try to apply linear
read-ahead */
@@ -2912,15 +2961,13 @@ buf_page_optimistic_get(
buf_block_buf_fix_inc(block, file, line);
- mutex_exit(&block->mutex);
+ access_time = buf_page_is_accessed(&block->page);
- /* Check if this is the first access to the page.
- We do a dirty read on purpose, to avoid mutex contention.
- This field is only used for heuristic purposes; it does not
- affect correctness. */
+ buf_page_set_accessed(&block->page);
- access_time = buf_page_is_accessed(&block->page);
- buf_page_set_accessed_make_young(&block->page, access_time);
+ mutex_exit(&block->mutex);
+
+ buf_page_make_young_if_needed(&block->page);
ut_ad(!ibuf_inside(mtr)
|| ibuf_page(buf_block_get_space(block),
@@ -2932,8 +2979,8 @@ buf_page_optimistic_get(
file, line);
fix_type = MTR_MEMO_PAGE_S_FIX;
} else {
- success = rw_lock_x_lock_func_nowait(&(block->lock),
- file, line);
+ success = rw_lock_x_lock_func_nowait_inline(&(block->lock),
+ file, line);
fix_type = MTR_MEMO_PAGE_X_FIX;
}
@@ -2975,7 +3022,7 @@ buf_page_optimistic_get(
mutex_exit(&block->mutex);
#endif
- if (UNIV_UNLIKELY(!access_time)) {
+ if (!access_time) {
/* In the case of a first access, try to apply linear
read-ahead */
@@ -3038,24 +3085,14 @@ buf_page_get_known_nowait(
buf_block_buf_fix_inc(block, file, line);
+ buf_page_set_accessed(&block->page);
+
mutex_exit(&block->mutex);
buf_pool = buf_pool_from_block(block);
- if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
- buf_pool_mutex_enter(buf_pool);
- buf_LRU_make_block_young(&block->page);
- buf_pool_mutex_exit(buf_pool);
- } else if (!buf_page_is_accessed(&block->page)) {
- /* Above, we do a dirty read on purpose, to avoid
- mutex contention. The field buf_page_t::access_time
- is only used for heuristic purposes. Writes to the
- field must be protected by mutex, however. */
- ulint time_ms = ut_time_ms();
-
- buf_pool_mutex_enter(buf_pool);
- buf_page_set_accessed(&block->page, time_ms);
- buf_pool_mutex_exit(buf_pool);
+ if (mode == BUF_MAKE_YOUNG) {
+ buf_page_make_young_if_needed(&block->page);
}
ut_ad(!ibuf_inside(mtr) || mode == BUF_KEEP_OLD);
@@ -3065,8 +3102,8 @@ buf_page_get_known_nowait(
file, line);
fix_type = MTR_MEMO_PAGE_S_FIX;
} else {
- success = rw_lock_x_lock_func_nowait(&(block->lock),
- file, line);
+ success = rw_lock_x_lock_func_nowait_inline(&(block->lock),
+ file, line);
fix_type = MTR_MEMO_PAGE_X_FIX;
}
@@ -3167,8 +3204,8 @@ buf_page_try_get_func(
S-latch. */
fix_type = MTR_MEMO_PAGE_X_FIX;
- success = rw_lock_x_lock_func_nowait(&block->lock,
- file, line);
+ success = rw_lock_x_lock_func_nowait_inline(&block->lock,
+ file, line);
}
if (!success) {
@@ -3234,6 +3271,7 @@ buf_page_init(
ulint offset, /*!< in: offset of the page within space
in units of a page */
ulint fold, /*!< in: buf_page_address_fold(space,offset) */
+ ulint zip_size,/*!< in: compressed page size, or 0 */
buf_block_t* block) /*!< in/out: block to init */
{
buf_page_t* hash_page;
@@ -3302,6 +3340,9 @@ buf_page_init(
ut_d(block->page.in_page_hash = TRUE);
HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
fold, &block->page);
+ if (zip_size) {
+ page_zip_set_size(&block->page.zip, zip_size);
+ }
}
/********************************************************************//**
@@ -3318,7 +3359,7 @@ UNIV_INTERN
buf_page_t*
buf_page_init_for_read(
/*===================*/
- ulint* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
+ dberr_t* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ... */
ulint space, /*!< in: space id */
ulint zip_size,/*!< in: compressed page size, or 0 */
@@ -3407,7 +3448,7 @@ err_exit:
ut_ad(buf_pool_from_bpage(bpage) == buf_pool);
- buf_page_init(buf_pool, space, offset, fold, block);
+ buf_page_init(buf_pool, space, offset, fold, zip_size, block);
rw_lock_x_unlock(hash_lock);
/* The block must be put to the LRU list, to the old blocks */
@@ -3426,8 +3467,6 @@ err_exit:
buf_page_set_io_fix(bpage, BUF_IO_READ);
if (zip_size) {
- page_zip_set_size(&block->page.zip, zip_size);
-
/* buf_pool->mutex may be released and
reacquired by buf_buddy_alloc(). Thus, we
must release block->mutex in order not to
@@ -3528,7 +3567,8 @@ err_exit:
rw_lock_x_unlock(hash_lock);
- /* The block must be put to the LRU list, to the old blocks */
+ /* The block must be put to the LRU list, to the old blocks.
+ The zip_size is already set into the page zip */
buf_LRU_add_block(bpage, TRUE/* to old blocks */);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
buf_LRU_insert_zip_clean(bpage);
@@ -3578,7 +3618,6 @@ buf_page_create(
buf_block_t* block;
ulint fold;
buf_block_t* free_block = NULL;
- ulint time_ms = ut_time_ms();
buf_pool_t* buf_pool = buf_pool_get(space, offset);
rw_lock_t* hash_lock;
@@ -3630,7 +3669,7 @@ buf_page_create(
mutex_enter(&block->mutex);
- buf_page_init(buf_pool, space, offset, fold, block);
+ buf_page_init(buf_pool, space, offset, fold, zip_size, block);
rw_lock_x_unlock(hash_lock);
@@ -3651,7 +3690,6 @@ buf_page_create(
buf_page_set_io_fix(&block->page, BUF_IO_READ);
rw_lock_x_lock(&block->lock);
- page_zip_set_size(&block->page.zip, zip_size);
mutex_exit(&block->mutex);
/* buf_pool->mutex may be released and reacquired by
buf_buddy_alloc(). Thus, we must release block->mutex
@@ -3675,12 +3713,12 @@ buf_page_create(
rw_lock_x_unlock(&block->lock);
}
- buf_page_set_accessed(&block->page, time_ms);
-
buf_pool_mutex_exit(buf_pool);
mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
+ buf_page_set_accessed(&block->page);
+
mutex_exit(&block->mutex);
/* Delete possible entries for the page from the insert buffer:
@@ -3849,6 +3887,8 @@ buf_mark_space_corrupt(
BUF_IO_READ);
}
+ mutex_exit(buf_page_get_mutex(bpage));
+
/* Find the table with specified space id, and mark it corrupted */
if (dict_set_corrupted_by_space(space)) {
buf_LRU_free_one_page(bpage);
@@ -3859,7 +3899,6 @@ buf_mark_space_corrupt(
ut_ad(buf_pool->n_pend_reads > 0);
buf_pool->n_pend_reads--;
- mutex_exit(buf_page_get_mutex(bpage));
buf_pool_mutex_exit(buf_pool);
return(ret);
@@ -3868,9 +3907,9 @@ buf_mark_space_corrupt(
/********************************************************************//**
Completes an asynchronous read or write request of a file page to or from
the buffer pool.
-@return TRUE if successful */
+@return true if successful */
UNIV_INTERN
-ibool
+bool
buf_page_io_complete(
/*=================*/
buf_page_t* bpage) /*!< in: pointer to the block in question */
@@ -3952,8 +3991,20 @@ buf_page_io_complete(
/* From version 3.23.38 up we store the page checksum
to the 4 first bytes of the page end lsn field */
- if (buf_page_is_corrupted(frame,
+ if (buf_page_is_corrupted(true, frame,
buf_page_get_zip_size(bpage))) {
+
+ /* Not a real corruption if it was triggered by
+ error injection */
+ DBUG_EXECUTE_IF("buf_page_is_corrupt_failure",
+ if (bpage->space > TRX_SYS_SPACE
+ && buf_mark_space_corrupt(bpage)) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Simulated page corruption");
+ return(true);
+ }
+ goto page_not_corrupt;
+ ;);
corrupt:
fprintf(stderr,
"InnoDB: Database page corruption on disk"
@@ -3997,7 +4048,7 @@ corrupt:
table as corrupted instead of crashing server */
if (bpage->space > TRX_SYS_SPACE
&& buf_mark_space_corrupt(bpage)) {
- return(FALSE);
+ return(false);
} else {
fputs("InnoDB: Ending processing"
" because of"
@@ -4008,6 +4059,9 @@ corrupt:
}
}
+ DBUG_EXECUTE_IF("buf_page_is_corrupt_failure",
+ page_not_corrupt: bpage = bpage; );
+
if (recv_recovery_is_on()) {
/* Pages must be uncompressed for crash recovery. */
ut_a(uncompressed);
@@ -4090,7 +4144,7 @@ corrupt:
mutex_exit(buf_page_get_mutex(bpage));
buf_pool_mutex_exit(buf_pool);
- return(TRUE);
+ return(true);
}
/*********************************************************************//**
@@ -5118,9 +5172,7 @@ void
buf_refresh_io_stats_all(void)
/*==========================*/
{
- ulint i;
-
- for (i = 0; i < srv_buf_pool_instances; i++) {
+ for (ulint i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool;
buf_pool = buf_pool_from_array(i);
@@ -5137,9 +5189,7 @@ ibool
buf_all_freed(void)
/*===============*/
{
- ulint i;
-
- for (i = 0; i < srv_buf_pool_instances; i++) {
+ for (ulint i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool;
buf_pool = buf_pool_from_array(i);
diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc
index ad6ef7c4cef..fb853fe1543 100644
--- a/storage/innobase/buf/buf0dblwr.cc
+++ b/storage/innobase/buf/buf0dblwr.cc
@@ -25,16 +25,16 @@ Created 2011/12/19
#include "buf0dblwr.h"
+#ifdef UNIV_NONINL
+#include "buf0buf.ic"
+#endif
+
#include "buf0buf.h"
-#include "buf0lru.h"
-#include "buf0flu.h"
#include "buf0checksum.h"
#include "srv0start.h"
#include "srv0srv.h"
#include "page0zip.h"
#include "trx0sys.h"
-#include "page0page.h"
-#include "mtr0log.h"
#ifndef UNIV_HOTBACKUP
@@ -195,22 +195,20 @@ start_again:
return;
}
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Doublewrite buffer not found:"
- " creating new\n");
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Doublewrite buffer not found: creating new");
if (buf_pool_get_curr_size()
< ((2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
+ FSP_EXTENT_SIZE / 2 + 100)
* UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "InnoDB: Cannot create doublewrite buffer:"
- " you must\n"
- "InnoDB: increase your buffer pool size.\n"
- "InnoDB: Cannot continue operation.\n");
- exit(1);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot create doublewrite buffer: you must "
+ "increase your buffer pool size. Cannot continue "
+ "operation.");
+
+ exit(EXIT_FAILURE);
}
block2 = fseg_create(TRX_SYS_SPACE, TRX_SYS_PAGE_NO,
@@ -223,16 +221,15 @@ start_again:
buf_block_dbg_add_level(block2, SYNC_NO_ORDER_CHECK);
if (block2 == NULL) {
- fprintf(stderr,
- "InnoDB: Cannot create doublewrite buffer:"
- " you must\n"
- "InnoDB: increase your tablespace size.\n"
- "InnoDB: Cannot continue operation.\n");
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot create doublewrite buffer: you must "
+ "increase your tablespace size. "
+ "Cannot continue operation.");
/* We exit without committing the mtr to prevent
its modifications to the database getting to disk */
- exit(1);
+ exit(EXIT_FAILURE);
}
fseg_header = doublewrite + TRX_SYS_DOUBLEWRITE_FSEG;
@@ -243,15 +240,12 @@ start_again:
new_block = fseg_alloc_free_page(
fseg_header, prev_page_no + 1, FSP_UP, &mtr);
if (new_block == NULL) {
- fprintf(stderr,
- "InnoDB: Cannot create doublewrite"
- " buffer: you must\n"
- "InnoDB: increase your"
- " tablespace size.\n"
- "InnoDB: Cannot continue operation.\n"
- );
-
- exit(1);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot create doublewrite buffer: you must "
+ "increase your tablespace size. "
+ "Cannot continue operation.");
+
+ exit(EXIT_FAILURE);
}
/* We read the allocated pages to the buffer pool;
@@ -331,8 +325,7 @@ start_again:
/* Remove doublewrite pages from LRU */
buf_pool_invalidate();
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Doublewrite buffer created\n");
+ ib_logf(IB_LOG_LEVEL_INFO, "Doublewrite buffer created");
goto start_again;
}
@@ -391,7 +384,7 @@ buf_dblwr_init_or_restore_pages(
}
if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED)
- != TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N) {
+ != TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N) {
/* We are upgrading from a version < 4.1.x to a version where
multiple tablespaces are supported. We must reset the space id
@@ -401,9 +394,8 @@ buf_dblwr_init_or_restore_pages(
reset_space_ids = TRUE;
- fprintf(stderr,
- "InnoDB: Resetting space id's in the"
- " doublewrite buffer\n");
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Resetting space id's in the doublewrite buffer");
}
/* Read the pages from the doublewrite buffer to memory */
@@ -459,12 +451,11 @@ buf_dblwr_init_or_restore_pages(
} else if (!fil_check_adress_in_tablespace(space_id,
page_no)) {
- fprintf(stderr,
- "InnoDB: Warning: a page in the"
- " doublewrite buffer is not within space\n"
- "InnoDB: bounds; space id %lu"
- " page number %lu, page %lu in"
- " doublewrite buf.\n",
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "A page in the doublewrite buffer is not "
+ "within space bounds; space id %lu "
+ "page number %lu, page %lu in "
+ "doublewrite buf.",
(ulong) space_id, (ulong) page_no, (ulong) i);
} else if (space_id == TRX_SYS_SPACE
@@ -489,8 +480,7 @@ buf_dblwr_init_or_restore_pages(
/* Check if the page is corrupt */
- if (UNIV_UNLIKELY
- (buf_page_is_corrupted(read_buf, zip_size))) {
+ if (buf_page_is_corrupted(true, read_buf, zip_size)) {
fprintf(stderr,
"InnoDB: Warning: database page"
@@ -501,7 +491,8 @@ buf_dblwr_init_or_restore_pages(
" the doublewrite buffer.\n",
(ulong) space_id, (ulong) page_no);
- if (buf_page_is_corrupted(page, zip_size)) {
+ if (buf_page_is_corrupted(true,
+ page, zip_size)) {
fprintf(stderr,
"InnoDB: Dump of the page:\n");
buf_page_print(
@@ -538,9 +529,10 @@ buf_dblwr_init_or_restore_pages(
zip_size, page_no, 0,
zip_size ? zip_size : UNIV_PAGE_SIZE,
page, NULL);
- fprintf(stderr,
- "InnoDB: Recovered the page from"
- " the doublewrite buffer.\n");
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Recovered the page from"
+ " the doublewrite buffer.");
}
}
@@ -595,6 +587,7 @@ buf_dblwr_update(void)
ut_ad(buf_dblwr->batch_running);
ut_ad(buf_dblwr->b_reserved > 0);
+ ut_ad(buf_dblwr->b_reserved <= buf_dblwr->first_free);
buf_dblwr->b_reserved--;
if (buf_dblwr->b_reserved == 0) {
@@ -705,23 +698,29 @@ static
void
buf_dblwr_write_block_to_datafile(
/*==============================*/
- const buf_block_t* block) /*!< in: block to write */
+ const buf_page_t* bpage) /*!< in: page to write */
{
- ut_a(block);
- ut_a(buf_page_in_file(&block->page));
+ ut_a(bpage);
+ ut_a(buf_page_in_file(bpage));
- if (block->page.zip.data) {
+ /* Increment the counter of I/O operations used
+ for selecting LRU policy. */
+ buf_LRU_stat_inc_io();
+
+ if (bpage->zip.data) {
fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
- FALSE, buf_page_get_space(&block->page),
- buf_page_get_zip_size(&block->page),
- buf_page_get_page_no(&block->page), 0,
- buf_page_get_zip_size(&block->page),
- (void*) block->page.zip.data,
- (void*) block);
-
- goto exit;
+ FALSE, buf_page_get_space(bpage),
+ buf_page_get_zip_size(bpage),
+ buf_page_get_page_no(bpage), 0,
+ buf_page_get_zip_size(bpage),
+ (void*) bpage->zip.data,
+ (void*) bpage);
+
+ return;
}
+
+ const buf_block_t* block = (buf_block_t*) bpage;
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
buf_dblwr_check_page_lsn(block->frame);
@@ -729,11 +728,6 @@ buf_dblwr_write_block_to_datafile(
FALSE, buf_block_get_space(block), 0,
buf_block_get_page_no(block), 0, UNIV_PAGE_SIZE,
(void*) block->frame, (void*) block);
-
-exit:
- /* Increment the counter of I/O operations used
- for selecting LRU policy. */
- buf_LRU_stat_inc_io();
}
/********************************************************************//**
@@ -748,9 +742,8 @@ buf_dblwr_flush_buffered_writes(void)
/*=================================*/
{
byte* write_buf;
+ ulint first_free;
ulint len;
- ulint len2;
- ulint i;
if (!srv_use_doublewrite_buf || buf_dblwr == NULL) {
/* Sync the writes to the disk. */
@@ -782,10 +775,12 @@ try_again:
}
ut_a(!buf_dblwr->batch_running);
+ ut_ad(buf_dblwr->first_free == buf_dblwr->b_reserved);
/* Disallow anyone else to post to doublewrite buffer or to
start another batch of flushing. */
buf_dblwr->batch_running = TRUE;
+ first_free = buf_dblwr->first_free;
/* Now safe to release the mutex. Note that though no other
thread is allowed to post to the doublewrite batch flushing
@@ -795,7 +790,7 @@ try_again:
write_buf = buf_dblwr->write_buf;
- for (len2 = 0, i = 0;
+ for (ulint len2 = 0, i = 0;
i < buf_dblwr->first_free;
len2 += UNIV_PAGE_SIZE, i++) {
@@ -845,8 +840,8 @@ try_again:
flush:
/* increment the doublewrite flushed pages counter */
- srv_dblwr_pages_written += buf_dblwr->first_free;
- srv_dblwr_writes++;
+ srv_stats.dblwr_pages_written.add(buf_dblwr->first_free);
+ srv_stats.dblwr_writes.inc();
/* Now flush the doublewrite buffer data to disk */
fil_flush(TRX_SYS_SPACE);
@@ -855,11 +850,21 @@ flush:
and in recovery we will find them in the doublewrite buffer
blocks. Next do the writes to the intended positions. */
- for (i = 0; i < buf_dblwr->first_free; i++) {
- const buf_block_t* block = (buf_block_t*)
- buf_dblwr->buf_block_arr[i];
-
- buf_dblwr_write_block_to_datafile(block);
+ /* Up to this point first_free and buf_dblwr->first_free are
+ same because we have set the buf_dblwr->batch_running flag
+ disallowing any other thread to post any request but we
+ can't safely access buf_dblwr->first_free in the loop below.
+ This is so because it is possible that after we are done with
+ the last iteration and before we terminate the loop, the batch
+ gets finished in the IO helper thread and another thread posts
+ a new batch setting buf_dblwr->first_free to a higher value.
+ If this happens and we are using buf_dblwr->first_free in the
+ loop termination condition then we'll end up dispatching
+ the same block twice from two different threads. */
+ ut_ad(first_free == buf_dblwr->first_free);
+ for (ulint i = 0; i < first_free; i++) {
+ buf_dblwr_write_block_to_datafile(
+ buf_dblwr->buf_block_arr[i]);
}
/* Wake possible simulated aio thread to actually post the
@@ -935,6 +940,8 @@ try_again:
buf_dblwr->first_free++;
buf_dblwr->b_reserved++;
+ ut_ad(!buf_dblwr->batch_running);
+ ut_ad(buf_dblwr->first_free == buf_dblwr->b_reserved);
ut_ad(buf_dblwr->b_reserved <= srv_doublewrite_batch_size);
if (buf_dblwr->first_free == srv_doublewrite_batch_size) {
@@ -1065,7 +1072,7 @@ retry:
/* We know that the write has been flushed to disk now
and during recovery we will find it in the doublewrite buffer
blocks. Next do the write to the intended position. */
- buf_dblwr_write_block_to_datafile((buf_block_t*) bpage);
+ buf_dblwr_write_block_to_datafile(bpage);
/* Sync the writes to the disk. */
buf_flush_sync_datafiles();
@@ -1077,8 +1084,8 @@ retry:
buf_dblwr->in_use[i] = FALSE;
/* increment the doublewrite flushed pages counter */
- srv_dblwr_pages_written += buf_dblwr->first_free;
- srv_dblwr_writes++;
+ srv_stats.dblwr_pages_written.inc();
+ srv_stats.dblwr_writes.inc();
mutex_exit(&(buf_dblwr->mutex));
diff --git a/storage/innobase/buf/buf0dump.cc b/storage/innobase/buf/buf0dump.cc
index 27757241c3e..467f817a2d1 100644
--- a/storage/innobase/buf/buf0dump.cc
+++ b/storage/innobase/buf/buf0dump.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -23,14 +23,14 @@ Implements a buffer pool dump/load.
Created April 08, 2011 Vasil Dimov
*******************************************************/
+#include "univ.i"
+
#include <stdarg.h> /* va_* */
#include <string.h> /* strerror() */
-#include "univ.i"
-
#include "buf0buf.h" /* buf_pool_mutex_enter(), srv_buf_pool_instances */
#include "buf0dump.h"
-#include "db0err.h" /* enum db_err */
+#include "db0err.h"
#include "dict0dict.h" /* dict_operation_lock */
#include "os0file.h" /* OS_FILE_MAX_PATH */
#include "os0sync.h" /* os_event* */
@@ -40,7 +40,6 @@ Created April 08, 2011 Vasil Dimov
#include "sync0rw.h" /* rw_lock_s_lock() */
#include "ut0byte.h" /* ut_ull_create() */
#include "ut0sort.h" /* UT_SORT_FUNCTION_BODY */
-#include "buf0rea.h" /* buf_read_page_async() */
enum status_severity {
STATUS_INFO,
@@ -579,6 +578,8 @@ DECLARE_THREAD(buf_dump_thread)(
void* arg __attribute__((unused))) /*!< in: a dummy parameter
required by os_thread_create */
{
+ ut_ad(!srv_read_only_mode);
+
srv_buf_dump_thread_active = TRUE;
buf_dump_status(STATUS_INFO, "not started");
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index 023ed766c62..542c1669667 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -25,6 +25,10 @@ Created 11/11/1995 Heikki Tuuri
#include "buf0flu.h"
+#ifdef UNIV_NONINL
+#include "buf0flu.ic"
+#endif
+
#include "buf0buf.h"
#include "buf0checksum.h"
#include "srv0start.h"
@@ -44,39 +48,6 @@ Created 11/11/1995 Heikki Tuuri
#include "srv0mon.h"
#include "mysql/plugin.h"
#include "mysql/service_thd_wait.h"
-#include "buf0dblwr.h"
-
-#ifdef UNIV_NONINL
-#include "buf0flu.ic"
-#endif
-
-/**********************************************************************
-These statistics are generated for heuristics used in estimating the
-rate at which we should flush the dirty blocks to avoid bursty IO
-activity. Note that the rate of flushing not only depends on how many
-dirty pages we have in the buffer pool but it is also a fucntion of
-how much redo the workload is generating and at what rate. */
-/* @{ */
-
-/** Number of intervals for which we keep the history of these stats.
-Each interval is 1 second, defined by the rate at which
-srv_error_monitor_thread() calls buf_flush_stat_update(). */
-#define BUF_FLUSH_STAT_N_INTERVAL 20
-
-/** Sampled values buf_flush_stat_cur.
-Not protected by any mutex. Updated by buf_flush_stat_update(). */
-static buf_flush_stat_t buf_flush_stat_arr[BUF_FLUSH_STAT_N_INTERVAL];
-
-/** Cursor to buf_flush_stat_arr[]. Updated in a round-robin fashion. */
-static ulint buf_flush_stat_arr_ind;
-
-/** Values at start of the current interval. Reset by
-buf_flush_stat_update(). */
-static buf_flush_stat_t buf_flush_stat_cur;
-
-/** Running sum of past values of buf_flush_stat_cur.
-Updated by buf_flush_stat_update(). Not protected by any mutex. */
-static buf_flush_stat_t buf_flush_stat_sum;
/** Number of pages flushed through non flush_list flushes. */
static ulint buf_lru_flush_page_count = 0;
@@ -104,6 +75,22 @@ in thrashing. */
/* @} */
+/******************************************************************//**
+Increases flush_list size in bytes with zip_size for compressed page,
+UNIV_PAGE_SIZE for uncompressed page in inline function */
+static inline
+void
+incr_flush_list_size_in_bytes(
+/*==========================*/
+ buf_block_t* block, /*!< in: control block */
+ buf_pool_t* buf_pool) /*!< in: buffer pool instance */
+{
+ ut_ad(buf_flush_list_mutex_own(buf_pool));
+ ulint zip_size = page_zip_get_size(&block->page.zip);
+ buf_pool->stat.flush_list_bytes += zip_size ? zip_size : UNIV_PAGE_SIZE;
+ ut_ad(buf_pool->stat.flush_list_bytes <= buf_pool->curr_pool_size);
+}
+
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/******************************************************************//**
Validates the flush list.
@@ -333,6 +320,7 @@ buf_flush_insert_into_flush_list(
ut_d(block->page.in_flush_list = TRUE);
block->page.oldest_modification = lsn;
UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
+ incr_flush_list_size_in_bytes(block, buf_pool);
#ifdef UNIV_DEBUG_VALGRIND
{
@@ -437,7 +425,7 @@ buf_flush_insert_sorted_into_flush_list(
prev_b, &block->page);
}
- MONITOR_INC(MONITOR_PAGE_INFLUSH);
+ incr_flush_list_size_in_bytes(block, buf_pool);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(buf_flush_validate_low(buf_pool));
@@ -538,6 +526,7 @@ buf_flush_remove(
buf_page_t* bpage) /*!< in: pointer to the block in question */
{
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+ ulint zip_size;
ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
@@ -576,14 +565,15 @@ buf_flush_remove(
because we assert on in_flush_list in comparison function. */
ut_d(bpage->in_flush_list = FALSE);
+ zip_size = page_zip_get_size(&bpage->zip);
+ buf_pool->stat.flush_list_bytes -= zip_size ? zip_size : UNIV_PAGE_SIZE;
+
bpage->oldest_modification = 0;
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(buf_flush_validate_skip(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
- MONITOR_DEC(MONITOR_PAGE_INFLUSH);
-
buf_flush_list_mutex_exit(buf_pool);
}
@@ -606,7 +596,7 @@ buf_flush_relocate_on_flush_list(
buf_page_t* dpage) /*!< in/out: destination block */
{
buf_page_t* prev;
- buf_page_t* prev_b = NULL;
+ buf_page_t* prev_b = NULL;
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
ut_ad(buf_pool_mutex_own(buf_pool));
@@ -710,6 +700,27 @@ buf_flush_write_complete(
#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
+Calculate the checksum of a page from compressed table and update the page. */
+UNIV_INTERN
+void
+buf_flush_update_zip_checksum(
+/*==========================*/
+ buf_frame_t* page, /*!< in/out: Page to update */
+ ulint zip_size, /*!< in: Compressed page size */
+ lsn_t lsn) /*!< in: Lsn to stamp on the page */
+{
+ ut_a(zip_size > 0);
+
+ ib_uint32_t checksum = page_zip_calc_checksum(
+ page, zip_size,
+ static_cast<srv_checksum_algorithm_t>(srv_checksum_algorithm));
+
+ mach_write_to_8(page + FIL_PAGE_LSN, lsn);
+ memset(page + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
+ mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum);
+}
+
+/********************************************************************//**
Initializes a page for writing to the tablespace. */
UNIV_INTERN
void
@@ -747,17 +758,10 @@ buf_flush_init_for_writing(
case FIL_PAGE_TYPE_ZBLOB:
case FIL_PAGE_TYPE_ZBLOB2:
case FIL_PAGE_INDEX:
- checksum = page_zip_calc_checksum(
- page_zip->data, zip_size,
- static_cast<srv_checksum_algorithm_t>(
- srv_checksum_algorithm));
-
- mach_write_to_8(page_zip->data
- + FIL_PAGE_LSN, newest_lsn);
- memset(page_zip->data + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
- mach_write_to_4(page_zip->data
- + FIL_PAGE_SPACE_OR_CHKSUM,
- checksum);
+
+ buf_flush_update_zip_checksum(
+ page_zip->data, zip_size, newest_lsn);
+
return;
}
@@ -865,7 +869,7 @@ buf_flush_write_block_low(
#endif
#ifdef UNIV_LOG_DEBUG
- static ibool univ_log_debug_warned;
+ static ibool univ_log_debug_warned;
#endif /* UNIV_LOG_DEBUG */
ut_ad(buf_page_in_file(bpage));
@@ -949,15 +953,15 @@ os_aio_simulated_wake_handler_threads after we have posted a batch of
writes! NOTE: buf_pool->mutex and buf_page_get_mutex(bpage) must be
held upon entering this function, and they will be released by this
function. */
-static
+UNIV_INTERN
void
buf_flush_page(
/*===========*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
buf_page_t* bpage, /*!< in: buffer control block */
- enum buf_flush flush_type) /*!< in: type of flush */
+ buf_flush flush_type) /*!< in: type of flush */
{
- mutex_t* block_mutex;
+ ib_mutex_t* block_mutex;
ibool is_uncompressed;
ut_ad(flush_type < BUF_FLUSH_N_TYPES);
@@ -1091,6 +1095,56 @@ buf_flush_page_try(
}
# endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
/***********************************************************//**
+Check the page is in buffer pool and can be flushed.
+@return true if the page can be flushed. */
+static
+bool
+buf_flush_check_neighbor(
+/*=====================*/
+ ulint space, /*!< in: space id */
+ ulint offset, /*!< in: page offset */
+ enum buf_flush flush_type) /*!< in: BUF_FLUSH_LRU or
+ BUF_FLUSH_LIST */
+{
+ buf_page_t* bpage;
+ buf_pool_t* buf_pool = buf_pool_get(space, offset);
+ bool ret;
+
+ ut_ad(flush_type == BUF_FLUSH_LRU
+ || flush_type == BUF_FLUSH_LIST);
+
+ buf_pool_mutex_enter(buf_pool);
+
+ /* We only want to flush pages from this buffer pool. */
+ bpage = buf_page_hash_get(buf_pool, space, offset);
+
+ if (!bpage) {
+
+ buf_pool_mutex_exit(buf_pool);
+ return(false);
+ }
+
+ ut_a(buf_page_in_file(bpage));
+
+ /* We avoid flushing 'non-old' blocks in an LRU flush,
+ because the flushed blocks are soon freed */
+
+ ret = false;
+ if (flush_type != BUF_FLUSH_LRU || buf_page_is_old(bpage)) {
+ ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
+
+ mutex_enter(block_mutex);
+ if (buf_flush_ready_for_flush(bpage, flush_type)) {
+ ret = true;
+ }
+ mutex_exit(block_mutex);
+ }
+ buf_pool_mutex_exit(buf_pool);
+
+ return(ret);
+}
+
+/***********************************************************//**
Flushes to disk all flushable pages within the flush area.
@return number of pages flushed */
static
@@ -1115,7 +1169,7 @@ buf_flush_try_neighbors(
ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN
- || !srv_flush_neighbors) {
+ || srv_flush_neighbors == 0) {
/* If there is little space or neighbor flushing is
not enabled then just flush the victim. */
low = offset;
@@ -1133,6 +1187,30 @@ buf_flush_try_neighbors(
low = (offset / buf_flush_area) * buf_flush_area;
high = (offset / buf_flush_area + 1) * buf_flush_area;
+
+ if (srv_flush_neighbors == 1) {
+ /* adjust 'low' and 'high' to limit
+ for contiguous dirty area */
+ if (offset > low) {
+ for (i = offset - 1;
+ i >= low
+ && buf_flush_check_neighbor(
+ space, i, flush_type);
+ i--) {
+ /* do nothing */
+ }
+ low = i + 1;
+ }
+
+ for (i = offset + 1;
+ i < high
+ && buf_flush_check_neighbor(
+ space, i, flush_type);
+ i++) {
+ /* do nothing */
+ }
+ high = i;
+ }
}
/* fprintf(stderr, "Flush area: low %lu high %lu\n", low, high); */
@@ -1181,7 +1259,7 @@ buf_flush_try_neighbors(
if (flush_type != BUF_FLUSH_LRU
|| i == offset
|| buf_page_is_old(bpage)) {
- mutex_t* block_mutex = buf_page_get_mutex(bpage);
+ ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
mutex_enter(block_mutex);
@@ -1240,7 +1318,7 @@ buf_flush_page_and_try_neighbors(
ulint* count) /*!< in/out: number of pages
flushed */
{
- mutex_t* block_mutex;
+ ib_mutex_t* block_mutex;
ibool flushed = FALSE;
#ifdef UNIV_DEBUG
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
@@ -1374,7 +1452,7 @@ buf_flush_LRU_list_batch(
&& free_len < srv_LRU_scan_depth
&& lru_len > BUF_LRU_MIN_LEN) {
- mutex_t* block_mutex = buf_page_get_mutex(bpage);
+ ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
ibool evict;
mutex_enter(block_mutex);
@@ -1576,8 +1654,7 @@ NOTE 1: in the case of an LRU flush the calling thread may own latches to
pages: to avoid deadlocks, this function must be written so that it cannot
end up waiting for these latches! NOTE 2: in the case of a flush list flush,
the calling thread is not allowed to own any latches on pages!
-@return number of blocks for which the write request was queued;
-ULINT_UNDEFINED if there was a flush of the same type already running */
+@return number of blocks for which the write request was queued */
static
ulint
buf_flush_batch(
@@ -1621,8 +1698,6 @@ buf_flush_batch(
buf_pool_mutex_exit(buf_pool);
- buf_dblwr_flush_buffered_writes();
-
#ifdef UNIV_DEBUG
if (buf_debug_prints && count > 0) {
fprintf(stderr, flush_type == BUF_FLUSH_LRU
@@ -1632,8 +1707,6 @@ buf_flush_batch(
}
#endif /* UNIV_DEBUG */
- srv_buf_pool_flushed += count;
-
return(count);
}
@@ -1659,14 +1732,7 @@ buf_flush_common(
}
#endif /* UNIV_DEBUG */
- srv_buf_pool_flushed += page_count;
-
- if (flush_type == BUF_FLUSH_LRU) {
- /* We keep track of all flushes happening as part of LRU
- flush. When estimating the desired rate at which flush_list
- should be flushed we factor in this value. */
- buf_lru_flush_page_count += page_count;
- }
+ srv_stats.buf_pool_flushed.add(page_count);
}
/******************************************************************//**
@@ -1750,7 +1816,7 @@ buf_flush_wait_batch_end(
}
} else {
thd_wait_begin(NULL, THD_WAIT_DISKIO);
- os_event_wait(buf_pool->no_flush[type]);
+ os_event_wait(buf_pool->no_flush[type]);
thd_wait_end(NULL);
}
}
@@ -1760,21 +1826,28 @@ This utility flushes dirty blocks from the end of the LRU list and also
puts replaceable clean pages from the end of the LRU list to the free
list.
NOTE: The calling thread is not allowed to own any latches on pages!
-@return number of blocks for which the write request was queued;
-ULINT_UNDEFINED if there was a flush of the same type already running */
+@return true if a batch was queued successfully. false if another batch
+of same type was already running. */
static
-ulint
+bool
buf_flush_LRU(
/*==========*/
buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
- ulint min_n) /*!< in: wished minimum mumber of blocks
+ ulint min_n, /*!< in: wished minimum mumber of blocks
flushed (it is not guaranteed that the
actual number is that big, though) */
+ ulint* n_processed) /*!< out: the number of pages
+ which were processed is passed
+ back to caller. Ignored if NULL */
{
ulint page_count;
+ if (n_processed) {
+ *n_processed = 0;
+ }
+
if (!buf_flush_start(buf_pool, BUF_FLUSH_LRU)) {
- return(ULINT_UNDEFINED);
+ return(false);
}
page_count = buf_flush_batch(buf_pool, BUF_FLUSH_LRU, min_n, 0);
@@ -1783,31 +1856,43 @@ buf_flush_LRU(
buf_flush_common(BUF_FLUSH_LRU, page_count);
- return(page_count);
+ if (n_processed) {
+ *n_processed = page_count;
+ }
+
+ return(true);
}
/*******************************************************************//**
This utility flushes dirty blocks from the end of the flush list of
all buffer pool instances.
NOTE: The calling thread is not allowed to own any latches on pages!
-@return number of blocks for which the write request was queued;
-ULINT_UNDEFINED if there was a flush of the same type already running */
+@return true if a batch was queued successfully for each buffer pool
+instance. false if another batch of same type was already running in
+at least one of the buffer pool instance */
UNIV_INTERN
-ulint
+bool
buf_flush_list(
/*===========*/
ulint min_n, /*!< in: wished minimum mumber of blocks
flushed (it is not guaranteed that the
actual number is that big, though) */
- lsn_t lsn_limit) /*!< in the case BUF_FLUSH_LIST all
+ lsn_t lsn_limit, /*!< in the case BUF_FLUSH_LIST all
blocks whose oldest_modification is
smaller than this should be flushed
(if their number does not exceed
min_n), otherwise ignored */
+ ulint* n_processed) /*!< out: the number of pages
+ which were processed is passed
+ back to caller. Ignored if NULL */
+
{
ulint i;
- ulint total_page_count = 0;
- ibool skipped = FALSE;
+ bool success = true;
+
+ if (n_processed) {
+ *n_processed = 0;
+ }
if (min_n != ULINT_MAX) {
/* Ensure that flushing is spread evenly amongst the
@@ -1836,7 +1921,7 @@ buf_flush_list(
pools based on the assumption that it will
help in the retry which will follow the
failure. */
- skipped = TRUE;
+ success = false;
continue;
}
@@ -1848,7 +1933,9 @@ buf_flush_list(
buf_flush_common(BUF_FLUSH_LIST, page_count);
- total_page_count += page_count;
+ if (n_processed) {
+ *n_processed += page_count;
+ }
if (page_count) {
MONITOR_INC_VALUE_CUMULATIVE(
@@ -1859,8 +1946,7 @@ buf_flush_list(
}
}
- return(lsn_limit != LSN_MAX && skipped
- ? ULINT_UNDEFINED : total_page_count);
+ return(success);
}
/******************************************************************//**
@@ -1879,7 +1965,7 @@ buf_flush_single_page_from_LRU(
{
ulint scanned;
buf_page_t* bpage;
- mutex_t* block_mutex;
+ ib_mutex_t* block_mutex;
ibool freed;
ibool evict_zip;
@@ -1957,128 +2043,6 @@ buf_flush_single_page_from_LRU(
return(freed);
}
-/*********************************************************************
-Update the historical stats that we are collecting for flush rate
-heuristics at the end of each interval.
-Flush rate heuristic depends on (a) rate of redo log generation and
-(b) the rate at which LRU flush is happening. */
-UNIV_INTERN
-void
-buf_flush_stat_update(void)
-/*=======================*/
-{
- buf_flush_stat_t* item;
- lsn_t lsn_diff;
- lsn_t lsn;
- ulint n_flushed;
-
- lsn = log_get_lsn();
- if (buf_flush_stat_cur.redo == 0) {
- /* First time around. Just update the current LSN
- and return. */
- buf_flush_stat_cur.redo = lsn;
- return;
- }
-
- item = &buf_flush_stat_arr[buf_flush_stat_arr_ind];
-
- /* values for this interval */
- lsn_diff = lsn - buf_flush_stat_cur.redo;
- n_flushed = buf_lru_flush_page_count
- - buf_flush_stat_cur.n_flushed;
-
- /* add the current value and subtract the obsolete entry. */
- buf_flush_stat_sum.redo += lsn_diff - item->redo;
- buf_flush_stat_sum.n_flushed += n_flushed - item->n_flushed;
-
- /* put current entry in the array. */
- item->redo = lsn_diff;
- item->n_flushed = n_flushed;
-
- /* update the index */
- buf_flush_stat_arr_ind++;
- buf_flush_stat_arr_ind %= BUF_FLUSH_STAT_N_INTERVAL;
-
- /* reset the current entry. */
- buf_flush_stat_cur.redo = lsn;
- buf_flush_stat_cur.n_flushed = buf_lru_flush_page_count;
-}
-
-/*********************************************************************
-Determines the fraction of dirty pages that need to be flushed based
-on the speed at which we generate redo log. Note that if redo log
-is generated at a significant rate without corresponding increase
-in the number of dirty pages (for example, an in-memory workload)
-it can cause IO bursts of flushing. This function implements heuristics
-to avoid this burstiness.
-@return number of dirty pages to be flushed / second */
-static
-ulint
-buf_flush_get_desired_flush_rate(void)
-/*==================================*/
-{
- ulint i;
- lsn_t redo_avg;
- ulint n_dirty = 0;
- ib_uint64_t n_flush_req;
- ib_uint64_t lru_flush_avg;
- lsn_t lsn = log_get_lsn();
- lsn_t log_capacity = log_get_capacity();
-
- /* log_capacity should never be zero after the initialization
- of log subsystem. */
- ut_ad(log_capacity != 0);
-
- /* Get total number of dirty pages. It is OK to access
- flush_list without holding any mutex as we are using this
- only for heuristics. */
- for (i = 0; i < srv_buf_pool_instances; i++) {
- buf_pool_t* buf_pool;
-
- buf_pool = buf_pool_from_array(i);
- n_dirty += UT_LIST_GET_LEN(buf_pool->flush_list);
- }
-
- /* An overflow can happen if we generate more than 2^32 bytes
- of redo in this interval i.e.: 4G of redo in 1 second. We can
- safely consider this as infinity because if we ever come close
- to 4G we'll start a synchronous flush of dirty pages. */
- /* redo_avg below is average at which redo is generated in
- past BUF_FLUSH_STAT_N_INTERVAL + redo generated in the current
- interval. */
- redo_avg = buf_flush_stat_sum.redo / BUF_FLUSH_STAT_N_INTERVAL
- + (lsn - buf_flush_stat_cur.redo);
-
- /* An overflow can happen possibly if we flush more than 2^32
- pages in BUF_FLUSH_STAT_N_INTERVAL. This is a very very
- unlikely scenario. Even when this happens it means that our
- flush rate will be off the mark. It won't affect correctness
- of any subsystem. */
- /* lru_flush_avg below is rate at which pages are flushed as
- part of LRU flush in past BUF_FLUSH_STAT_N_INTERVAL + the
- number of pages flushed in the current interval. */
- lru_flush_avg = buf_flush_stat_sum.n_flushed
- / BUF_FLUSH_STAT_N_INTERVAL
- + (buf_lru_flush_page_count
- - buf_flush_stat_cur.n_flushed);
-
- n_flush_req = (n_dirty * redo_avg) / log_capacity;
-
- /* The number of pages that we want to flush from the flush
- list is the difference between the required rate and the
- number of pages that we are historically flushing from the
- LRU list */
- if (n_flush_req <= lru_flush_avg) {
- return(0);
- } else {
- ib_uint64_t rate;
-
- rate = n_flush_req - lru_flush_avg;
-
- return((ulint) (rate < PCT_IO(100) ? rate : PCT_IO(100)));
- }
-}
-
/*********************************************************************//**
Clears up tail of the LRU lists:
* Put replaceable pages at the tail of LRU to the free list
@@ -2086,36 +2050,35 @@ Clears up tail of the LRU lists:
The depth to which we scan each buffer pool is controlled by dynamic
config parameter innodb_LRU_scan_depth.
@return total pages flushed */
-UNIV_INLINE
+UNIV_INTERN
ulint
-page_cleaner_flush_LRU_tail(void)
-/*=============================*/
+buf_flush_LRU_tail(void)
+/*====================*/
{
- ulint i;
- ulint j;
ulint total_flushed = 0;
- for (i = 0; i < srv_buf_pool_instances; i++) {
+ for (ulint i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool = buf_pool_from_array(i);
/* We divide LRU flush into smaller chunks because
there may be user threads waiting for the flush to
end in buf_LRU_get_free_block(). */
- for (j = 0;
+ for (ulint j = 0;
j < srv_LRU_scan_depth;
j += PAGE_CLEANER_LRU_BATCH_CHUNK_SIZE) {
- ulint n_flushed = buf_flush_LRU(buf_pool,
- PAGE_CLEANER_LRU_BATCH_CHUNK_SIZE);
+ ulint n_flushed = 0;
/* Currently page_cleaner is the only thread
that can trigger an LRU flush. It is possible
that a batch triggered during last iteration is
still running, */
- if (n_flushed != ULINT_UNDEFINED) {
- total_flushed += n_flushed;
- }
+ buf_flush_LRU(buf_pool,
+ PAGE_CLEANER_LRU_BATCH_CHUNK_SIZE,
+ &n_flushed);
+
+ total_flushed += n_flushed;
}
}
@@ -2132,14 +2095,12 @@ page_cleaner_flush_LRU_tail(void)
/*********************************************************************//**
Wait for any possible LRU flushes that are in progress to end. */
-UNIV_INLINE
+UNIV_INTERN
void
-page_cleaner_wait_LRU_flush(void)
-/*=============================*/
+buf_flush_wait_LRU_batch_end(void)
+/*==============================*/
{
- ulint i;
-
- for (i = 0; i < srv_buf_pool_instances; i++) {
+ for (ulint i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool;
buf_pool = buf_pool_from_array(i);
@@ -2166,22 +2127,87 @@ ulint
page_cleaner_do_flush_batch(
/*========================*/
ulint n_to_flush, /*!< in: number of pages that
- we should attempt to flush. If
- an lsn_limit is provided then
- this value will have no affect */
+ we should attempt to flush. */
lsn_t lsn_limit) /*!< in: LSN up to which flushing
must happen */
{
ulint n_flushed;
- ut_ad(n_to_flush == ULINT_MAX || lsn_limit == LSN_MAX);
+ buf_flush_list(n_to_flush, lsn_limit, &n_flushed);
+
+ return(n_flushed);
+}
- n_flushed = buf_flush_list(n_to_flush, lsn_limit);
- if (n_flushed == ULINT_UNDEFINED) {
- n_flushed = 0;
+/*********************************************************************//**
+Calculates if flushing is required based on number of dirty pages in
+the buffer pool.
+@return percent of io_capacity to flush to manage dirty page ratio */
+static
+ulint
+af_get_pct_for_dirty()
+/*==================*/
+{
+ ulint dirty_pct = buf_get_modified_ratio_pct();
+
+ ut_a(srv_max_dirty_pages_pct_lwm
+ <= srv_max_buf_pool_modified_pct);
+
+ if (srv_max_dirty_pages_pct_lwm == 0) {
+ /* The user has not set the option to preflush dirty
+ pages as we approach the high water mark. */
+ if (dirty_pct > srv_max_buf_pool_modified_pct) {
+ /* We have crossed the high water mark of dirty
+ pages In this case we start flushing at 100% of
+ innodb_io_capacity. */
+ return(100);
+ }
+ } else if (dirty_pct > srv_max_dirty_pages_pct_lwm) {
+ /* We should start flushing pages gradually. */
+ return((dirty_pct * 100)
+ / (srv_max_buf_pool_modified_pct + 1));
}
- return(n_flushed);
+ return(0);
+}
+
+/*********************************************************************//**
+Calculates if flushing is required based on redo generation rate.
+@return percent of io_capacity to flush to manage redo space */
+static
+ulint
+af_get_pct_for_lsn(
+/*===============*/
+ lsn_t age) /*!< in: current age of LSN. */
+{
+ lsn_t max_async_age;
+ lsn_t lsn_age_factor;
+ lsn_t af_lwm = (srv_adaptive_flushing_lwm
+ * log_get_capacity()) / 100;
+
+ if (age < af_lwm) {
+ /* No adaptive flushing. */
+ return(0);
+ }
+
+ max_async_age = log_get_max_modified_age_async();
+
+ if (age < max_async_age && !srv_adaptive_flushing) {
+ /* We have still not reached the max_async point and
+ the user has disabled adaptive flushing. */
+ return(0);
+ }
+
+ /* If we are here then we know that either:
+ 1) User has enabled adaptive flushing
+ 2) User may have disabled adaptive flushing but we have reached
+ max_async_age. */
+ lsn_age_factor = (age * 100) / max_async_age;
+
+ ut_ad(srv_max_io_capacity >= srv_io_capacity);
+ return(static_cast<ulint>(
+ ((srv_max_io_capacity / srv_io_capacity)
+ * (lsn_age_factor * sqrt((double)lsn_age_factor)))
+ / 7.5));
}
/*********************************************************************//**
@@ -2195,78 +2221,103 @@ ulint
page_cleaner_flush_pages_if_needed(void)
/*====================================*/
{
- ulint n_pages_flushed = 0;
- lsn_t lsn_limit = log_async_flush_lsn();
+ static lsn_t lsn_avg_rate = 0;
+ static lsn_t prev_lsn = 0;
+ static lsn_t last_lsn = 0;
+ static ulint sum_pages = 0;
+ static ulint last_pages = 0;
+ static ulint prev_pages = 0;
+ static ulint avg_page_rate = 0;
+ static ulint n_iterations = 0;
+ lsn_t oldest_lsn;
+ lsn_t cur_lsn;
+ lsn_t age;
+ lsn_t lsn_rate;
+ ulint n_pages = 0;
+ ulint pct_for_dirty = 0;
+ ulint pct_for_lsn = 0;
+ ulint pct_total = 0;
+ int age_factor = 0;
+
+ cur_lsn = log_get_lsn();
+
+ if (prev_lsn == 0) {
+ /* First time around. */
+ prev_lsn = cur_lsn;
+ return(0);
+ }
- /* Currently we decide whether or not to flush and how much to
- flush based on three factors.
+ if (prev_lsn == cur_lsn) {
+ return(0);
+ }
- 1) If the amount of LSN for which pages are not flushed to disk
- yet is greater than log_sys->max_modified_age_async. This is
- the most urgent type of flush and we attempt to cleanup enough
- of the tail of the flush_list to avoid flushing inside user
- threads.
+ /* We update our variables every srv_flushing_avg_loops
+ iterations to smooth out transitions in the workload. */
+ if (++n_iterations >= srv_flushing_avg_loops) {
- 2) If modified page ratio is greater than the one specified by
- the user. In that case we flush full 100% IO_CAPACITY of the
- server. Note that 1 and 2 are not mutually exclusive. We can
- end up executing both steps.
+ avg_page_rate = ((sum_pages / srv_flushing_avg_loops)
+ + avg_page_rate) / 2;
- 3) If adaptive_flushing is set by the user and neither of 1
- or 2 has occurred above then we flush a batch based on our
- heuristics. */
+ /* How much LSN we have generated since last call. */
+ lsn_rate = (cur_lsn - prev_lsn) / srv_flushing_avg_loops;
- if (lsn_limit != LSN_MAX) {
+ lsn_avg_rate = (lsn_avg_rate + lsn_rate) / 2;
- /* async flushing is requested */
- n_pages_flushed = page_cleaner_do_flush_batch(ULINT_MAX,
- lsn_limit);
+ prev_lsn = cur_lsn;
- MONITOR_INC_VALUE_CUMULATIVE(
- MONITOR_FLUSH_ASYNC_TOTAL_PAGE,
- MONITOR_FLUSH_ASYNC_COUNT,
- MONITOR_FLUSH_ASYNC_PAGES,
- n_pages_flushed);
+ n_iterations = 0;
+
+ sum_pages = 0;
}
- if (UNIV_UNLIKELY(n_pages_flushed < PCT_IO(100)
- && buf_get_modified_ratio_pct()
- > srv_max_buf_pool_modified_pct)) {
+ oldest_lsn = buf_pool_get_oldest_modification();
- /* Try to keep the number of modified pages in the
- buffer pool under the limit wished by the user */
+ ut_ad(oldest_lsn <= cur_lsn);
- n_pages_flushed += page_cleaner_do_flush_batch(PCT_IO(100),
- LSN_MAX);
+ age = cur_lsn - oldest_lsn;
- MONITOR_INC_VALUE_CUMULATIVE(
- MONITOR_FLUSH_MAX_DIRTY_TOTAL_PAGE,
- MONITOR_FLUSH_MAX_DIRTY_COUNT,
- MONITOR_FLUSH_MAX_DIRTY_PAGES,
- n_pages_flushed);
+ pct_for_dirty = af_get_pct_for_dirty();
+ pct_for_lsn = af_get_pct_for_lsn(age);
+
+ pct_total = ut_max(pct_for_dirty, pct_for_lsn);
+
+ /* Cap the maximum IO capacity that we are going to use by
+ max_io_capacity. */
+ n_pages = (PCT_IO(pct_total) + avg_page_rate) / 2;
+
+ if (n_pages > srv_max_io_capacity) {
+ n_pages = srv_max_io_capacity;
}
- if (srv_adaptive_flushing && n_pages_flushed == 0) {
+ if (last_pages && cur_lsn - last_lsn > lsn_avg_rate / 2) {
+ age_factor = prev_pages / last_pages;
+ }
- /* Try to keep the rate of flushing of dirty
- pages such that redo log generation does not
- produce bursts of IO at checkpoint time. */
- ulint n_flush = buf_flush_get_desired_flush_rate();
+ MONITOR_SET(MONITOR_FLUSH_N_TO_FLUSH_REQUESTED, n_pages);
- ut_ad(n_flush <= PCT_IO(100));
- if (n_flush) {
- n_pages_flushed = page_cleaner_do_flush_batch(
- n_flush, LSN_MAX);
+ prev_pages = n_pages;
+ n_pages = page_cleaner_do_flush_batch(
+ n_pages, oldest_lsn + lsn_avg_rate * (age_factor + 1));
- MONITOR_INC_VALUE_CUMULATIVE(
- MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE,
- MONITOR_FLUSH_ADAPTIVE_COUNT,
- MONITOR_FLUSH_ADAPTIVE_PAGES,
- n_pages_flushed);
- }
+ last_lsn= cur_lsn;
+ last_pages= n_pages + 1;
+
+ MONITOR_SET(MONITOR_FLUSH_AVG_PAGE_RATE, avg_page_rate);
+ MONITOR_SET(MONITOR_FLUSH_LSN_AVG_RATE, lsn_avg_rate);
+ MONITOR_SET(MONITOR_FLUSH_PCT_FOR_DIRTY, pct_for_dirty);
+ MONITOR_SET(MONITOR_FLUSH_PCT_FOR_LSN, pct_for_lsn);
+
+ if (n_pages) {
+ MONITOR_INC_VALUE_CUMULATIVE(
+ MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE,
+ MONITOR_FLUSH_ADAPTIVE_COUNT,
+ MONITOR_FLUSH_ADAPTIVE_PAGES,
+ n_pages);
+
+ sum_pages += n_pages;
}
- return(n_pages_flushed);
+ return(n_pages);
}
/*********************************************************************//**
@@ -2306,7 +2357,8 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)(
ulint next_loop_time = ut_time_ms() + 1000;
ulint n_flushed = 0;
ulint last_activity = srv_get_activity_count();
- ulint i;
+
+ ut_ad(!srv_read_only_mode);
#ifdef UNIV_PFS_THREAD
pfs_register_thread(buf_page_cleaner_thread_key);
@@ -2336,7 +2388,7 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)(
last_activity = srv_get_activity_count();
/* Flush pages from end of LRU if required */
- n_flushed = page_cleaner_flush_LRU_tail();
+ n_flushed = buf_flush_LRU_tail();
/* Flush pages from flush_list if required */
n_flushed += page_cleaner_flush_pages_if_needed();
@@ -2396,19 +2448,21 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)(
sweep and we'll come out of the loop leaving behind dirty pages
in the flush_list */
buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
- page_cleaner_wait_LRU_flush();
+ buf_flush_wait_LRU_batch_end();
+
+ bool success;
do {
- n_flushed = buf_flush_list(PCT_IO(100), LSN_MAX);
+ success = buf_flush_list(PCT_IO(100), LSN_MAX, &n_flushed);
buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
- } while (n_flushed > 0);
+ } while (!success || n_flushed > 0);
/* Some sanity checks */
ut_a(srv_get_active_thread_type() == SRV_NONE);
ut_a(srv_shutdown_state == SRV_SHUTDOWN_FLUSH_PHASE);
- for (i = 0; i < srv_buf_pool_instances; i++) {
+ for (ulint i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool = buf_pool_from_array(i);
ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == 0);
}
@@ -2521,3 +2575,66 @@ buf_flush_validate(
}
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#endif /* !UNIV_HOTBACKUP */
+
+#ifdef UNIV_DEBUG
+/******************************************************************//**
+Check if there are any dirty pages that belong to a space id in the flush
+list in a particular buffer pool.
+@return number of dirty pages present in a single buffer pool */
+UNIV_INTERN
+ulint
+buf_pool_get_dirty_pages_count(
+/*===========================*/
+ buf_pool_t* buf_pool, /*!< in: buffer pool */
+ ulint id) /*!< in: space id to check */
+
+{
+ ulint count = 0;
+
+ buf_pool_mutex_enter(buf_pool);
+ buf_flush_list_mutex_enter(buf_pool);
+
+ buf_page_t* bpage;
+
+ for (bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
+ bpage != 0;
+ bpage = UT_LIST_GET_NEXT(list, bpage)) {
+
+ ut_ad(buf_page_in_file(bpage));
+ ut_ad(bpage->in_flush_list);
+ ut_ad(bpage->oldest_modification > 0);
+
+ if (buf_page_get_space(bpage) == id) {
+ ++count;
+ }
+ }
+
+ buf_flush_list_mutex_exit(buf_pool);
+ buf_pool_mutex_exit(buf_pool);
+
+ return(count);
+}
+
+/******************************************************************//**
+Check if there are any dirty pages that belong to a space id in the flush list.
+@return number of dirty pages present in all the buffer pools */
+UNIV_INTERN
+ulint
+buf_flush_get_dirty_pages_count(
+/*============================*/
+ ulint id) /*!< in: space id to check */
+
+{
+ ulint count = 0;
+
+ for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
+ buf_pool_t* buf_pool;
+
+ buf_pool = buf_pool_from_array(i);
+
+ count += buf_pool_get_dirty_pages_count(buf_pool, id);
+ }
+
+ return(count);
+}
+#endif /* UNIV_DEBUG */
diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc
index c35d84cb985..270263d95f1 100644
--- a/storage/innobase/buf/buf0lru.cc
+++ b/storage/innobase/buf/buf0lru.cc
@@ -51,6 +51,9 @@ Created 11/5/1995 Heikki Tuuri
#include "log0recv.h"
#include "srv0srv.h"
#include "srv0mon.h"
+#include "lock0lock.h"
+
+#include "ha_prototypes.h"
/** The number of blocks from the LRU_old pointer onward, including
the block pointed to, must be buf_pool->LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
@@ -158,6 +161,22 @@ buf_LRU_block_free_hashed_page(
be in a state where it can be freed */
/******************************************************************//**
+Increases the LRU size in bytes by zip_size for a compressed page,
+or by UNIV_PAGE_SIZE for an uncompressed page. */
+static inline
+void
+incr_LRU_size_in_bytes(
+/*===================*/
+ buf_page_t* bpage, /*!< in: control block */
+ buf_pool_t* buf_pool) /*!< in: buffer pool instance */
+{
+ ut_ad(buf_pool_mutex_own(buf_pool));
+ ulint zip_size = page_zip_get_size(&bpage->zip);
+ buf_pool->stat.LRU_bytes += zip_size ? zip_size : UNIV_PAGE_SIZE;
+ ut_ad(buf_pool->stat.LRU_bytes <= buf_pool->curr_pool_size);
+}
+
+/******************************************************************//**
Determines if the unzip_LRU list should be used for evicting a victim
instead of the general LRU list.
@return TRUE if should use unzip_LRU */
@@ -342,39 +361,338 @@ next_page:
}
/******************************************************************//**
+While flushing (or removing dirty) pages from a tablespace we don't
+want to hog the CPU and resources. Release the buffer pool and block
+mutex and try to force a context switch. Then reacquire the same mutexes.
+The current page is "fixed" before the release of the mutexes and then
+"unfixed" again once we have reacquired the mutexes. */
+static __attribute__((nonnull))
+void
+buf_flush_yield(
+/*============*/
+ buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
+ buf_page_t* bpage) /*!< in/out: current page */
+{
+ ib_mutex_t* block_mutex;
+
+ ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_ad(buf_page_in_file(bpage));
+
+ block_mutex = buf_page_get_mutex(bpage);
+
+ mutex_enter(block_mutex);
+ /* "Fix" the block so that the position cannot be
+ changed after we release the buffer pool and
+ block mutexes. */
+ buf_page_set_sticky(bpage);
+
+ /* Now it is safe to release the buf_pool->mutex. */
+ buf_pool_mutex_exit(buf_pool);
+
+ mutex_exit(block_mutex);
+ /* Try and force a context switch. */
+ os_thread_yield();
+
+ buf_pool_mutex_enter(buf_pool);
+
+ mutex_enter(block_mutex);
+ /* "Unfix" the block now that we have both the
+ buffer pool and block mutex again. */
+ buf_page_unset_sticky(bpage);
+ mutex_exit(block_mutex);
+}
+
+/******************************************************************//**
+If we have hogged the resources for too long then release the buffer
+pool and flush list mutex and do a thread yield. Set the current page
+to "sticky" so that it is not relocated during the yield.
+@return true if yielded */
+static __attribute__((nonnull(1), warn_unused_result))
+bool
+buf_flush_try_yield(
+/*================*/
+ buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
+ buf_page_t* bpage, /*!< in/out: bpage to remove */
+ ulint processed) /*!< in: number of pages processed */
+{
+ /* Every BUF_LRU_DROP_SEARCH_SIZE iterations in the
+ loop we release buf_pool->mutex to let other threads
+ do their job but only if the block is not IO fixed. This
+ ensures that the block stays in its position in the
+ flush_list. */
+
+ if (bpage != NULL
+ && processed >= BUF_LRU_DROP_SEARCH_SIZE
+ && buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
+
+ buf_flush_list_mutex_exit(buf_pool);
+
+ /* Release the buffer pool and block mutex
+ to give the other threads a go. */
+
+ buf_flush_yield(buf_pool, bpage);
+
+ buf_flush_list_mutex_enter(buf_pool);
+
+ /* Should not have been removed from the flush
+ list during the yield. However, this check is
+ not sufficient to catch a remove -> add. */
+
+ ut_ad(bpage->in_flush_list);
+
+ return(true);
+ }
+
+ return(false);
+}
+
+/******************************************************************//**
+Removes a single page from a given tablespace inside a specific
+buffer pool instance.
+@return true if page was removed. */
+static __attribute__((nonnull, warn_unused_result))
+bool
+buf_flush_or_remove_page(
+/*=====================*/
+ buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
+ buf_page_t* bpage, /*!< in/out: bpage to remove */
+ bool flush) /*!< in: flush to disk if true but
+ don't remove else remove without
+ flushing to disk */
+{
+ ib_mutex_t* block_mutex;
+ bool processed = false;
+
+ ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_ad(buf_flush_list_mutex_own(buf_pool));
+
+ block_mutex = buf_page_get_mutex(bpage);
+
+ /* bpage->space and bpage->io_fix are protected by
+ buf_pool->mutex and block_mutex. It is safe to check
+ them while holding buf_pool->mutex only. */
+
+ if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
+
+ /* We cannot remove this page during this scan
+ yet; maybe the system is currently reading it
+ in, or flushing the modifications to the file */
+
+ } else {
+
+ /* We have to release the flush_list_mutex to obey the
+ latching order. We are however guaranteed that the page
+ will stay in the flush_list because buf_flush_remove()
+ needs buf_pool->mutex as well (for the non-flush case). */
+
+ buf_flush_list_mutex_exit(buf_pool);
+
+ mutex_enter(block_mutex);
+
+ ut_ad(bpage->oldest_modification != 0);
+
+ if (bpage->buf_fix_count > 0) {
+
+ mutex_exit(block_mutex);
+
+ /* We cannot remove this page yet;
+ maybe the system is currently reading
+ it in, or flushing the modifications
+ to the file */
+
+ } else if (!flush) {
+
+ buf_flush_remove(bpage);
+
+ mutex_exit(block_mutex);
+
+ processed = true;
+
+ } else if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
+
+ /* Check the status again after releasing the flush
+ list mutex and acquiring the block mutex. The background
+ flush thread may be in the process of flushing this
+ page when we released the flush list mutex. */
+
+ /* The following call will release the buffer pool
+ and block mutex. */
+ buf_flush_page(buf_pool, bpage, BUF_FLUSH_SINGLE_PAGE);
+
+ /* Wake possible simulated aio thread to actually
+ post the writes to the operating system */
+ os_aio_simulated_wake_handler_threads();
+
+ buf_pool_mutex_enter(buf_pool);
+
+ processed = true;
+ } else {
+ mutex_exit(block_mutex);
+ }
+
+ buf_flush_list_mutex_enter(buf_pool);
+ }
+
+ ut_ad(!mutex_own(block_mutex));
+
+ return(processed);
+}
+
+/******************************************************************//**
Remove all dirty pages belonging to a given tablespace inside a specific
buffer pool instance when we are deleting the data file(s) of that
tablespace. The pages still remain a part of LRU and are evicted from
-the list as they age towards the tail of the LRU. */
-static
+the list as they age towards the tail of the LRU.
+@retval DB_SUCCESS if all freed
+@retval DB_FAIL if not all freed
+@retval DB_INTERRUPTED if the transaction was interrupted */
+static __attribute__((nonnull(1), warn_unused_result))
+dberr_t
+buf_flush_or_remove_pages(
+/*======================*/
+ buf_pool_t* buf_pool, /*!< buffer pool instance */
+ ulint id, /*!< in: target space id for which
+ to remove or flush pages */
+ bool flush, /*!< in: flush to disk if true but
+ don't remove else remove without
+ flushing to disk */
+ const trx_t* trx) /*!< to check if the operation must
+ be interrupted, can be 0 */
+{
+ buf_page_t* prev;
+ buf_page_t* bpage;
+ ulint processed = 0;
+ bool all_freed = true;
+
+ buf_flush_list_mutex_enter(buf_pool);
+
+ for (bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
+ bpage != NULL;
+ bpage = prev) {
+
+ ut_a(buf_page_in_file(bpage));
+
+ /* Save the previous link because once we free the
+ page we can't rely on the links. */
+
+ prev = UT_LIST_GET_PREV(list, bpage);
+
+ if (buf_page_get_space(bpage) != id) {
+
+ /* Skip this block, as it does not belong to
+ the target space. */
+
+ } else if (!buf_flush_or_remove_page(buf_pool, bpage, flush)) {
+
+ /* Remove was unsuccessful, we have to try again
+ by scanning the entire list from the end. */
+
+ all_freed = false;
+ }
+
+ ++processed;
+
+ /* Yield if we have hogged the CPU and mutexes for too long. */
+ if (buf_flush_try_yield(buf_pool, prev, processed)) {
+
+ /* Reset the batch size counter if we had to yield. */
+
+ processed = 0;
+ }
+
+#ifdef DBUG_OFF
+ if (flush) {
+ DBUG_EXECUTE_IF("ib_export_flush_crash",
+ static ulint n_pages;
+ if (++n_pages == 4) {DBUG_SUICIDE();});
+ }
+#endif /* DBUG_OFF */
+
+ /* The check whether trx is interrupted is expensive; we want
+ to perform it only every N iterations. */
+ if (!processed && trx && trx_is_interrupted(trx)) {
+ buf_flush_list_mutex_exit(buf_pool);
+ return(DB_INTERRUPTED);
+ }
+ }
+
+ buf_flush_list_mutex_exit(buf_pool);
+
+ return(all_freed ? DB_SUCCESS : DB_FAIL);
+}
+
+/******************************************************************//**
+Remove or flush all the dirty pages that belong to a given tablespace
+inside a specific buffer pool instance. The pages will remain in the LRU
+list and will be evicted from the LRU list as they age and move towards
+the tail of the LRU list. */
+static __attribute__((nonnull(1)))
void
-buf_LRU_remove_dirty_pages_for_tablespace(
-/*======================================*/
+buf_flush_dirty_pages(
+/*==================*/
+ buf_pool_t* buf_pool, /*!< buffer pool instance */
+ ulint id, /*!< in: space id */
+ bool flush, /*!< in: flush to disk if true otherwise
+ remove the pages without flushing */
+ const trx_t* trx) /*!< to check if the operation must
+ be interrupted */
+{
+ dberr_t err;
+
+ do {
+ buf_pool_mutex_enter(buf_pool);
+
+ err = buf_flush_or_remove_pages(buf_pool, id, flush, trx);
+
+ buf_pool_mutex_exit(buf_pool);
+
+ ut_ad(buf_flush_validate(buf_pool));
+
+ if (err == DB_FAIL) {
+ os_thread_sleep(20000);
+ }
+
+ /* DB_FAIL is a soft error, it means that the task wasn't
+ completed, needs to be retried. */
+
+ ut_ad(buf_flush_validate(buf_pool));
+
+ } while (err == DB_FAIL);
+}
+
+/******************************************************************//**
+Remove all pages that belong to a given tablespace inside a specific
+buffer pool instance when we are DISCARDing the tablespace. */
+static __attribute__((nonnull))
+void
+buf_LRU_remove_all_pages(
+/*=====================*/
buf_pool_t* buf_pool, /*!< buffer pool instance */
ulint id) /*!< in: space id */
{
buf_page_t* bpage;
ibool all_freed;
- ulint i;
scan_again:
buf_pool_mutex_enter(buf_pool);
- buf_flush_list_mutex_enter(buf_pool);
all_freed = TRUE;
- for (bpage = UT_LIST_GET_LAST(buf_pool->flush_list), i = 0;
- bpage != NULL; ++i) {
+ for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
+ bpage != NULL;
+ /* No op */) {
+ rw_lock_t* hash_lock;
buf_page_t* prev_bpage;
- mutex_t* block_mutex = NULL;
+ ib_mutex_t* block_mutex = NULL;
ut_a(buf_page_in_file(bpage));
+ ut_ad(bpage->in_LRU_list);
- prev_bpage = UT_LIST_GET_PREV(list, bpage);
+ prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
/* bpage->space and bpage->io_fix are protected by
- buf_pool->mutex and block_mutex. It is safe to check
+ buf_pool->mutex and the block_mutex. It is safe to check
them while holding buf_pool->mutex only. */
if (buf_page_get_space(bpage) != id) {
@@ -388,83 +706,103 @@ scan_again:
all_freed = FALSE;
goto next_page;
- }
+ } else {
+ ulint fold = buf_page_address_fold(
+ bpage->space, bpage->offset);
- /* We have to release the flush_list_mutex to obey the
- latching order. We are however guaranteed that the page
- will stay in the flush_list because buf_flush_remove()
- needs buf_pool->mutex as well. */
- buf_flush_list_mutex_exit(buf_pool);
- block_mutex = buf_page_get_mutex(bpage);
- mutex_enter(block_mutex);
+ hash_lock = buf_page_hash_lock_get(buf_pool, fold);
- if (bpage->buf_fix_count > 0) {
- mutex_exit(block_mutex);
- buf_flush_list_mutex_enter(buf_pool);
+ rw_lock_x_lock(hash_lock);
- /* We cannot remove this page during
- this scan yet; maybe the system is
- currently reading it in, or flushing
- the modifications to the file */
+ block_mutex = buf_page_get_mutex(bpage);
+ mutex_enter(block_mutex);
- all_freed = FALSE;
- goto next_page;
- }
+ if (bpage->buf_fix_count > 0) {
- ut_ad(bpage->oldest_modification != 0);
+ mutex_exit(block_mutex);
- buf_flush_remove(bpage);
+ rw_lock_x_unlock(hash_lock);
- mutex_exit(block_mutex);
- buf_flush_list_mutex_enter(buf_pool);
-next_page:
- bpage = prev_bpage;
+ /* We cannot remove this page during
+ this scan yet; maybe the system is
+ currently reading it in, or flushing
+ the modifications to the file */
- if (!bpage) {
- break;
+ all_freed = FALSE;
+
+ goto next_page;
+ }
}
- /* Every BUF_LRU_DROP_SEARCH_SIZE iterations in the
- loop we release buf_pool->mutex to let other threads
- do their job. */
- if (i < BUF_LRU_DROP_SEARCH_SIZE) {
- continue;
+ ut_ad(mutex_own(block_mutex));
+
+#ifdef UNIV_DEBUG
+ if (buf_debug_prints) {
+ fprintf(stderr,
+ "Dropping space %lu page %lu\n",
+ (ulong) buf_page_get_space(bpage),
+ (ulong) buf_page_get_page_no(bpage));
}
+#endif
+ if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
+ /* Do nothing, because the adaptive hash index
+ covers uncompressed pages only. */
+ } else if (((buf_block_t*) bpage)->index) {
+ ulint page_no;
+ ulint zip_size;
- /* We IO-fix the block to make sure that the block
- stays in its position in the flush_list. */
- if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
- /* Block is already IO-fixed. We don't
- want to change the value. Lets leave
- this block alone. */
- continue;
+ buf_pool_mutex_exit(buf_pool);
+
+ zip_size = buf_page_get_zip_size(bpage);
+ page_no = buf_page_get_page_no(bpage);
+
+ rw_lock_x_unlock(hash_lock);
+
+ mutex_exit(block_mutex);
+
+ /* Note that the following call will acquire
+ and release block->lock X-latch. */
+
+ btr_search_drop_page_hash_when_freed(
+ id, zip_size, page_no);
+
+ goto scan_again;
}
- buf_flush_list_mutex_exit(buf_pool);
- block_mutex = buf_page_get_mutex(bpage);
- mutex_enter(block_mutex);
- buf_page_set_sticky(bpage);
- mutex_exit(block_mutex);
+ if (bpage->oldest_modification != 0) {
- /* Now it is safe to release the buf_pool->mutex. */
- buf_pool_mutex_exit(buf_pool);
- os_thread_yield();
- buf_pool_mutex_enter(buf_pool);
+ buf_flush_remove(bpage);
+ }
- mutex_enter(block_mutex);
- buf_page_unset_sticky(bpage);
- mutex_exit(block_mutex);
+ ut_ad(!bpage->in_flush_list);
- buf_flush_list_mutex_enter(buf_pool);
- ut_ad(bpage->in_flush_list);
+ /* Remove from the LRU list. */
- i = 0;
+ if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
+ != BUF_BLOCK_ZIP_FREE) {
+
+ buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
+
+ } else {
+ /* The block_mutex should have been released
+ by buf_LRU_block_remove_hashed_page() when it
+ returns BUF_BLOCK_ZIP_FREE. */
+ ut_ad(block_mutex == &buf_pool->zip_mutex);
+ }
+
+ ut_ad(!mutex_own(block_mutex));
+
+#ifdef UNIV_SYNC_DEBUG
+ /* buf_LRU_block_remove_hashed_page() releases the hash_lock */
+ ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX));
+ ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED));
+#endif /* UNIV_SYNC_DEBUG */
+
+next_page:
+ bpage = prev_bpage;
}
buf_pool_mutex_exit(buf_pool);
- buf_flush_list_mutex_exit(buf_pool);
-
- ut_ad(buf_flush_validate(buf_pool));
if (!all_freed) {
os_thread_sleep(20000);
@@ -474,15 +812,60 @@ next_page:
}
/******************************************************************//**
-Invalidates all pages belonging to a given tablespace when we are deleting
-the data file(s) of that tablespace. */
+Remove pages belonging to a given tablespace inside a specific
+buffer pool instance when we are deleting the data file(s) of that
+tablespace. The pages still remain a part of LRU and are evicted from
+the list as they age towards the tail of the LRU only if buf_remove
+is BUF_REMOVE_FLUSH_NO_WRITE. */
+static __attribute__((nonnull(1)))
+void
+buf_LRU_remove_pages(
+/*=================*/
+ buf_pool_t* buf_pool, /*!< buffer pool instance */
+ ulint id, /*!< in: space id */
+ buf_remove_t buf_remove, /*!< in: remove or flush strategy */
+ const trx_t* trx) /*!< to check if the operation must
+ be interrupted */
+{
+ switch (buf_remove) {
+ case BUF_REMOVE_ALL_NO_WRITE:
+ buf_LRU_remove_all_pages(buf_pool, id);
+ break;
+
+ case BUF_REMOVE_FLUSH_NO_WRITE:
+ ut_a(trx == 0);
+ buf_flush_dirty_pages(buf_pool, id, false, NULL);
+ ut_ad(trx_is_interrupted(trx)
+ || buf_pool_get_dirty_pages_count(buf_pool, id) == 0);
+ break;
+
+ case BUF_REMOVE_FLUSH_WRITE:
+ ut_a(trx != 0);
+ buf_flush_dirty_pages(buf_pool, id, true, trx);
+ ut_ad(trx_is_interrupted(trx)
+ || buf_pool_get_dirty_pages_count(buf_pool, id) == 0);
+ /* Ensure that all asynchronous IO is completed. */
+ os_aio_wait_until_no_pending_writes();
+ fil_flush(id);
+ break;
+ }
+}
+
+/******************************************************************//**
+Flushes all dirty pages or removes all pages belonging
+to a given tablespace. A PROBLEM: if readahead is being started, what
+guarantees that it will not try to read in pages after this operation
+has completed? */
UNIV_INTERN
void
-buf_LRU_invalidate_tablespace(
+buf_LRU_flush_or_remove_pages(
/*==========================*/
- ulint id) /*!< in: space id */
+ ulint id, /*!< in: space id */
+ buf_remove_t buf_remove, /*!< in: remove or flush strategy */
+ const trx_t* trx) /*!< to check if the operation must
+ be interrupted */
{
- ulint i;
+ ulint i;
/* Before we attempt to drop pages one by one we first
attempt to drop page hash index entries in batches to make
@@ -494,9 +877,28 @@ buf_LRU_invalidate_tablespace(
buf_pool_t* buf_pool;
buf_pool = buf_pool_from_array(i);
- buf_LRU_drop_page_hash_for_tablespace(buf_pool, id);
- buf_LRU_remove_dirty_pages_for_tablespace(buf_pool, id);
+
+ switch (buf_remove) {
+ case BUF_REMOVE_ALL_NO_WRITE:
+ case BUF_REMOVE_FLUSH_NO_WRITE:
+ buf_LRU_drop_page_hash_for_tablespace(buf_pool, id);
+ break;
+
+ case BUF_REMOVE_FLUSH_WRITE:
+ /* We allow read-only queries against the
+ table, there is no need to drop the AHI entries. */
+ break;
+ }
+
+ buf_LRU_remove_pages(buf_pool, id, buf_remove, trx);
}
+
+#ifdef UNIV_DEBUG
+ if (trx != 0 && id != 0) {
+ ut_ad(trx_is_interrupted(trx)
+ || buf_flush_get_dirty_pages_count(id) == 0);
+ }
+#endif /* UNIV_DEBUG */
}
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
@@ -790,7 +1192,7 @@ buf_LRU_check_size_of_non_data_objects(
buf_lru_switched_on_innodb_mon = TRUE;
srv_print_innodb_monitor = TRUE;
- os_event_set(srv_timeout_event);
+ os_event_set(lock_sys->timeout_event);
}
} else if (buf_lru_switched_on_innodb_mon) {
@@ -938,7 +1340,7 @@ loop:
mon_value_was = srv_print_innodb_monitor;
started_monitor = TRUE;
srv_print_innodb_monitor = TRUE;
- os_event_set(srv_timeout_event);
+ os_event_set(lock_sys->timeout_event);
}
/* If we have scanned the whole LRU and still are unable to
@@ -965,7 +1367,7 @@ loop:
++flush_failures;
}
- ++srv_buf_pool_wait_free;
+ srv_stats.buf_pool_wait_free.add(n_iterations, 1);
n_iterations++;
@@ -1107,6 +1509,7 @@ buf_LRU_remove_block(
buf_page_t* bpage) /*!< in: control block */
{
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+ ulint zip_size;
ut_ad(buf_pool);
ut_ad(bpage);
@@ -1142,6 +1545,9 @@ buf_LRU_remove_block(
UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);
ut_d(bpage->in_LRU_list = FALSE);
+ zip_size = page_zip_get_size(&bpage->zip);
+ buf_pool->stat.LRU_bytes -= zip_size ? zip_size : UNIV_PAGE_SIZE;
+
buf_unzip_LRU_remove_block_if_needed(bpage);
/* If the LRU list is so short that LRU_old is not defined,
@@ -1202,7 +1608,10 @@ buf_unzip_LRU_add_block(
}
/******************************************************************//**
-Adds a block to the LRU list end. */
+Adds a block to the LRU list end. Please make sure that the zip_size is
+already set in the page zip before invoking this function, so that the
+correct zip_size can be read from the buffer page when adding the
+block to the LRU list. */
UNIV_INLINE
void
buf_LRU_add_block_to_end_low(
@@ -1221,6 +1630,8 @@ buf_LRU_add_block_to_end_low(
UT_LIST_ADD_LAST(LRU, buf_pool->LRU, bpage);
ut_d(bpage->in_LRU_list = TRUE);
+ incr_LRU_size_in_bytes(bpage, buf_pool);
+
if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
ut_ad(buf_pool->LRU_old);
@@ -1249,7 +1660,10 @@ buf_LRU_add_block_to_end_low(
}
/******************************************************************//**
-Adds a block to the LRU list. */
+Adds a block to the LRU list. Please make sure that the zip_size is
+already set in the page zip before invoking this function, so that the
+correct zip_size can be read from the buffer page when adding the
+block to the LRU list. */
UNIV_INLINE
void
buf_LRU_add_block_low(
@@ -1291,6 +1705,8 @@ buf_LRU_add_block_low(
ut_d(bpage->in_LRU_list = TRUE);
+ incr_LRU_size_in_bytes(bpage, buf_pool);
+
if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
ut_ad(buf_pool->LRU_old);
@@ -1318,7 +1734,10 @@ buf_LRU_add_block_low(
}
/******************************************************************//**
-Adds a block to the LRU list. */
+Adds a block to the LRU list. Please make sure that the zip_size is
+already set in the page zip before invoking this function, so that the
+correct zip_size can be read from the buffer page when adding the
+block to the LRU list. */
UNIV_INTERN
void
buf_LRU_add_block(
@@ -1391,7 +1810,7 @@ buf_LRU_free_block(
bpage->offset);
rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold);
- mutex_t* block_mutex = buf_page_get_mutex(bpage);
+ ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(buf_page_in_file(bpage));
@@ -1540,6 +1959,8 @@ func_exit:
UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU,
prev_b, b);
+ incr_LRU_size_in_bytes(b, buf_pool);
+
if (buf_page_is_old(b)) {
buf_pool->LRU_old_len++;
if (UNIV_UNLIKELY
@@ -1995,24 +2416,28 @@ buf_LRU_free_one_page(
be in a state where it can be freed; there
may or may not be a hash index to the page */
{
-#ifdef UNIV_DEBUG
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
-#endif
- mutex_t* block_mutex = buf_page_get_mutex(bpage);
+ const ulint fold = buf_page_address_fold(bpage->space,
+ bpage->offset);
+ rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold);
+ ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
ut_ad(buf_pool_mutex_own(buf_pool));
- ut_ad(mutex_own(block_mutex));
+
+ rw_lock_x_lock(hash_lock);
+ mutex_enter(block_mutex);
if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
!= BUF_BLOCK_ZIP_FREE) {
buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
- } else {
- /* The block_mutex should have been released by
- buf_LRU_block_remove_hashed_page() when it returns
- BUF_BLOCK_ZIP_FREE. */
- ut_ad(block_mutex == &buf_pool->zip_mutex);
- mutex_enter(block_mutex);
}
+
+ /* buf_LRU_block_remove_hashed_page() releases hash_lock and block_mutex */
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX)
+ && !rw_lock_own(hash_lock, RW_LOCK_SHARED));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!mutex_own(block_mutex));
}
/**********************************************************************//**
diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc
index 227cb083725..3a579e251ff 100644
--- a/storage/innobase/buf/buf0rea.cc
+++ b/storage/innobase/buf/buf0rea.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -61,7 +61,7 @@ buf_read_page_handle_error(
buf_page_t* bpage) /*!< in: pointer to the block */
{
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- const ibool uncompressed = (buf_page_get_state(bpage)
+ const bool uncompressed = (buf_page_get_state(bpage)
== BUF_BLOCK_FILE_PAGE);
/* First unfix and release lock on the bpage */
@@ -79,13 +79,14 @@ buf_read_page_handle_error(
BUF_IO_READ);
}
+ mutex_exit(buf_page_get_mutex(bpage));
+
/* remove the block from LRU list */
buf_LRU_free_one_page(bpage);
ut_ad(buf_pool->n_pend_reads > 0);
buf_pool->n_pend_reads--;
- mutex_exit(buf_page_get_mutex(bpage));
buf_pool_mutex_exit(buf_pool);
}
@@ -103,7 +104,7 @@ static
ulint
buf_read_page_low(
/*==============*/
- ulint* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED if we are
+ dberr_t* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED if we are
trying to read from a non-existent tablespace, or a
tablespace which is just now being dropped */
ibool sync, /*!< in: TRUE if synchronous aio is desired */
@@ -192,13 +193,9 @@ buf_read_page_low(
}
thd_wait_end(NULL);
- if (*err == DB_TABLESPACE_DELETED) {
- buf_read_page_handle_error(bpage);
- return(0);
- }
-
if (*err != DB_SUCCESS) {
- if (ignore_nonexistent_pages) {
+ if (ignore_nonexistent_pages || *err == DB_TABLESPACE_DELETED) {
+ buf_read_page_handle_error(bpage);
return(0);
}
/* else */
@@ -248,7 +245,7 @@ buf_read_ahead_random(
ulint ibuf_mode;
ulint count;
ulint low, high;
- ulint err;
+ dberr_t err;
ulint i;
const ulint buf_read_ahead_random_area
= BUF_READ_AHEAD_AREA(buf_pool);
@@ -377,7 +374,7 @@ read_ahead:
buf_LRU_stat_inc_io();
buf_pool->stat.n_ra_pages_read_rnd += count;
- srv_buf_pool_reads += count;
+ srv_stats.buf_pool_reads.add(count);
return(count);
}
@@ -397,7 +394,7 @@ buf_read_page(
{
ib_int64_t tablespace_version;
ulint count;
- ulint err;
+ dberr_t err;
tablespace_version = fil_space_get_version(space);
@@ -407,7 +404,7 @@ buf_read_page(
count = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
zip_size, FALSE,
tablespace_version, offset);
- srv_buf_pool_reads += count;
+ srv_stats.buf_pool_reads.add(count);
if (err == DB_TABLESPACE_DELETED) {
ut_print_timestamp(stderr);
fprintf(stderr,
@@ -440,7 +437,7 @@ buf_read_page_async(
ulint zip_size;
ib_int64_t tablespace_version;
ulint count;
- ulint err;
+ dberr_t err;
zip_size = fil_space_get_zip_size(space);
@@ -455,7 +452,7 @@ buf_read_page_async(
| BUF_READ_IGNORE_NONEXISTENT_PAGES,
space, zip_size, FALSE,
tablespace_version, offset);
- srv_buf_pool_reads += count;
+ srv_stats.buf_pool_reads.add(count);
/* We do not increment number of I/O operations used for LRU policy
here (buf_LRU_stat_inc_io()). We use this in heuristics to decide
@@ -513,7 +510,7 @@ buf_read_ahead_linear(
ulint fail_count;
ulint ibuf_mode;
ulint low, high;
- ulint err;
+ dberr_t err;
ulint i;
const ulint buf_read_ahead_linear_area
= BUF_READ_AHEAD_AREA(buf_pool);
@@ -784,7 +781,7 @@ buf_read_ibuf_merge_pages(
#endif
for (i = 0; i < n_stored; i++) {
- ulint err;
+ dberr_t err;
buf_pool_t* buf_pool;
ulint zip_size = fil_space_get_zip_size(space_ids[i]);
@@ -850,7 +847,7 @@ buf_read_recv_pages(
{
ib_int64_t tablespace_version;
ulint count;
- ulint err;
+ dberr_t err;
ulint i;
zip_size = fil_space_get_zip_size(space);
diff --git a/storage/innobase/dict/dict0boot.cc b/storage/innobase/dict/dict0boot.cc
index 8e305364ac8..eea10759fcd 100644
--- a/storage/innobase/dict/dict0boot.cc
+++ b/storage/innobase/dict/dict0boot.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -241,9 +241,10 @@ dict_hdr_create(
/*****************************************************************//**
Initializes the data dictionary memory structures when the database is
-started. This function is also called when the data dictionary is created. */
+started. This function is also called when the data dictionary is created.
+@return DB_SUCCESS or error code. */
UNIV_INTERN
-void
+dberr_t
dict_boot(void)
/*===========*/
{
@@ -252,7 +253,7 @@ dict_boot(void)
dict_hdr_t* dict_hdr;
mem_heap_t* heap;
mtr_t mtr;
- ulint error;
+ dberr_t error;
/* Be sure these constants do not ever change. To avoid bloat,
only check the *NUM_FIELDS* in each table */
@@ -307,9 +308,7 @@ dict_boot(void)
dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0);
/* ROW_FORMAT = (N_COLS >> 31) ? COMPACT : REDUNDANT */
dict_mem_table_add_col(table, heap, "N_COLS", DATA_INT, 0, 4);
- /* If the format is UNIV_FORMAT_A, table->flags == 0, and
- TYPE == 1, which is defined as SYS_TABLE_TYPE_ANTELOPE.
- The low order bit of TYPE is always set to 1. If the format
+ /* The low order bit of TYPE is always set to 1. If the format
is UNIV_FORMAT_B or higher, this field matches table->flags. */
dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4);
dict_mem_table_add_col(table, heap, "MIX_ID", DATA_BINARY, 0, 0);
@@ -454,14 +453,27 @@ dict_boot(void)
ibuf_init_at_db_start();
- /* Load definitions of other indexes on system tables */
+ dberr_t err = DB_SUCCESS;
+
+ if (srv_read_only_mode && !ibuf_is_empty()) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Change buffer must be empty when --innodb-read-only "
+ "is set!");
- dict_load_sys_table(dict_sys->sys_tables);
- dict_load_sys_table(dict_sys->sys_columns);
- dict_load_sys_table(dict_sys->sys_indexes);
- dict_load_sys_table(dict_sys->sys_fields);
+ err = DB_ERROR;
+ } else {
+ /* Load definitions of other indexes on system tables */
+
+ dict_load_sys_table(dict_sys->sys_tables);
+ dict_load_sys_table(dict_sys->sys_columns);
+ dict_load_sys_table(dict_sys->sys_indexes);
+ dict_load_sys_table(dict_sys->sys_fields);
+ }
mutex_exit(&(dict_sys->mutex));
+
+ return(err);
}
/*****************************************************************//**
@@ -476,9 +488,10 @@ dict_insert_initial_data(void)
}
/*****************************************************************//**
-Creates and initializes the data dictionary at the database creation. */
+Creates and initializes the data dictionary at the server bootstrap.
+@return DB_SUCCESS or error code. */
UNIV_INTERN
-void
+dberr_t
dict_create(void)
/*=============*/
{
@@ -490,7 +503,11 @@ dict_create(void)
mtr_commit(&mtr);
- dict_boot();
+ dberr_t err = dict_boot();
+
+ if (err == DB_SUCCESS) {
+ dict_insert_initial_data();
+ }
- dict_insert_initial_data();
+ return(err);
}
diff --git a/storage/innobase/dict/dict0crea.cc b/storage/innobase/dict/dict0crea.cc
index d58b304ab92..864150b324a 100644
--- a/storage/innobase/dict/dict0crea.cc
+++ b/storage/innobase/dict/dict0crea.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -43,6 +43,7 @@ Created 1/8/1996 Heikki Tuuri
#include "usr0sess.h"
#include "ut0vec.h"
#include "dict0priv.h"
+#include "fts0priv.h"
/*****************************************************************//**
Based on a table object, this function builds the entry to be inserted
@@ -244,8 +245,8 @@ dict_create_sys_columns_tuple(
/***************************************************************//**
Builds a table definition to insert.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
dict_build_table_def_step(
/*======================*/
que_thr_t* thr, /*!< in: query thread */
@@ -253,9 +254,8 @@ dict_build_table_def_step(
{
dict_table_t* table;
dtuple_t* row;
- ulint error;
- const char* path_or_name;
- ibool is_path;
+ dberr_t error;
+ const char* path;
mtr_t mtr;
ulint space = 0;
bool use_tablespace;
@@ -263,7 +263,7 @@ dict_build_table_def_step(
ut_ad(mutex_own(&(dict_sys->mutex)));
table = node->table;
- use_tablespace = !!(table->flags2 & DICT_TF2_USE_TABLESPACE);
+ use_tablespace = DICT_TF2_FLAG_IS_SET(table, DICT_TF2_USE_TABLESPACE);
dict_hdr_get_new_id(&table->id, NULL, NULL);
@@ -274,6 +274,11 @@ dict_build_table_def_step(
Get a new space id. */
dict_hdr_get_new_id(NULL, NULL, &space);
+ DBUG_EXECUTE_IF(
+ "ib_create_table_fail_out_of_space_ids",
+ space = ULINT_UNDEFINED;
+ );
+
if (UNIV_UNLIKELY(space == ULINT_UNDEFINED)) {
return(DB_ERROR);
}
@@ -286,26 +291,19 @@ dict_build_table_def_step(
- page 3 will contain the root of the clustered index of the
table we create here. */
- if (table->dir_path_of_temp_table) {
- /* We place tables created with CREATE TEMPORARY
- TABLE in the tmp dir of mysqld server */
-
- path_or_name = table->dir_path_of_temp_table;
- is_path = TRUE;
- } else {
- path_or_name = table->name;
- is_path = FALSE;
- }
+ path = table->data_dir_path ? table->data_dir_path
+ : table->dir_path_of_temp_table;
ut_ad(dict_table_get_format(table) <= UNIV_FORMAT_MAX);
ut_ad(!dict_table_zip_size(table)
|| dict_table_get_format(table) >= UNIV_FORMAT_B);
error = fil_create_new_single_table_tablespace(
- space, path_or_name, is_path,
+ space, table->name, path,
dict_tf_to_fsp_flags(table->flags),
table->flags2,
FIL_IBD_FILE_INITIAL_SIZE);
+
table->space = (unsigned int) space;
if (error != DB_SUCCESS) {
@@ -333,10 +331,9 @@ dict_build_table_def_step(
}
/***************************************************************//**
-Builds a column definition to insert.
-@return DB_SUCCESS */
+Builds a column definition to insert. */
static
-ulint
+void
dict_build_col_def_step(
/*====================*/
tab_node_t* node) /*!< in: table create node */
@@ -346,8 +343,6 @@ dict_build_col_def_step(
row = dict_create_sys_columns_tuple(node->table, node->col_no,
node->heap);
ins_node_set_new_row(node->col_def, row);
-
- return(DB_SUCCESS);
}
/*****************************************************************//**
@@ -571,8 +566,8 @@ dict_create_search_tuple(
/***************************************************************//**
Builds an index definition row to insert.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
dict_build_index_def_step(
/*======================*/
que_thr_t* thr, /*!< in: query thread */
@@ -595,7 +590,10 @@ dict_build_index_def_step(
return(DB_TABLE_NOT_FOUND);
}
- trx->table_id = table->id;
+ if (!trx->table_id) {
+ /* Record only the first table id. */
+ trx->table_id = table->id;
+ }
node->table = table;
@@ -616,15 +614,16 @@ dict_build_index_def_step(
/* Note that the index was created by this transaction. */
index->trx_id = trx->id;
+ ut_ad(table->def_trx_id <= trx->id);
+ table->def_trx_id = trx->id;
return(DB_SUCCESS);
}
/***************************************************************//**
-Builds a field definition row to insert.
-@return DB_SUCCESS */
+Builds a field definition row to insert. */
static
-ulint
+void
dict_build_field_def_step(
/*======================*/
ind_node_t* node) /*!< in: index create node */
@@ -637,15 +636,13 @@ dict_build_field_def_step(
row = dict_create_sys_fields_tuple(index, node->field_no, node->heap);
ins_node_set_new_row(node->field_def, row);
-
- return(DB_SUCCESS);
}
/***************************************************************//**
Creates an index tree for the index if it is not a member of a cluster.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
dict_create_index_tree_step(
/*========================*/
ind_node_t* node) /*!< in: index create node */
@@ -653,7 +650,6 @@ dict_create_index_tree_step(
dict_index_t* index;
dict_table_t* sys_indexes;
dtuple_t* search_tuple;
- ulint zip_size;
btr_pcur_t pcur;
mtr_t mtr;
@@ -682,25 +678,37 @@ dict_create_index_tree_step(
btr_pcur_move_to_next_user_rec(&pcur, &mtr);
- zip_size = dict_table_zip_size(index->table);
- node->page_no = btr_create(index->type, index->space, zip_size,
- index->id, index, &mtr);
- /* printf("Created a new index tree in space %lu root page %lu\n",
- index->space, node->page_no); */
+ dberr_t err = DB_SUCCESS;
+ ulint zip_size = dict_table_zip_size(index->table);
- page_rec_write_field(btr_pcur_get_rec(&pcur),
- DICT_FLD__SYS_INDEXES__PAGE_NO,
- node->page_no, &mtr);
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
+ if (node->index->table->ibd_file_missing
+ || dict_table_is_discarded(node->index->table)) {
+
+ node->page_no = FIL_NULL;
+ } else {
+ node->page_no = btr_create(
+ index->type, index->space, zip_size,
+ index->id, index, &mtr);
- if (node->page_no == FIL_NULL) {
+ if (node->page_no == FIL_NULL) {
+ err = DB_OUT_OF_FILE_SPACE;
+ }
- return(DB_OUT_OF_FILE_SPACE);
+ DBUG_EXECUTE_IF("ib_import_create_index_failure_1",
+ node->page_no = FIL_NULL;
+ err = DB_OUT_OF_FILE_SPACE; );
}
- return(DB_SUCCESS);
+ page_rec_write_field(
+ btr_pcur_get_rec(&pcur), DICT_FLD__SYS_INDEXES__PAGE_NO,
+ node->page_no, &mtr);
+
+ btr_pcur_close(&pcur);
+
+ mtr_commit(&mtr);
+
+ return(err);
}
/*******************************************************************//**
@@ -883,7 +891,7 @@ create:
for (index = UT_LIST_GET_FIRST(table->indexes);
index;
index = UT_LIST_GET_NEXT(indexes, index)) {
- if (index->id == index_id) {
+ if (index->id == index_id && !(index->type & DICT_FTS)) {
root_page_no = btr_create(type, space, zip_size,
index_id, index, mtr);
index->page = (unsigned int) root_page_no;
@@ -910,7 +918,9 @@ tab_create_graph_create(
/*====================*/
dict_table_t* table, /*!< in: table to create, built as a memory data
structure */
- mem_heap_t* heap) /*!< in: heap where created */
+ mem_heap_t* heap, /*!< in: heap where created */
+ bool commit) /*!< in: true if the commit node should be
+ added to the query graph */
{
tab_node_t* node;
@@ -932,8 +942,12 @@ tab_create_graph_create(
heap);
node->col_def->common.parent = node;
- node->commit_node = trx_commit_node_create(heap);
- node->commit_node->common.parent = node;
+ if (commit) {
+ node->commit_node = trx_commit_node_create(heap);
+ node->commit_node->common.parent = node;
+ } else {
+ node->commit_node = 0;
+ }
return(node);
}
@@ -947,7 +961,9 @@ ind_create_graph_create(
/*====================*/
dict_index_t* index, /*!< in: index to create, built as a memory data
structure */
- mem_heap_t* heap) /*!< in: heap where created */
+ mem_heap_t* heap, /*!< in: heap where created */
+ bool commit) /*!< in: true if the commit node should be
+ added to the query graph */
{
ind_node_t* node;
@@ -970,8 +986,12 @@ ind_create_graph_create(
dict_sys->sys_fields, heap);
node->field_def->common.parent = node;
- node->commit_node = trx_commit_node_create(heap);
- node->commit_node->common.parent = node;
+ if (commit) {
+ node->commit_node = trx_commit_node_create(heap);
+ node->commit_node->common.parent = node;
+ } else {
+ node->commit_node = 0;
+ }
return(node);
}
@@ -986,7 +1006,7 @@ dict_create_table_step(
que_thr_t* thr) /*!< in: query thread */
{
tab_node_t* node;
- ulint err = DB_ERROR;
+ dberr_t err = DB_ERROR;
trx_t* trx;
ut_ad(thr);
@@ -1025,12 +1045,7 @@ dict_create_table_step(
if (node->col_no < (node->table)->n_def) {
- err = dict_build_col_def_step(node);
-
- if (err != DB_SUCCESS) {
-
- goto function_exit;
- }
+ dict_build_col_def_step(node);
node->col_no++;
@@ -1063,7 +1078,7 @@ dict_create_table_step(
}
function_exit:
- trx->error_state = (enum db_err) err;
+ trx->error_state = err;
if (err == DB_SUCCESS) {
/* Ok: do nothing */
@@ -1093,7 +1108,7 @@ dict_create_index_step(
que_thr_t* thr) /*!< in: query thread */
{
ind_node_t* node;
- ulint err = DB_ERROR;
+ dberr_t err = DB_ERROR;
trx_t* trx;
ut_ad(thr);
@@ -1130,12 +1145,7 @@ dict_create_index_step(
if (node->field_no < (node->index)->n_fields) {
- err = dict_build_field_def_step(node);
-
- if (err != DB_SUCCESS) {
-
- goto function_exit;
- }
+ dict_build_field_def_step(node);
node->field_no++;
@@ -1172,7 +1182,37 @@ dict_create_index_step(
err = dict_create_index_tree_step(node);
+ DBUG_EXECUTE_IF("ib_dict_create_index_tree_fail",
+ err = DB_OUT_OF_MEMORY;);
+
if (err != DB_SUCCESS) {
+ /* If this is a FTS index, we will need to remove
+ it from fts->cache->indexes list as well */
+ if ((node->index->type & DICT_FTS)
+ && node->table->fts) {
+ fts_index_cache_t* index_cache;
+
+ rw_lock_x_lock(
+ &node->table->fts->cache->init_lock);
+
+ index_cache = (fts_index_cache_t*)
+ fts_find_index_cache(
+ node->table->fts->cache,
+ node->index);
+
+ if (index_cache->words) {
+ rbt_free(index_cache->words);
+ index_cache->words = 0;
+ }
+
+ ib_vector_remove(
+ node->table->fts->cache->indexes,
+ *reinterpret_cast<void**>(index_cache));
+
+ rw_lock_x_unlock(
+ &node->table->fts->cache->init_lock);
+ }
+
dict_index_remove_from_cache(node->table, node->index);
node->index = NULL;
@@ -1180,6 +1220,11 @@ dict_create_index_step(
}
node->index->page = node->page_no;
+ /* These should have been set in
+ dict_build_index_def_step() and
+ dict_index_add_to_cache(). */
+ ut_ad(node->index->trx_id == trx->id);
+ ut_ad(node->index->table->def_trx_id == trx->id);
node->state = INDEX_COMMIT_WORK;
}
@@ -1197,7 +1242,7 @@ dict_create_index_step(
}
function_exit:
- trx->error_state = static_cast<enum db_err>(err);
+ trx->error_state = err;
if (err == DB_SUCCESS) {
/* Ok: do nothing */
@@ -1217,93 +1262,107 @@ function_exit:
}
/****************************************************************//**
-Check whether the system foreign key tables exist. Additionally, If
-they exist then move them to non-LRU end of the table LRU list.
-@return TRUE if they exist. */
+Check whether a system table exists. Additionally, if it exists,
+move it to the non-LRU end of the table LRU list. This is oly used
+for system tables that can be upgraded or added to an older database,
+which include SYS_FOREIGN, SYS_FOREIGN_COLS, SYS_TABLESPACES and
+SYS_DATAFILES.
+@return DB_SUCCESS if the sys table exists, DB_CORRUPTION if it exists
+but is not current, DB_TABLE_NOT_FOUND if it does not exist*/
static
-ibool
-dict_check_sys_foreign_tables_exist(void)
-/*=====================================*/
+dberr_t
+dict_check_if_system_table_exists(
+/*==============================*/
+ const char* tablename, /*!< in: name of table */
+ ulint num_fields, /*!< in: number of fields */
+ ulint num_indexes) /*!< in: number of indexes */
{
- dict_table_t* sys_foreign;
- ibool exists = FALSE;
- dict_table_t* sys_foreign_cols;
+ dict_table_t* sys_table;
+ dberr_t error = DB_SUCCESS;
ut_a(srv_get_active_thread_type() == SRV_NONE);
mutex_enter(&dict_sys->mutex);
- sys_foreign = dict_table_get_low("SYS_FOREIGN");
- sys_foreign_cols = dict_table_get_low("SYS_FOREIGN_COLS");
+ sys_table = dict_table_get_low(tablename);
- if (sys_foreign != NULL
- && sys_foreign_cols != NULL
- && UT_LIST_GET_LEN(sys_foreign->indexes) == 3
- && UT_LIST_GET_LEN(sys_foreign_cols->indexes) == 1) {
+ if (sys_table == NULL) {
+ error = DB_TABLE_NOT_FOUND;
- /* Foreign constraint system tables have already been
- created, and they are ok. Ensure that they can't be
- evicted from the table LRU cache. */
+ } else if (UT_LIST_GET_LEN(sys_table->indexes) != num_indexes
+ || sys_table->n_cols != num_fields) {
+ error = DB_CORRUPTION;
- dict_table_move_from_lru_to_non_lru(sys_foreign);
- dict_table_move_from_lru_to_non_lru(sys_foreign_cols);
+ } else {
+ /* This table has already been created, and it is OK.
+ Ensure that it can't be evicted from the table LRU cache. */
- exists = TRUE;
+ dict_table_move_from_lru_to_non_lru(sys_table);
}
mutex_exit(&dict_sys->mutex);
- return(exists);
+ return(error);
}
/****************************************************************//**
Creates the foreign key constraints system tables inside InnoDB
-at database creation or database start if they are not found or are
+at server bootstrap or server start if they are not found or are
not of the right form.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
dict_create_or_check_foreign_constraint_tables(void)
/*================================================*/
{
trx_t* trx;
- ulint error;
- ibool success;
- ibool srv_file_per_table_backup;
+ my_bool srv_file_per_table_backup;
+ dberr_t err;
+ dberr_t sys_foreign_err;
+ dberr_t sys_foreign_cols_err;
ut_a(srv_get_active_thread_type() == SRV_NONE);
/* Note: The master thread has not been started at this point. */
- if (dict_check_sys_foreign_tables_exist()) {
+
+ sys_foreign_err = dict_check_if_system_table_exists(
+ "SYS_FOREIGN", DICT_NUM_FIELDS__SYS_FOREIGN + 1, 3);
+ sys_foreign_cols_err = dict_check_if_system_table_exists(
+ "SYS_FOREIGN_COLS", DICT_NUM_FIELDS__SYS_FOREIGN_COLS + 1, 1);
+
+ if (sys_foreign_err == DB_SUCCESS
+ && sys_foreign_cols_err == DB_SUCCESS) {
return(DB_SUCCESS);
}
trx = trx_allocate_for_mysql();
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+
trx->op_info = "creating foreign key sys tables";
row_mysql_lock_data_dictionary(trx);
/* Check which incomplete table definition to drop. */
- if (dict_table_get_low("SYS_FOREIGN") != NULL) {
- fprintf(stderr,
- "InnoDB: dropping incompletely created"
- " SYS_FOREIGN table\n");
+ if (sys_foreign_err == DB_CORRUPTION) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Dropping incompletely created "
+ "SYS_FOREIGN table.");
row_drop_table_for_mysql("SYS_FOREIGN", trx, TRUE);
}
- if (dict_table_get_low("SYS_FOREIGN_COLS") != NULL) {
- fprintf(stderr,
- "InnoDB: dropping incompletely created"
- " SYS_FOREIGN_COLS table\n");
+ if (sys_foreign_cols_err == DB_CORRUPTION) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Dropping incompletely created "
+ "SYS_FOREIGN_COLS table.");
row_drop_table_for_mysql("SYS_FOREIGN_COLS", trx, TRUE);
}
- fprintf(stderr,
- "InnoDB: Creating foreign key constraint system tables\n");
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Creating foreign key constraint system tables.");
/* NOTE: in dict_load_foreigns we use the fact that
there are 2 secondary indexes on SYS_FOREIGN, and they
@@ -1315,50 +1374,50 @@ dict_create_or_check_foreign_constraint_tables(void)
VARBINARY, like in other InnoDB system tables, to get a clean
design. */
- srv_file_per_table_backup = (ibool) srv_file_per_table;
+ srv_file_per_table_backup = srv_file_per_table;
/* We always want SYSTEM tables to be created inside the system
tablespace. */
srv_file_per_table = 0;
- error = que_eval_sql(NULL,
- "PROCEDURE CREATE_FOREIGN_SYS_TABLES_PROC () IS\n"
- "BEGIN\n"
- "CREATE TABLE\n"
- "SYS_FOREIGN(ID CHAR, FOR_NAME CHAR,"
- " REF_NAME CHAR, N_COLS INT);\n"
- "CREATE UNIQUE CLUSTERED INDEX ID_IND"
- " ON SYS_FOREIGN (ID);\n"
- "CREATE INDEX FOR_IND"
- " ON SYS_FOREIGN (FOR_NAME);\n"
- "CREATE INDEX REF_IND"
- " ON SYS_FOREIGN (REF_NAME);\n"
- "CREATE TABLE\n"
- "SYS_FOREIGN_COLS(ID CHAR, POS INT,"
- " FOR_COL_NAME CHAR, REF_COL_NAME CHAR);\n"
- "CREATE UNIQUE CLUSTERED INDEX ID_IND"
- " ON SYS_FOREIGN_COLS (ID, POS);\n"
- "END;\n"
- , FALSE, trx);
-
- if (error != DB_SUCCESS) {
- fprintf(stderr, "InnoDB: error %lu in creation\n",
- (ulong) error);
-
- ut_a(error == DB_OUT_OF_FILE_SPACE
- || error == DB_TOO_MANY_CONCURRENT_TRXS);
-
- fprintf(stderr,
- "InnoDB: creation failed\n"
- "InnoDB: tablespace is full\n"
- "InnoDB: dropping incompletely created"
- " SYS_FOREIGN tables\n");
+ err = que_eval_sql(
+ NULL,
+ "PROCEDURE CREATE_FOREIGN_SYS_TABLES_PROC () IS\n"
+ "BEGIN\n"
+ "CREATE TABLE\n"
+ "SYS_FOREIGN(ID CHAR, FOR_NAME CHAR,"
+ " REF_NAME CHAR, N_COLS INT);\n"
+ "CREATE UNIQUE CLUSTERED INDEX ID_IND"
+ " ON SYS_FOREIGN (ID);\n"
+ "CREATE INDEX FOR_IND"
+ " ON SYS_FOREIGN (FOR_NAME);\n"
+ "CREATE INDEX REF_IND"
+ " ON SYS_FOREIGN (REF_NAME);\n"
+ "CREATE TABLE\n"
+ "SYS_FOREIGN_COLS(ID CHAR, POS INT,"
+ " FOR_COL_NAME CHAR, REF_COL_NAME CHAR);\n"
+ "CREATE UNIQUE CLUSTERED INDEX ID_IND"
+ " ON SYS_FOREIGN_COLS (ID, POS);\n"
+ "END;\n",
+ FALSE, trx);
+
+ if (err != DB_SUCCESS) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Creation of SYS_FOREIGN and SYS_FOREIGN_COLS "
+ "has failed with error %lu. Tablespace is full. "
+ "Dropping incompletely created tables.",
+ (ulong) err);
+
+ ut_ad(err == DB_OUT_OF_FILE_SPACE
+ || err == DB_TOO_MANY_CONCURRENT_TRXS);
row_drop_table_for_mysql("SYS_FOREIGN", trx, TRUE);
row_drop_table_for_mysql("SYS_FOREIGN_COLS", trx, TRUE);
- error = DB_MUST_GET_MORE_FILE_SPACE;
+ if (err == DB_OUT_OF_FILE_SPACE) {
+ err = DB_MUST_GET_MORE_FILE_SPACE;
+ }
}
trx_commit_for_mysql(trx);
@@ -1367,28 +1426,31 @@ dict_create_or_check_foreign_constraint_tables(void)
trx_free_for_mysql(trx);
- if (error == DB_SUCCESS) {
- fprintf(stderr,
- "InnoDB: Foreign key constraint system tables"
- " created\n");
+ srv_file_per_table = srv_file_per_table_backup;
+
+ if (err == DB_SUCCESS) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Foreign key constraint system tables created");
}
/* Note: The master thread has not been started at this point. */
/* Confirm and move to the non-LRU part of the table LRU list. */
+ sys_foreign_err = dict_check_if_system_table_exists(
+ "SYS_FOREIGN", DICT_NUM_FIELDS__SYS_FOREIGN + 1, 3);
+ ut_a(sys_foreign_err == DB_SUCCESS);
- success = dict_check_sys_foreign_tables_exist();
- ut_a(success);
-
- srv_file_per_table = (my_bool) srv_file_per_table_backup;
+ sys_foreign_cols_err = dict_check_if_system_table_exists(
+ "SYS_FOREIGN_COLS", DICT_NUM_FIELDS__SYS_FOREIGN_COLS + 1, 1);
+ ut_a(sys_foreign_cols_err == DB_SUCCESS);
- return(error);
+ return(err);
}
/****************************************************************//**
Evaluate the given foreign key SQL statement.
@return error code or DB_SUCCESS */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
dict_foreign_eval_sql(
/*==================*/
pars_info_t* info, /*!< in: info struct, or NULL */
@@ -1397,8 +1459,8 @@ dict_foreign_eval_sql(
dict_foreign_t* foreign,/*!< in: foreign */
trx_t* trx) /*!< in: transaction */
{
- ulint error;
- FILE* ef = dict_foreign_err_file;
+ dberr_t error;
+ FILE* ef = dict_foreign_err_file;
error = que_eval_sql(info, sql, FALSE, trx);
@@ -1453,8 +1515,8 @@ dict_foreign_eval_sql(
Add a single foreign key field definition to the data dictionary tables in
the database.
@return error code or DB_SUCCESS */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
dict_create_add_foreign_field_to_dictionary(
/*========================================*/
ulint field_nr, /*!< in: foreign field number */
@@ -1492,17 +1554,17 @@ databasename/tablename_ibfk_NUMBER, where the numbers start from 1, and
are given locally for this table, that is, the number is not global, as in
the old format constraints < 4.0.18 it used to be.
@return error code or DB_SUCCESS */
-static
-ulint
+UNIV_INTERN
+dberr_t
dict_create_add_foreign_to_dictionary(
/*==================================*/
ulint* id_nr, /*!< in/out: number to use in id generation;
incremented if used */
dict_table_t* table, /*!< in: table */
dict_foreign_t* foreign,/*!< in: foreign */
- trx_t* trx) /*!< in: transaction */
+ trx_t* trx) /*!< in/out: dictionary transaction */
{
- ulint error;
+ dberr_t error;
ulint i;
pars_info_t* info = pars_info_create();
@@ -1553,12 +1615,6 @@ dict_create_add_foreign_to_dictionary(
}
}
- trx->op_info = "committing foreign key definitions";
-
- trx_commit(trx);
-
- trx->op_info = "";
-
return(error);
}
@@ -1566,7 +1622,7 @@ dict_create_add_foreign_to_dictionary(
Adds foreign key definitions to data dictionary tables in the database.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
dict_create_add_foreigns_to_dictionary(
/*===================================*/
ulint start_id,/*!< in: if we are actually doing ALTER TABLE
@@ -1582,7 +1638,7 @@ dict_create_add_foreigns_to_dictionary(
{
dict_foreign_t* foreign;
ulint number = start_id + 1;
- ulint error;
+ dberr_t error;
ut_ad(mutex_own(&(dict_sys->mutex)));
@@ -1607,5 +1663,188 @@ dict_create_add_foreigns_to_dictionary(
}
}
+ trx->op_info = "committing foreign key definitions";
+
+ trx_commit(trx);
+
+ trx->op_info = "";
+
return(DB_SUCCESS);
}
+
+/****************************************************************//**
+Creates the tablespaces and datafiles system tables inside InnoDB
+at server bootstrap or server start if they are not found or are
+not of the right form.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+dict_create_or_check_sys_tablespace(void)
+/*=====================================*/
+{
+ trx_t* trx;
+ my_bool srv_file_per_table_backup;
+ dberr_t err;
+ dberr_t sys_tablespaces_err;
+ dberr_t sys_datafiles_err;
+
+ ut_a(srv_get_active_thread_type() == SRV_NONE);
+
+ /* Note: The master thread has not been started at this point. */
+
+ sys_tablespaces_err = dict_check_if_system_table_exists(
+ "SYS_TABLESPACES", DICT_NUM_FIELDS__SYS_TABLESPACES + 1, 1);
+ sys_datafiles_err = dict_check_if_system_table_exists(
+ "SYS_DATAFILES", DICT_NUM_FIELDS__SYS_DATAFILES + 1, 1);
+
+ if (sys_tablespaces_err == DB_SUCCESS
+ && sys_datafiles_err == DB_SUCCESS) {
+ return(DB_SUCCESS);
+ }
+
+ trx = trx_allocate_for_mysql();
+
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+
+ trx->op_info = "creating tablepace and datafile sys tables";
+
+ row_mysql_lock_data_dictionary(trx);
+
+ /* Check which incomplete table definition to drop. */
+
+ if (sys_tablespaces_err == DB_CORRUPTION) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Dropping incompletely created "
+ "SYS_TABLESPACES table.");
+ row_drop_table_for_mysql("SYS_TABLESPACES", trx, TRUE);
+ }
+
+ if (sys_datafiles_err == DB_CORRUPTION) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Dropping incompletely created "
+ "SYS_DATAFILES table.");
+
+ row_drop_table_for_mysql("SYS_DATAFILES", trx, TRUE);
+ }
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Creating tablespace and datafile system tables.");
+
+ /* We always want SYSTEM tables to be created inside the system
+ tablespace. */
+ srv_file_per_table_backup = srv_file_per_table;
+ srv_file_per_table = 0;
+
+ err = que_eval_sql(
+ NULL,
+ "PROCEDURE CREATE_SYS_TABLESPACE_PROC () IS\n"
+ "BEGIN\n"
+ "CREATE TABLE SYS_TABLESPACES(\n"
+ " SPACE INT, NAME CHAR, FLAGS INT);\n"
+ "CREATE UNIQUE CLUSTERED INDEX SYS_TABLESPACES_SPACE"
+ " ON SYS_TABLESPACES (SPACE);\n"
+ "CREATE TABLE SYS_DATAFILES(\n"
+ " SPACE INT, PATH CHAR);\n"
+ "CREATE UNIQUE CLUSTERED INDEX SYS_DATAFILES_SPACE"
+ " ON SYS_DATAFILES (SPACE);\n"
+ "END;\n",
+ FALSE, trx);
+
+ if (err != DB_SUCCESS) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Creation of SYS_TABLESPACES and SYS_DATAFILES "
+ "has failed with error %lu. Tablespace is full. "
+ "Dropping incompletely created tables.",
+ (ulong) err);
+
+ ut_a(err == DB_OUT_OF_FILE_SPACE
+ || err == DB_TOO_MANY_CONCURRENT_TRXS);
+
+ row_drop_table_for_mysql("SYS_TABLESPACES", trx, TRUE);
+ row_drop_table_for_mysql("SYS_DATAFILES", trx, TRUE);
+
+ if (err == DB_OUT_OF_FILE_SPACE) {
+ err = DB_MUST_GET_MORE_FILE_SPACE;
+ }
+ }
+
+ trx_commit_for_mysql(trx);
+
+ row_mysql_unlock_data_dictionary(trx);
+
+ trx_free_for_mysql(trx);
+
+ srv_file_per_table = srv_file_per_table_backup;
+
+ if (err == DB_SUCCESS) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Tablespace and datafile system tables created.");
+ }
+
+ /* Note: The master thread has not been started at this point. */
+ /* Confirm and move to the non-LRU part of the table LRU list. */
+
+ sys_tablespaces_err = dict_check_if_system_table_exists(
+ "SYS_TABLESPACES", DICT_NUM_FIELDS__SYS_TABLESPACES + 1, 1);
+ ut_a(sys_tablespaces_err == DB_SUCCESS);
+
+ sys_datafiles_err = dict_check_if_system_table_exists(
+ "SYS_DATAFILES", DICT_NUM_FIELDS__SYS_DATAFILES + 1, 1);
+ ut_a(sys_datafiles_err == DB_SUCCESS);
+
+ return(err);
+}
+
+/********************************************************************//**
+Add a single tablespace definition to the data dictionary tables in the
+database.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_add_tablespace_to_dictionary(
+/*=====================================*/
+ ulint space, /*!< in: tablespace id */
+ const char* name, /*!< in: tablespace name */
+ ulint flags, /*!< in: tablespace flags */
+ const char* path, /*!< in: tablespace path */
+ trx_t* trx, /*!< in/out: transaction */
+ bool commit) /*!< in: if true then commit the
+ transaction */
+{
+ dberr_t error;
+
+ pars_info_t* info = pars_info_create();
+
+ ut_a(space > TRX_SYS_SPACE);
+
+ pars_info_add_int4_literal(info, "space", space);
+
+ pars_info_add_str_literal(info, "name", name);
+
+ pars_info_add_int4_literal(info, "flags", flags);
+
+ pars_info_add_str_literal(info, "path", path);
+
+ error = que_eval_sql(info,
+ "PROCEDURE P () IS\n"
+ "BEGIN\n"
+ "INSERT INTO SYS_TABLESPACES VALUES"
+ "(:space, :name, :flags);\n"
+ "INSERT INTO SYS_DATAFILES VALUES"
+ "(:space, :path);\n"
+ "END;\n",
+ FALSE, trx);
+
+ if (error != DB_SUCCESS) {
+ return(error);
+ }
+
+ if (commit) {
+ trx->op_info = "committing tablespace and datafile definition";
+ trx_commit(trx);
+ }
+
+ trx->op_info = "";
+
+ return(error);
+}
diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc
index 8282dafda0c..8e111645880 100644
--- a/storage/innobase/dict/dict0dict.cc
+++ b/storage/innobase/dict/dict0dict.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,6 +26,7 @@ Created 1/8/1996 Heikki Tuuri
#include "dict0dict.h"
#include "fts0fts.h"
+#include "fil0fil.h"
#ifdef UNIV_NONINL
#include "dict0dict.ic"
@@ -56,7 +58,6 @@ UNIV_INTERN dict_index_t* dict_ind_compact;
#include "rem0cmp.h"
#include "fts0fts.h"
#include "fts0types.h"
-#include "row0merge.h"
#include "m_ctype.h" /* my_isspace() */
#include "ha_prototypes.h" /* innobase_strcasecmp(), innobase_casedn_str() */
#include "srv0mon.h"
@@ -64,6 +65,14 @@ UNIV_INTERN dict_index_t* dict_ind_compact;
#include "lock0lock.h"
#include "dict0priv.h"
#include "row0upd.h"
+#include "row0mysql.h"
+#include "row0merge.h"
+#include "row0log.h"
+#include "ut0ut.h" /* ut_format_name() */
+#include "m_string.h"
+#include "my_sys.h"
+#include "mysqld.h" /* system_charset_info */
+#include "strfunc.h" /* strconvert() */
#include <ctype.h>
@@ -77,17 +86,27 @@ backround operations purge, rollback, foreign key checks reserve this
in S-mode; we cannot trust that MySQL protects implicit or background
operations a table drop since MySQL does not know of them; therefore
we need this; NOTE: a transaction which reserves this must keep book
-on the mode in trx_struct::dict_operation_lock_mode */
+on the mode in trx_t::dict_operation_lock_mode */
UNIV_INTERN rw_lock_t dict_operation_lock;
+/** Percentage of compression failures that are allowed in a single
+round */
+UNIV_INTERN ulong zip_failure_threshold_pct = 5;
+
+/** Maximum percentage of a page that can be allowed as a pad to avoid
+compression failures */
+UNIV_INTERN ulong zip_pad_max = 50;
+
/* Keys to register rwlocks and mutexes with performance schema */
#ifdef UNIV_PFS_RWLOCK
UNIV_INTERN mysql_pfs_key_t dict_operation_lock_key;
UNIV_INTERN mysql_pfs_key_t index_tree_rw_lock_key;
+UNIV_INTERN mysql_pfs_key_t index_online_log_key;
UNIV_INTERN mysql_pfs_key_t dict_table_stats_latch_key;
#endif /* UNIV_PFS_RWLOCK */
#ifdef UNIV_PFS_MUTEX
+UNIV_INTERN mysql_pfs_key_t zip_pad_mutex_key;
UNIV_INTERN mysql_pfs_key_t dict_sys_mutex_key;
UNIV_INTERN mysql_pfs_key_t dict_foreign_err_mutex_key;
#endif /* UNIV_PFS_MUTEX */
@@ -157,13 +176,6 @@ dict_index_build_internal_fts(
dict_table_t* table, /*!< in: table */
dict_index_t* index); /*!< in: user representation of an FTS index */
/**********************************************************************//**
-Removes a foreign constraint struct from the dictionary cache. */
-static
-void
-dict_foreign_remove_from_cache(
-/*===========================*/
- dict_foreign_t* foreign); /*!< in, own: foreign constraint */
-/**********************************************************************//**
Prints a column data. */
static
void
@@ -185,14 +197,6 @@ void
dict_field_print_low(
/*=================*/
const dict_field_t* field); /*!< in: field */
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Frees a foreign key struct. */
-static
-void
-dict_foreign_free(
-/*==============*/
- dict_foreign_t* foreign); /*!< in, own: foreign key struct */
/**********************************************************************//**
Removes an index from the dictionary cache. */
@@ -216,14 +220,14 @@ dict_table_remove_from_cache_low(
/**********************************************************************//**
Validate the dictionary table LRU list.
@return TRUE if validate OK */
-UNIV_INTERN
+static
ibool
dict_lru_validate(void);
/*===================*/
/**********************************************************************//**
Check if table is in the dictionary table LRU list.
@return TRUE if table found */
-UNIV_INTERN
+static
ibool
dict_lru_find_table(
/*================*/
@@ -239,11 +243,11 @@ dict_non_lru_find_table(
#endif /* UNIV_DEBUG */
/* Stream for storing detailed information about the latest foreign key
-and unique key errors */
+and unique key errors. Only created if !srv_read_only_mode */
UNIV_INTERN FILE* dict_foreign_err_file = NULL;
/* mutex protecting the foreign and unique error buffers */
-UNIV_INTERN mutex_t dict_foreign_err_mutex;
-#endif /* !UNIV_HOTBACKUP */
+UNIV_INTERN ib_mutex_t dict_foreign_err_mutex;
+
/******************************************************************//**
Makes all characters in a NUL-terminated UTF-8 string lower case. */
UNIV_INTERN
@@ -330,7 +334,7 @@ dict_mutex_exit_for_mysql(void)
/** Get the latch that protects the stats of a given table */
#define GET_TABLE_STATS_LATCH(table) \
- (&dict_table_stats_latches[ut_fold_ull(table->id) \
+ (&dict_table_stats_latches[ut_fold_ull((ib_uint64_t) table) \
% DICT_TABLE_STATS_LATCHES_SIZE])
/**********************************************************************//**
@@ -389,6 +393,75 @@ dict_table_stats_unlock(
}
}
+/**********************************************************************//**
+Try to drop any indexes after an aborted index creation.
+This can also be after a server kill during DROP INDEX. */
+static
+void
+dict_table_try_drop_aborted(
+/*========================*/
+ dict_table_t* table, /*!< in: table, or NULL if it
+ needs to be looked up again */
+ table_id_t table_id, /*!< in: table identifier */
+ ulint ref_count) /*!< in: expected table->n_ref_count */
+{
+ trx_t* trx;
+
+ trx = trx_allocate_for_background();
+ trx->op_info = "try to drop any indexes after an aborted index creation";
+ row_mysql_lock_data_dictionary(trx);
+ trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
+
+ if (table == NULL) {
+ table = dict_table_open_on_id_low(table_id);
+ } else {
+ ut_ad(table->id == table_id);
+ }
+
+ if (table && table->n_ref_count == ref_count && table->drop_aborted) {
+ /* Silence a debug assertion in row_merge_drop_indexes(). */
+ ut_d(table->n_ref_count++);
+ row_merge_drop_indexes(trx, table, TRUE);
+ ut_d(table->n_ref_count--);
+ ut_ad(table->n_ref_count == ref_count);
+ trx_commit_for_mysql(trx);
+ }
+
+ row_mysql_unlock_data_dictionary(trx);
+ trx_free_for_background(trx);
+}
+
+/**********************************************************************//**
+When opening a table,
+try to drop any indexes after an aborted index creation.
+Release the dict_sys->mutex. */
+static
+void
+dict_table_try_drop_aborted_and_mutex_exit(
+/*=======================================*/
+ dict_table_t* table, /*!< in: table (may be NULL) */
+ ibool try_drop) /*!< in: FALSE if should try to
+ drop indexes whose online creation
+ was aborted */
+{
+ if (try_drop
+ && table != NULL
+ && table->drop_aborted
+ && table->n_ref_count == 1
+ && dict_table_get_first_index(table)) {
+
+ /* Attempt to drop the indexes whose online creation
+ was aborted. */
+ table_id_t table_id = table->id;
+
+ mutex_exit(&dict_sys->mutex);
+
+ dict_table_try_drop_aborted(table, table_id, 1);
+ } else {
+ mutex_exit(&dict_sys->mutex);
+ }
+}
+
/********************************************************************//**
Decrements the count of open handles to a table. */
UNIV_INTERN
@@ -396,7 +469,10 @@ void
dict_table_close(
/*=============*/
dict_table_t* table, /*!< in/out: table */
- ibool dict_locked) /*!< in: TRUE=data dictionary locked */
+ ibool dict_locked, /*!< in: TRUE=data dictionary locked */
+ ibool try_drop) /*!< in: TRUE=try to drop any orphan
+ indexes after an aborted online
+ index creation */
{
if (!dict_locked) {
mutex_enter(&dict_sys->mutex);
@@ -407,6 +483,18 @@ dict_table_close(
--table->n_ref_count;
+ /* Force persistent stats re-read upon next open of the table
+ so that FLUSH TABLE can be used to forcibly fetch stats from disk
+ if they have been manually modified. We reset table->stat_initialized
+ only if table reference count is 0 because we do not want too frequent
+ stats re-reads (e.g. in other cases than FLUSH TABLE). */
+ if (strchr(table->name, '/') != NULL
+ && table->n_ref_count == 0
+ && dict_stats_is_persistent_enabled(table)) {
+
+ dict_stats_deinit(table);
+ }
+
MONITOR_DEC(MONITOR_TABLE_REFERENCE);
ut_ad(dict_lru_validate());
@@ -420,7 +508,19 @@ dict_table_close(
#endif /* UNIV_DEBUG */
if (!dict_locked) {
+ table_id_t table_id = table->id;
+ ibool drop_aborted;
+
+ drop_aborted = try_drop
+ && table->drop_aborted
+ && table->n_ref_count == 1
+ && dict_table_get_first_index(table);
+
mutex_exit(&dict_sys->mutex);
+
+ if (drop_aborted) {
+ dict_table_try_drop_aborted(NULL, table_id, 0);
+ }
}
}
#endif /* !UNIV_HOTBACKUP */
@@ -550,33 +650,6 @@ dict_table_autoinc_unlock(
{
mutex_exit(&table->autoinc_mutex);
}
-
-/**********************************************************************//**
-Looks for an index with the given table and index id.
-Note: Does not reserve the dictionary mutex.
-@return index or NULL if not found in cache */
-UNIV_INTERN
-dict_index_t*
-dict_index_get_on_id_low(
-/*=====================*/
- dict_table_t* table, /*!< in: table */
- index_id_t id) /*!< in: index id */
-{
- dict_index_t* index;
-
- for (index = dict_table_get_first_index(table);
- index != NULL;
- index = dict_table_get_next_index(index)) {
-
- if (id == index->id) {
- /* Found */
-
- return(index);
- }
- }
-
- return(NULL);
-}
#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
@@ -712,7 +785,10 @@ dict_table_t*
dict_table_open_on_id(
/*==================*/
table_id_t table_id, /*!< in: table id */
- ibool dict_locked) /*!< in: TRUE=data dictionary locked */
+ ibool dict_locked, /*!< in: TRUE=data dictionary locked */
+ ibool try_drop) /*!< in: TRUE=try to drop any orphan
+ indexes after an aborted online
+ index creation */
{
dict_table_t* table;
@@ -736,7 +812,7 @@ dict_table_open_on_id(
}
if (!dict_locked) {
- mutex_exit(&dict_sys->mutex);
+ dict_table_try_drop_aborted_and_mutex_exit(table, try_drop);
}
return(table);
@@ -815,11 +891,13 @@ dict_init(void)
rw_lock_create(dict_operation_lock_key,
&dict_operation_lock, SYNC_DICT_OPERATION);
- dict_foreign_err_file = os_file_create_tmpfile();
- ut_a(dict_foreign_err_file);
+ if (!srv_read_only_mode) {
+ dict_foreign_err_file = os_file_create_tmpfile();
+ ut_a(dict_foreign_err_file);
- mutex_create(dict_foreign_err_mutex_key,
- &dict_foreign_err_mutex, SYNC_NO_ORDER_CHECK);
+ mutex_create(dict_foreign_err_mutex_key,
+ &dict_foreign_err_mutex, SYNC_NO_ORDER_CHECK);
+ }
for (i = 0; i < DICT_TABLE_STATS_LATCHES_SIZE; i++) {
rw_lock_create(dict_table_stats_latch_key,
@@ -849,14 +927,20 @@ dict_move_to_mru(
}
/**********************************************************************//**
-Returns a table object and increments its open handle count.
+Returns a table object and increment its open handle count.
+NOTE! This is a high-level function to be used mainly from outside the
+'dict' module. Inside this directory dict_table_get_low
+is usually the appropriate function.
@return table, NULL if does not exist */
-static
+UNIV_INTERN
dict_table_t*
-dict_table_open_on_name_low(
-/*========================*/
+dict_table_open_on_name(
+/*====================*/
const char* table_name, /*!< in: table name */
ibool dict_locked, /*!< in: TRUE=data dictionary locked */
+ ibool try_drop, /*!< in: TRUE=try to drop any orphan
+ indexes after an aborted online
+ index creation */
dict_err_ignore_t
ignore_err) /*!< in: error to be ignored when
loading a table definition */
@@ -915,61 +999,11 @@ dict_table_open_on_name_low(
ut_ad(dict_lru_validate());
if (!dict_locked) {
- mutex_exit(&(dict_sys->mutex));
+ dict_table_try_drop_aborted_and_mutex_exit(table, try_drop);
}
return(table);
}
-
-/**********************************************************************//**
-Returns a table object and increment its open handle count.
-NOTE! This is a high-level function to be used mainly from outside the
-'dict' directory. Inside this directory dict_table_get_low
-is usually the appropriate function.
-@return table, NULL if does not exist */
-UNIV_INTERN
-dict_table_t*
-dict_table_open_on_name(
-/*====================*/
- const char* table_name, /*!< in: table name */
- ibool dict_locked) /*!< in: TRUE=data dictionary locked */
-{
- dict_table_t* table;
-
- table = dict_table_open_on_name_low(table_name, dict_locked,
- DICT_ERR_IGNORE_NONE);
-
- if (table != NULL) {
- /* If table->ibd_file_missing == TRUE, this will
- print an error message and return without doing
- anything. */
- dict_stats_update(table,
- DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY,
- dict_locked);
- }
-
- return(table);
-}
-
-/**********************************************************************//**
-Returns a table object and increment its open handle count. Table
-statistics will not be updated if they are not initialized.
-Call this function when dropping a table.
-@return table, NULL if does not exist */
-UNIV_INTERN
-dict_table_t*
-dict_table_open_on_name_no_stats(
-/*=============================*/
- const char* table_name, /*!< in: table name */
- ibool dict_locked, /*!< in: TRUE=data dictionary locked */
- dict_err_ignore_t
- ignore_err) /*!< in: error to be ignored during
- table open */
-{
- return(dict_table_open_on_name_low(table_name, dict_locked,
- ignore_err));
-}
-
#endif /* !UNIV_HOTBACKUP */
/**********************************************************************//**
@@ -1156,7 +1190,7 @@ dict_table_can_be_evicted(
index != NULL;
index = dict_table_get_next_index(index)) {
- btr_search_t* info = index->search_info;
+ btr_search_t* info = btr_search_get_info(index);
/* We are not allowed to free the in-memory index
struct dict_index_t until all entries in the adaptive
@@ -1358,7 +1392,7 @@ dict_index_find_on_id_low(
Renames a table object.
@return TRUE if success */
UNIV_INTERN
-ibool
+dberr_t
dict_table_rename_in_cache(
/*=======================*/
dict_table_t* table, /*!< in/out: table */
@@ -1372,7 +1406,6 @@ dict_table_rename_in_cache(
ulint fold;
char old_name[MAX_FULL_NAME_LEN + 1];
- ut_ad(table);
ut_ad(mutex_own(&(dict_sys->mutex)));
/* store the old/current name to an automatic variable */
@@ -1389,28 +1422,59 @@ dict_table_rename_in_cache(
fold = ut_fold_string(new_name);
/* Look for a table with the same name: error if such exists */
- {
- dict_table_t* table2;
- HASH_SEARCH(name_hash, dict_sys->table_hash, fold,
- dict_table_t*, table2, ut_ad(table2->cached),
- (ut_strcmp(table2->name, new_name) == 0));
- if (UNIV_LIKELY_NULL(table2)) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: dictionary cache"
- " already contains a table ", stderr);
- ut_print_name(stderr, NULL, TRUE, new_name);
- fputs("\n"
- "InnoDB: cannot rename table ", stderr);
- ut_print_name(stderr, NULL, TRUE, old_name);
- putc('\n', stderr);
- return(FALSE);
- }
+ dict_table_t* table2;
+ HASH_SEARCH(name_hash, dict_sys->table_hash, fold,
+ dict_table_t*, table2, ut_ad(table2->cached),
+ (ut_strcmp(table2->name, new_name) == 0));
+ DBUG_EXECUTE_IF("dict_table_rename_in_cache_failure",
+ if (table2 == NULL) {
+ table2 = (dict_table_t*) -1;
+ } );
+ if (table2) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot rename table '%s' to '%s' since the "
+ "dictionary cache already contains '%s'.",
+ old_name, new_name, new_name);
+ return(DB_ERROR);
}
/* If the table is stored in a single-table tablespace, rename the
- .ibd file */
+ .ibd file and rebuild the .isl file if needed. */
+
+ if (dict_table_is_discarded(table)) {
+ os_file_type_t type;
+ ibool exists;
+ char* filepath;
+
+ ut_ad(table->space != TRX_SYS_SPACE);
+
+ if (DICT_TF_HAS_DATA_DIR(table->flags)) {
+
+ dict_get_and_save_data_dir_path(table, true);
+ ut_a(table->data_dir_path);
+
+ filepath = os_file_make_remote_pathname(
+ table->data_dir_path, table->name, "ibd");
+ } else {
+ filepath = fil_make_ibd_name(table->name, false);
+ }
+
+ fil_delete_tablespace(table->space, BUF_REMOVE_FLUSH_NO_WRITE);
+
+ /* Delete any temp file hanging around. */
+ if (os_file_status(filepath, &exists, &type)
+ && exists
+ && !os_file_delete_if_exists(filepath)) {
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Delete of %s failed.", filepath);
+ }
+
+ mem_free(filepath);
+
+ } else if (table->space != TRX_SYS_SPACE) {
+ char* new_path = NULL;
- if (table->space != 0) {
if (table->dir_path_of_temp_table != NULL) {
ut_print_timestamp(stderr);
fputs(" InnoDB: Error: trying to rename a"
@@ -1420,10 +1484,40 @@ dict_table_rename_in_cache(
ut_print_filename(stderr,
table->dir_path_of_temp_table);
fputs(" )\n", stderr);
- return(FALSE);
- } else if (!fil_rename_tablespace(old_name, table->space,
- new_name)) {
- return(FALSE);
+ return(DB_ERROR);
+
+ } else if (DICT_TF_HAS_DATA_DIR(table->flags)) {
+ char* old_path;
+
+ old_path = fil_space_get_first_path(table->space);
+
+ new_path = os_file_make_new_pathname(
+ old_path, new_name);
+
+ mem_free(old_path);
+
+ dberr_t err = fil_create_link_file(
+ new_name, new_path);
+
+ if (err != DB_SUCCESS) {
+ mem_free(new_path);
+ return(DB_TABLESPACE_EXISTS);
+ }
+ }
+
+ ibool success = fil_rename_tablespace(
+ old_name, table->space, new_name, new_path);
+
+ /* If the tablespace is remote, a new .isl file was created
+ If success, delete the old one. If not, delete the new one. */
+ if (new_path) {
+
+ mem_free(new_path);
+ fil_delete_link_file(success ? old_name : new_name);
+ }
+
+ if (!success) {
+ return(DB_ERROR);
}
}
@@ -1450,12 +1544,11 @@ dict_table_rename_in_cache(
ut_a(dict_sys->size > 0);
/* Update the table_name field in indexes */
- index = dict_table_get_first_index(table);
+ for (index = dict_table_get_first_index(table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
- while (index != NULL) {
index->table_name = table->name;
-
- index = dict_table_get_next_index(index);
}
if (!rename_also_foreigns) {
@@ -1490,7 +1583,7 @@ dict_table_rename_in_cache(
UT_LIST_INIT(table->referenced_list);
- return(TRUE);
+ return(DB_SUCCESS);
}
/* Update the table name fields in foreign constraints, and update also
@@ -1571,9 +1664,10 @@ dict_table_rename_in_cache(
foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
}
- foreign = UT_LIST_GET_FIRST(table->referenced_list);
+ for (foreign = UT_LIST_GET_FIRST(table->referenced_list);
+ foreign != NULL;
+ foreign = UT_LIST_GET_NEXT(referenced_list, foreign)) {
- while (foreign != NULL) {
if (ut_strlen(foreign->referenced_table_name)
< ut_strlen(table->name)) {
/* Allocate a longer name buffer;
@@ -1581,16 +1675,19 @@ dict_table_rename_in_cache(
foreign->referenced_table_name = mem_heap_strdup(
foreign->heap, table->name);
- dict_mem_referenced_table_name_lookup_set(foreign, TRUE);
+
+ dict_mem_referenced_table_name_lookup_set(
+ foreign, TRUE);
} else {
/* Use the same buffer */
strcpy(foreign->referenced_table_name, table->name);
- dict_mem_referenced_table_name_lookup_set(foreign, FALSE);
+
+ dict_mem_referenced_table_name_lookup_set(
+ foreign, FALSE);
}
- foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
}
- return(TRUE);
+ return(DB_SUCCESS);
}
/**********************************************************************//**
@@ -1692,6 +1789,30 @@ dict_table_remove_from_cache_low(
ut_ad(dict_lru_validate());
+ if (lru_evict && table->drop_aborted) {
+ /* Do as dict_table_try_drop_aborted() does. */
+
+ trx_t* trx = trx_allocate_for_background();
+
+ ut_ad(mutex_own(&dict_sys->mutex));
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ /* Mimic row_mysql_lock_data_dictionary(). */
+ trx->dict_operation_lock_mode = RW_X_LATCH;
+
+ trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
+
+ /* Silence a debug assertion in row_merge_drop_indexes(). */
+ ut_d(table->n_ref_count++);
+ row_merge_drop_indexes(trx, table, TRUE);
+ ut_d(table->n_ref_count--);
+ ut_ad(table->n_ref_count == 0);
+ trx_commit_for_mysql(trx);
+ trx->dict_operation_lock_mode = 0;
+ trx_free_for_background(trx);
+ }
+
size = mem_heap_get_size(table->heap) + strlen(table->name) + 1;
ut_ad(dict_sys->size >= size);
@@ -1777,6 +1898,12 @@ dict_index_too_big_for_undo(
+ 10 + FIL_PAGE_DATA_END /* trx_undo_left() */
+ 2/* pointer to previous undo log record */;
+ /* FTS index consists of auxiliary tables, they shall be excluded from
+ index row size check */
+ if (new_index->type & DICT_FTS) {
+ return(false);
+ }
+
if (!clust_index) {
ut_a(dict_index_is_clust(new_index));
clust_index = new_index;
@@ -1900,6 +2027,12 @@ dict_index_too_big_for_tree(
/* maximum allowed size of a node pointer record */
ulint page_ptr_max;
+ /* FTS index consists of auxiliary tables, they shall be excluded from
+ index row size check */
+ if (new_index->type & DICT_FTS) {
+ return(false);
+ }
+
comp = dict_table_is_comp(table);
zip_size = dict_table_zip_size(table);
@@ -2032,7 +2165,7 @@ add_field_size:
Adds an index to the dictionary cache.
@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */
UNIV_INTERN
-ulint
+dberr_t
dict_index_add_to_cache(
/*====================*/
dict_table_t* table, /*!< in: table on which the index is */
@@ -2051,6 +2184,7 @@ dict_index_add_to_cache(
ut_ad(mutex_own(&(dict_sys->mutex)));
ut_ad(index->n_def == index->n_fields);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
+ ut_ad(!dict_index_is_online_ddl(index));
ut_ad(mem_heap_validate(index->heap));
ut_a(!dict_index_is_clust(index)
@@ -2077,6 +2211,7 @@ dict_index_add_to_cache(
number of fields in the cache internal representation */
new_index->n_fields = new_index->n_def;
+ new_index->trx_id = index->trx_id;
if (strict && dict_index_too_big_for_tree(table, new_index)) {
too_big:
@@ -2169,51 +2304,41 @@ undo_size_ok:
}
}
- /* Add the new index as the last index for the table */
-
- UT_LIST_ADD_LAST(indexes, table->indexes, new_index);
- new_index->table = table;
- new_index->table_name = table->name;
-
- new_index->search_info = btr_search_info_create(new_index->heap);
-
- new_index->stat_index_size = 1;
- new_index->stat_n_leaf_pages = 1;
-
- new_index->page = page_no;
- rw_lock_create(index_tree_rw_lock_key, &new_index->lock,
- dict_index_is_ibuf(index)
- ? SYNC_IBUF_INDEX_TREE : SYNC_INDEX_TREE);
-
if (!dict_index_is_univ(new_index)) {
new_index->stat_n_diff_key_vals =
- static_cast<ib_uint64_t*>(mem_heap_alloc(
+ static_cast<ib_uint64_t*>(mem_heap_zalloc(
new_index->heap,
- (1 + dict_index_get_n_unique(new_index))
+ dict_index_get_n_unique(new_index)
* sizeof(*new_index->stat_n_diff_key_vals)));
new_index->stat_n_sample_sizes =
- static_cast<ib_uint64_t*>(mem_heap_alloc(
+ static_cast<ib_uint64_t*>(mem_heap_zalloc(
new_index->heap,
- (1 + dict_index_get_n_unique(new_index))
+ dict_index_get_n_unique(new_index)
* sizeof(*new_index->stat_n_sample_sizes)));
new_index->stat_n_non_null_key_vals =
static_cast<ib_uint64_t*>(mem_heap_zalloc(
new_index->heap,
- (1 + dict_index_get_n_unique(new_index))
+ dict_index_get_n_unique(new_index)
* sizeof(*new_index->stat_n_non_null_key_vals)));
+ }
- /* Give some sensible values to stat_n_... in case we do
- not calculate statistics quickly enough */
+ new_index->stat_index_size = 1;
+ new_index->stat_n_leaf_pages = 1;
- for (i = 0; i <= dict_index_get_n_unique(new_index); i++) {
+ /* Add the new index as the last index for the table */
- new_index->stat_n_diff_key_vals[i] = 100;
- new_index->stat_n_sample_sizes[i] = 0;
- }
- }
+ UT_LIST_ADD_LAST(indexes, table->indexes, new_index);
+ new_index->table = table;
+ new_index->table_name = table->name;
+ new_index->search_info = btr_search_info_create(new_index->heap);
+
+ new_index->page = page_no;
+ rw_lock_create(index_tree_rw_lock_key, &new_index->lock,
+ dict_index_is_ibuf(index)
+ ? SYNC_IBUF_INDEX_TREE : SYNC_INDEX_TREE);
dict_sys->size += mem_heap_get_size(new_index->heap);
@@ -2242,9 +2367,17 @@ dict_index_remove_from_cache_low(
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
ut_ad(mutex_own(&(dict_sys->mutex)));
+ /* No need to acquire the dict_index_t::lock here because
+ there can't be any active operations on this index (or table). */
+
+ if (index->online_log) {
+ ut_ad(index->online_status == ONLINE_INDEX_CREATION);
+ row_log_free(index->online_log);
+ }
+
/* We always create search info whether or not adaptive
hash index is enabled or not. */
- info = index->search_info;
+ info = btr_search_get_info(index);
ut_ad(info);
/* We are not allowed to free the in-memory index struct
@@ -2270,15 +2403,15 @@ dict_index_remove_from_cache_low(
if (retries % 500 == 0) {
/* No luck after 5 seconds of wait. */
fprintf(stderr, "InnoDB: Error: Waited for"
- " %lu secs for hash index"
- " ref_count (%lu) to drop"
- " to 0.\n"
- "index: \"%s\""
- " table: \"%s\"\n",
- retries/100,
- ref_count,
- index->name,
- table->name);
+ " %lu secs for hash index"
+ " ref_count (%lu) to drop"
+ " to 0.\n"
+ "index: \"%s\""
+ " table: \"%s\"\n",
+ retries/100,
+ ref_count,
+ index->name,
+ table->name);
}
/* To avoid a hang here we commit suicide if the
@@ -2821,8 +2954,6 @@ dict_index_build_internal_fts(
return(new_index);
}
-
-#ifndef UNIV_HOTBACKUP
/*====================== FOREIGN KEY PROCESSING ========================*/
/*********************************************************************//**
@@ -2889,8 +3020,7 @@ dict_table_get_foreign_constraint(
foreign;
foreign = UT_LIST_GET_NEXT(foreign_list, foreign)) {
- if (foreign->foreign_index == index
- || foreign->referenced_index == index) {
+ if (foreign->foreign_index == index) {
return(foreign);
}
@@ -2901,7 +3031,7 @@ dict_table_get_foreign_constraint(
/*********************************************************************//**
Frees a foreign key struct. */
-static
+UNIV_INTERN
void
dict_foreign_free(
/*==============*/
@@ -2912,7 +3042,7 @@ dict_foreign_free(
/**********************************************************************//**
Removes a foreign constraint struct from the dictionary cache. */
-static
+UNIV_INTERN
void
dict_foreign_remove_from_cache(
/*===========================*/
@@ -2976,84 +3106,50 @@ dict_foreign_find(
return(NULL);
}
+
/*********************************************************************//**
Tries to find an index whose first fields are the columns in the array,
in the same order and is not marked for deletion and is not the same
as types_idx.
@return matching index, NULL if not found */
-static
+UNIV_INTERN
dict_index_t*
dict_foreign_find_index(
/*====================*/
- dict_table_t* table, /*!< in: table */
- const char** columns,/*!< in: array of column names */
- ulint n_cols, /*!< in: number of columns */
- dict_index_t* types_idx, /*!< in: NULL or an index to whose types the
- column types must match */
- ibool check_charsets,
- /*!< in: whether to check charsets.
- only has an effect if types_idx != NULL */
- ulint check_null)
- /*!< in: nonzero if none of the columns must
- be declared NOT NULL */
+ const dict_table_t* table, /*!< in: table */
+ const char** columns,/*!< in: array of column names */
+ ulint n_cols, /*!< in: number of columns */
+ const dict_index_t* types_idx,
+ /*!< in: NULL or an index
+ whose types the column types
+ must match */
+ ibool check_charsets,
+ /*!< in: whether to check
+ charsets. only has an effect
+ if types_idx != NULL */
+ ulint check_null)
+ /*!< in: nonzero if none of
+ the columns must be declared
+ NOT NULL */
{
dict_index_t* index;
+ ut_ad(mutex_own(&dict_sys->mutex));
+
index = dict_table_get_first_index(table);
while (index != NULL) {
/* Ignore matches that refer to the same instance
- or the index is to be dropped */
- if (index->to_be_dropped || types_idx == index
- || index->type & DICT_FTS) {
+ (or the index is to be dropped) */
+ if (types_idx == index || index->type & DICT_FTS
+ || index->to_be_dropped) {
goto next_rec;
- } else if (dict_index_get_n_fields(index) >= n_cols) {
- ulint i;
-
- for (i = 0; i < n_cols; i++) {
- dict_field_t* field;
- const char* col_name;
-
- field = dict_index_get_nth_field(index, i);
-
- col_name = dict_table_get_col_name(
- table, dict_col_get_no(field->col));
-
- if (field->prefix_len != 0) {
- /* We do not accept column prefix
- indexes here */
-
- break;
- }
-
- if (0 != innobase_strcasecmp(columns[i],
- col_name)) {
- break;
- }
-
- if (check_null
- && (field->col->prtype & DATA_NOT_NULL)) {
-
- return(NULL);
- }
-
- if (types_idx && !cmp_cols_are_equal(
- dict_index_get_nth_col(index, i),
- dict_index_get_nth_col(types_idx,
- i),
- check_charsets)) {
-
- break;
- }
- }
-
- if (i == n_cols) {
- /* We found a matching index */
-
- return(index);
- }
+ } else if (dict_foreign_qualify_index(
+ table, columns, n_cols, index, types_idx,
+ check_charsets, check_null)) {
+ return(index);
}
next_rec:
@@ -3064,90 +3160,6 @@ next_rec:
}
/**********************************************************************//**
-Find an index that is equivalent to the one passed in and is not marked
-for deletion.
-@return index equivalent to foreign->foreign_index, or NULL */
-UNIV_INTERN
-dict_index_t*
-dict_foreign_find_equiv_index(
-/*==========================*/
- dict_foreign_t* foreign)/*!< in: foreign key */
-{
- ut_a(foreign != NULL);
-
- /* Try to find an index which contains the columns as the
- first fields and in the right order, and the types are the
- same as in foreign->foreign_index */
-
- return(dict_foreign_find_index(
- foreign->foreign_table,
- foreign->foreign_col_names, foreign->n_fields,
- foreign->foreign_index, TRUE, /* check types */
- FALSE/* allow columns to be NULL */));
-}
-
-#endif /* !UNIV_HOTBACKUP */
-/**********************************************************************//**
-Returns an index object by matching on the name and column names and
-if more than one index matches return the index with the max id
-@return matching index, NULL if not found */
-UNIV_INTERN
-dict_index_t*
-dict_table_get_index_by_max_id(
-/*===========================*/
- dict_table_t* table, /*!< in: table */
- const char* name, /*!< in: the index name to find */
- const char** columns,/*!< in: array of column names */
- ulint n_cols) /*!< in: number of columns */
-{
- dict_index_t* index;
- dict_index_t* found;
-
- found = NULL;
- index = dict_table_get_first_index(table);
-
- while (index != NULL) {
- if (ut_strcmp(index->name, name) == 0
- && dict_index_get_n_ordering_defined_by_user(index)
- == n_cols) {
-
- ulint i;
-
- for (i = 0; i < n_cols; i++) {
- dict_field_t* field;
- const char* col_name;
-
- field = dict_index_get_nth_field(index, i);
-
- col_name = dict_table_get_col_name(
- table, dict_col_get_no(field->col));
-
- if (0 != innobase_strcasecmp(
- columns[i], col_name)) {
-
- break;
- }
- }
-
- if (i == n_cols) {
- /* We found a matching index, select
- the index with the higher id*/
-
- if (!found || index->id > found->id) {
-
- found = index;
- }
- }
- }
-
- index = dict_table_get_next_index(index);
- }
-
- return(found);
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
Report an error in a foreign key definition. */
static
void
@@ -3196,7 +3208,7 @@ At least one of the foreign table and the referenced table must already
be in the dictionary cache!
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
dict_foreign_add_to_cache(
/*======================*/
dict_foreign_t* foreign, /*!< in, own: foreign key constraint */
@@ -3325,7 +3337,6 @@ dict_foreign_add_to_cache(
return(DB_SUCCESS);
}
-#endif /* !UNIV_HOTBACKUP */
/*********************************************************************//**
Scans from pointer onwards. Stops if is at the start of a copy of
'string' where characters are compared without case sensitivity, and
@@ -3579,6 +3590,67 @@ dict_scan_col(
return(ptr);
}
+
+/*********************************************************************//**
+Open a table from its database and table name, this is currently used by
+foreign constraint parser to get the referenced table.
+@return complete table name with database and table name, allocated from
+heap memory passed in */
+UNIV_INTERN
+char*
+dict_get_referenced_table(
+/*======================*/
+ const char* name, /*!< in: foreign key table name */
+ const char* database_name, /*!< in: table db name */
+ ulint database_name_len, /*!< in: db name length */
+ const char* table_name, /*!< in: table name */
+ ulint table_name_len, /*!< in: table name length */
+ dict_table_t** table, /*!< out: table object or NULL */
+ mem_heap_t* heap) /*!< in/out: heap memory */
+{
+ char* ref;
+ const char* db_name;
+
+ if (!database_name) {
+ /* Use the database name of the foreign key table */
+
+ db_name = name;
+ database_name_len = dict_get_db_name_len(name);
+ } else {
+ db_name = database_name;
+ }
+
+ /* Copy database_name, '/', table_name, '\0' */
+ ref = static_cast<char*>(
+ mem_heap_alloc(heap, database_name_len + table_name_len + 2));
+
+ memcpy(ref, db_name, database_name_len);
+ ref[database_name_len] = '/';
+ memcpy(ref + database_name_len + 1, table_name, table_name_len + 1);
+
+ /* Values; 0 = Store and compare as given; case sensitive
+ 1 = Store and compare in lower; case insensitive
+ 2 = Store as given, compare in lower; case semi-sensitive */
+ if (innobase_get_lower_case_table_names() == 2) {
+ innobase_casedn_str(ref);
+ *table = dict_table_get_low(ref);
+ memcpy(ref, db_name, database_name_len);
+ ref[database_name_len] = '/';
+ memcpy(ref + database_name_len + 1, table_name, table_name_len + 1);
+
+ } else {
+#ifndef __WIN__
+ if (innobase_get_lower_case_table_names() == 1) {
+ innobase_casedn_str(ref);
+ }
+#else
+ innobase_casedn_str(ref);
+#endif /* !__WIN__ */
+ *table = dict_table_get_low(ref);
+ }
+
+ return(ref);
+}
/*********************************************************************//**
Scans a table name from an SQL string.
@return scanned to */
@@ -3598,9 +3670,7 @@ dict_scan_table_name(
const char* database_name = NULL;
ulint database_name_len = 0;
const char* table_name = NULL;
- ulint table_name_len;
const char* scan_name;
- char* ref;
*success = FALSE;
*table = NULL;
@@ -3648,46 +3718,11 @@ dict_scan_table_name(
table_name = scan_name;
}
- if (database_name == NULL) {
- /* Use the database name of the foreign key table */
-
- database_name = name;
- database_name_len = dict_get_db_name_len(name);
- }
-
- table_name_len = strlen(table_name);
-
- /* Copy database_name, '/', table_name, '\0' */
- ref = static_cast<char*>(
- mem_heap_alloc(heap, database_name_len + table_name_len + 2));
-
- memcpy(ref, database_name, database_name_len);
- ref[database_name_len] = '/';
- memcpy(ref + database_name_len + 1, table_name, table_name_len + 1);
-
- /* Values; 0 = Store and compare as given; case sensitive
- 1 = Store and compare in lower; case insensitive
- 2 = Store as given, compare in lower; case semi-sensitive */
- if (innobase_get_lower_case_table_names() == 2) {
- innobase_casedn_str(ref);
- *table = dict_table_get_low(ref);
- memcpy(ref, database_name, database_name_len);
- ref[database_name_len] = '/';
- memcpy(ref + database_name_len + 1, table_name, table_name_len + 1);
-
- } else {
-#ifndef __WIN__
- if (innobase_get_lower_case_table_names() == 1) {
- innobase_casedn_str(ref);
- }
-#else
- innobase_casedn_str(ref);
-#endif /* !__WIN__ */
- *table = dict_table_get_low(ref);
- }
+ *ref_name = dict_get_referenced_table(
+ name, database_name, database_name_len,
+ table_name, strlen(table_name), table, heap);
*success = TRUE;
- *ref_name = ref;
return(ptr);
}
@@ -3810,13 +3845,12 @@ end_of_string:
}
}
-#ifndef UNIV_HOTBACKUP
/*********************************************************************//**
Finds the highest [number] for foreign key constraints of the table. Looks
only at the >= 4.0.18-format id's, which are of the form
databasename/tablename_ibfk_[number].
@return highest number, 0 if table has no new format foreign key constraints */
-static
+UNIV_INTERN
ulint
dict_table_get_highest_foreign_id(
/*==============================*/
@@ -3871,6 +3905,8 @@ dict_foreign_report_syntax_err(
in the SQL string */
const char* ptr) /*!< in: place of the syntax error */
{
+ ut_ad(!srv_read_only_mode);
+
FILE* ef = dict_foreign_err_file;
mutex_enter(&dict_foreign_err_mutex);
@@ -3888,7 +3924,7 @@ be accompanied with indexes in both participating tables. The indexes are
allowed to contain more fields than mentioned in the constraint.
@return error code or DB_SUCCESS */
static
-ulint
+dberr_t
dict_create_foreign_constraints_low(
/*================================*/
trx_t* trx, /*!< in: transaction */
@@ -3919,7 +3955,7 @@ dict_create_foreign_constraints_low(
FILE* ef = dict_foreign_err_file;
const char* constraint_name;
ibool success;
- ulint error;
+ dberr_t error;
const char* ptr1;
const char* ptr2;
ulint i;
@@ -3931,6 +3967,7 @@ dict_create_foreign_constraints_low(
const char* column_names[500];
const char* referenced_table_name;
+ ut_ad(!srv_read_only_mode);
ut_ad(mutex_own(&(dict_sys->mutex)));
table = dict_table_get_low(name);
@@ -4470,11 +4507,11 @@ UNIV_INTERN
ibool
dict_str_starts_with_keyword(
/*=========================*/
- void* mysql_thd, /*!< in: MySQL thread handle */
+ THD* thd, /*!< in: MySQL thread handle */
const char* str, /*!< in: string to scan for keyword */
const char* keyword) /*!< in: keyword to look for */
{
- struct charset_info_st* cs = innobase_get_charset(mysql_thd);
+ struct charset_info_st* cs = innobase_get_charset(thd);
ibool success;
dict_accept(cs, str, keyword, &success);
@@ -4489,7 +4526,7 @@ be accompanied with indexes in both participating tables. The indexes are
allowed to contain more fields than mentioned in the constraint.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
dict_create_foreign_constraints(
/*============================*/
trx_t* trx, /*!< in: transaction */
@@ -4509,9 +4546,9 @@ dict_create_foreign_constraints(
code DB_CANNOT_ADD_CONSTRAINT if
any foreign keys are found. */
{
- char* str;
- ulint err;
- mem_heap_t* heap;
+ char* str;
+ dberr_t err;
+ mem_heap_t* heap;
ut_a(trx);
ut_a(trx->mysql_thd);
@@ -4534,7 +4571,7 @@ Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement.
@return DB_SUCCESS or DB_CANNOT_DROP_CONSTRAINT if syntax error or the
constraint id does not match */
UNIV_INTERN
-ulint
+dberr_t
dict_foreign_parse_drop_constraints(
/*================================*/
mem_heap_t* heap, /*!< in: heap from which we can
@@ -4552,7 +4589,6 @@ dict_foreign_parse_drop_constraints(
size_t len;
const char* ptr;
const char* id;
- FILE* ef = dict_foreign_err_file;
struct charset_info_st* cs;
ut_a(trx);
@@ -4618,10 +4654,11 @@ loop:
foreign = UT_LIST_GET_FIRST(table->foreign_list);
while (foreign != NULL) {
- if (0 == strcmp(foreign->id, id)
+ if (0 == innobase_strcasecmp(foreign->id, id)
|| (strchr(foreign->id, '/')
- && 0 == strcmp(id,
- dict_remove_db_name(foreign->id)))) {
+ && 0 == innobase_strcasecmp(
+ id,
+ dict_remove_db_name(foreign->id)))) {
/* Found */
break;
}
@@ -4629,20 +4666,26 @@ loop:
foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
}
+
if (foreign == NULL) {
- mutex_enter(&dict_foreign_err_mutex);
- rewind(ef);
- ut_print_timestamp(ef);
- fputs(" Error in dropping of a foreign key constraint"
- " of table ", ef);
- ut_print_name(ef, NULL, TRUE, table->name);
- fputs(",\n"
- "in SQL command\n", ef);
- fputs(str, ef);
- fputs("\nCannot find a constraint with the given id ", ef);
- ut_print_name(ef, NULL, FALSE, id);
- fputs(".\n", ef);
- mutex_exit(&dict_foreign_err_mutex);
+
+ if (!srv_read_only_mode) {
+ FILE* ef = dict_foreign_err_file;
+
+ mutex_enter(&dict_foreign_err_mutex);
+ rewind(ef);
+ ut_print_timestamp(ef);
+ fputs(" Error in dropping of a foreign key "
+ "constraint of table ", ef);
+ ut_print_name(ef, NULL, TRUE, table->name);
+ fputs(",\nin SQL command\n", ef);
+ fputs(str, ef);
+ fputs("\nCannot find a constraint with the "
+ "given id ", ef);
+ ut_print_name(ef, NULL, FALSE, id);
+ fputs(".\n", ef);
+ mutex_exit(&dict_foreign_err_mutex);
+ }
mem_free(str);
@@ -4652,15 +4695,19 @@ loop:
goto loop;
syntax_error:
- mutex_enter(&dict_foreign_err_mutex);
- rewind(ef);
- ut_print_timestamp(ef);
- fputs(" Syntax error in dropping of a"
- " foreign key constraint of table ", ef);
- ut_print_name(ef, NULL, TRUE, table->name);
- fprintf(ef, ",\n"
- "close to:\n%s\n in SQL command\n%s\n", ptr, str);
- mutex_exit(&dict_foreign_err_mutex);
+ if (!srv_read_only_mode) {
+ FILE* ef = dict_foreign_err_file;
+
+ mutex_enter(&dict_foreign_err_mutex);
+ rewind(ef);
+ ut_print_timestamp(ef);
+ fputs(" Syntax error in dropping of a"
+ " foreign key constraint of table ", ef);
+ ut_print_name(ef, NULL, TRUE, table->name);
+ fprintf(ef, ",\n"
+ "close to:\n%s\n in SQL command\n%s\n", ptr, str);
+ mutex_exit(&dict_foreign_err_mutex);
+ }
mem_free(str);
@@ -4668,7 +4715,7 @@ syntax_error:
}
/*==================== END OF FOREIGN KEY PROCESSING ====================*/
-#endif /* !UNIV_HOTBACKUP */
+
/**********************************************************************//**
Returns an index object if it is found in the dictionary cache.
Assumes that dict_sys->mutex is already being held.
@@ -4908,7 +4955,6 @@ dict_index_calc_min_rec_len(
return(sum);
}
-#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Prints info of a foreign key constraint. */
static
@@ -4939,7 +4985,6 @@ dict_foreign_print_low(
fputs(" )\n", stderr);
}
-#endif /* !UNIV_HOTBACKUP */
/**********************************************************************//**
Prints a table data. */
UNIV_INTERN
@@ -4948,60 +4993,29 @@ dict_table_print(
/*=============*/
dict_table_t* table) /*!< in: table */
{
- mutex_enter(&(dict_sys->mutex));
- dict_table_print_low(table);
- mutex_exit(&(dict_sys->mutex));
-}
-
-/**********************************************************************//**
-Prints a table data when we know the table name. */
-UNIV_INTERN
-void
-dict_table_print_by_name(
-/*=====================*/
- const char* name) /*!< in: table name */
-{
- dict_table_t* table;
-
- mutex_enter(&(dict_sys->mutex));
-
- table = dict_table_get_low(name);
-
- ut_a(table);
-
- dict_table_print_low(table);
- mutex_exit(&(dict_sys->mutex));
-}
-
-/**********************************************************************//**
-Prints a table data. */
-UNIV_INTERN
-void
-dict_table_print_low(
-/*=================*/
- dict_table_t* table) /*!< in: table */
-{
dict_index_t* index;
dict_foreign_t* foreign;
ulint i;
ut_ad(mutex_own(&(dict_sys->mutex)));
- dict_stats_update(table, DICT_STATS_FETCH, TRUE);
+ dict_table_stats_lock(table, RW_X_LATCH);
- dict_table_stats_lock(table, RW_S_LATCH);
+ if (!table->stat_initialized) {
+ dict_stats_update_transient(table);
+ }
fprintf(stderr,
"--------------------------------------\n"
"TABLE: name %s, id %llu, flags %lx, columns %lu,"
- " indexes %lu, appr.rows %lu\n"
+ " indexes %lu, appr.rows " UINT64PF "\n"
" COLUMNS: ",
table->name,
(ullint) table->id,
(ulong) table->flags,
(ulong) table->n_cols,
(ulong) UT_LIST_GET_LEN(table->indexes),
- (ulong) table->stat_n_rows);
+ table->stat_n_rows);
for (i = 0; i < (ulint) table->n_cols; i++) {
dict_col_print_low(table, dict_table_get_nth_col(table, i));
@@ -5017,7 +5031,9 @@ dict_table_print_low(
index = UT_LIST_GET_NEXT(indexes, index);
}
- dict_table_stats_unlock(table, RW_S_LATCH);
+ table->stat_initialized = FALSE;
+
+ dict_table_stats_unlock(table, RW_X_LATCH);
foreign = UT_LIST_GET_FIRST(table->foreign_list);
@@ -5065,13 +5081,15 @@ dict_index_print_low(
ib_int64_t n_vals;
ulint i;
+ ut_a(index->table->stat_initialized);
+
ut_ad(mutex_own(&(dict_sys->mutex)));
if (index->n_user_defined_cols > 0) {
n_vals = index->stat_n_diff_key_vals[
- index->n_user_defined_cols];
+ index->n_user_defined_cols - 1];
} else {
- n_vals = index->stat_n_diff_key_vals[1];
+ n_vals = index->stat_n_diff_key_vals[0];
}
fprintf(stderr,
@@ -5121,7 +5139,6 @@ dict_field_print_low(
}
}
-#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Outputs info on a foreign key of a table in a format suitable for
CREATE TABLE. */
@@ -5310,7 +5327,6 @@ dict_print_info_on_foreign_keys(
mutex_exit(&(dict_sys->mutex));
}
-#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
Displays the names of the index and the table. */
UNIV_INTERN
@@ -5318,7 +5334,7 @@ void
dict_index_name_print(
/*==================*/
FILE* file, /*!< in: output stream */
- trx_t* trx, /*!< in: transaction */
+ const trx_t* trx, /*!< in: transaction */
const dict_index_t* index) /*!< in: index to print */
{
fputs("index ", file);
@@ -5393,7 +5409,9 @@ UNIV_INTERN
void
dict_set_corrupted(
/*===============*/
- dict_index_t* index) /*!< in/out: index */
+ dict_index_t* index, /*!< in/out: index */
+ trx_t* trx, /*!< in/out: transaction */
+ const char* ctx) /*!< in: context */
{
mem_heap_t* heap;
mtr_t mtr;
@@ -5401,8 +5419,14 @@ dict_set_corrupted(
dtuple_t* tuple;
dfield_t* dfield;
byte* buf;
+ char* table_name;
const char* status;
btr_cur_t cursor;
+ bool locked = RW_X_LATCH == trx->dict_operation_lock_mode;
+
+ if (!locked) {
+ row_mysql_lock_data_dictionary(trx);
+ }
ut_ad(index);
ut_ad(mutex_own(&dict_sys->mutex));
@@ -5422,7 +5446,7 @@ dict_set_corrupted(
if (index->type & DICT_CORRUPT) {
/* The index was already flagged corrupted. */
ut_ad(!dict_index_is_clust(index) || index->table->corrupted);
- return;
+ goto func_exit;
}
heap = mem_heap_create(sizeof(dtuple_t) + 2 * (sizeof(dfield_t)
@@ -5463,19 +5487,29 @@ dict_set_corrupted(
goto fail;
}
mlog_write_ulint(field, index->type, MLOG_4BYTES, &mtr);
- status = " InnoDB: Flagged corruption of ";
+ status = "Flagged";
} else {
fail:
- status = " InnoDB: Unable to flag corruption of ";
+ status = "Unable to flag";
}
mtr_commit(&mtr);
+ mem_heap_empty(heap);
+ table_name = static_cast<char*>(mem_heap_alloc(heap, FN_REFLEN + 1));
+ *innobase_convert_name(
+ table_name, FN_REFLEN,
+ index->table_name, strlen(index->table_name),
+ NULL, TRUE) = 0;
+
+ ib_logf(IB_LOG_LEVEL_ERROR, "%s corruption of %s in table %s in %s",
+ status, index->name, table_name, ctx);
+
mem_heap_free(heap);
- ut_print_timestamp(stderr);
- fputs(status, stderr);
- dict_index_name_print(stderr, NULL, index);
- putc('\n', stderr);
+func_exit:
+ if (!locked) {
+ row_mysql_unlock_data_dictionary(trx);
+ }
}
/**********************************************************************//**
@@ -5582,7 +5616,7 @@ dict_table_get_index_on_name(
/* If name is NULL, just return */
if (!name) {
- return NULL;
+ return(NULL);
}
index = dict_table_get_first_index(table);
@@ -5597,42 +5631,47 @@ dict_table_get_index_on_name(
}
return(NULL);
-
}
/**********************************************************************//**
-Replace the index passed in with another equivalent index in the tables
-foreign key list. */
+Replace the index passed in with another equivalent index in the
+foreign key lists of the table. */
UNIV_INTERN
void
-dict_table_replace_index_in_foreign_list(
-/*=====================================*/
- dict_table_t* table, /*!< in/out: table */
- dict_index_t* index, /*!< in: index to be replaced */
- const trx_t* trx) /*!< in: transaction handle */
+dict_foreign_replace_index(
+/*=======================*/
+ dict_table_t* table, /*!< in/out: table */
+ const dict_index_t* index, /*!< in: index to be replaced */
+ const trx_t* trx) /*!< in: transaction handle */
{
dict_foreign_t* foreign;
+ ut_ad(index->to_be_dropped);
+
for (foreign = UT_LIST_GET_FIRST(table->foreign_list);
foreign;
foreign = UT_LIST_GET_NEXT(foreign_list, foreign)) {
- if (foreign->foreign_index == index) {
- dict_index_t* new_index
- = dict_foreign_find_equiv_index(foreign);
+ dict_index_t* new_index;
- /* There must exist an alternative index if
- check_foreigns (FOREIGN_KEY_CHECKS) is on,
- since ha_innobase::prepare_drop_index had done
- the check before we reach here. */
+ if (foreign->foreign_index == index) {
+ ut_ad(foreign->foreign_table == index->table);
+ new_index = dict_foreign_find_index(
+ foreign->foreign_table,
+ foreign->foreign_col_names,
+ foreign->n_fields, index,
+ /*check_charsets=*/TRUE, /*check_null=*/FALSE);
+ /* There must exist an alternative index,
+ since this must have been checked earlier. */
ut_a(new_index || !trx->check_foreigns);
+ ut_ad(!new_index || new_index->table == index->table);
+ ut_ad(!new_index || !new_index->to_be_dropped);
foreign->foreign_index = new_index;
}
}
-
for (foreign = UT_LIST_GET_FIRST(table->referenced_list);
foreign;
foreign = UT_LIST_GET_NEXT(referenced_list, foreign)) {
@@ -5647,8 +5686,11 @@ dict_table_replace_index_in_foreign_list(
foreign->referenced_col_names,
foreign->n_fields, index,
/*check_charsets=*/TRUE, /*check_null=*/FALSE);
- ut_ad(new_index || !trx->check_foreigns);
+ /* There must exist an alternative index,
+ since this must have been checked earlier. */
+ ut_a(new_index || !trx->check_foreigns);
ut_ad(!new_index || new_index->table == index->table);
+ ut_ad(!new_index || !new_index->to_be_dropped);
foreign->referenced_index = new_index;
}
@@ -5696,8 +5738,8 @@ dict_table_check_for_dup_indexes(
/*=============================*/
const dict_table_t* table, /*!< in: Check for dup indexes
in this table */
- ibool tmp_ok) /*!< in: TRUE=allow temporary
- index names */
+ enum check_name check) /*!< in: whether and when to allow
+ temporary index names */
{
/* Check for duplicates, ignoring indexes that are marked
as to be dropped */
@@ -5713,17 +5755,32 @@ dict_table_check_for_dup_indexes(
index1 = UT_LIST_GET_FIRST(table->indexes);
do {
- ut_ad(tmp_ok || *index1->name != TEMP_INDEX_PREFIX);
-
- index2 = UT_LIST_GET_NEXT(indexes, index1);
-
- while (index2) {
-
- if (!index2->to_be_dropped) {
- ut_ad(ut_strcmp(index1->name, index2->name));
+ if (*index1->name == TEMP_INDEX_PREFIX) {
+ ut_a(!dict_index_is_clust(index1));
+
+ switch (check) {
+ case CHECK_ALL_COMPLETE:
+ ut_error;
+ case CHECK_ABORTED_OK:
+ switch (dict_index_get_online_status(index1)) {
+ case ONLINE_INDEX_COMPLETE:
+ case ONLINE_INDEX_CREATION:
+ ut_error;
+ break;
+ case ONLINE_INDEX_ABORTED:
+ case ONLINE_INDEX_ABORTED_DROPPED:
+ break;
+ }
+ /* fall through */
+ case CHECK_PARTIAL_OK:
+ break;
}
+ }
- index2 = UT_LIST_GET_NEXT(indexes, index2);
+ for (index2 = UT_LIST_GET_NEXT(indexes, index1);
+ index2 != NULL;
+ index2 = UT_LIST_GET_NEXT(indexes, index2)) {
+ ut_ad(ut_strcmp(index1->name, index2->name));
}
index1 = UT_LIST_GET_NEXT(indexes, index1);
@@ -5739,17 +5796,17 @@ The caller must own the dictionary mutex.
dict_table_schema_check() @{
@return DB_SUCCESS if the table exists and contains the necessary columns */
UNIV_INTERN
-enum db_err
+dberr_t
dict_table_schema_check(
/*====================*/
dict_table_schema_t* req_schema, /*!< in/out: required table
schema */
char* errstr, /*!< out: human readable error
- message if != DB_SUCCESS and
- != DB_TABLE_NOT_FOUND is
+ message if != DB_SUCCESS is
returned */
size_t errstr_sz) /*!< in: errstr size */
{
+ char buf[MAX_FULL_NAME_LEN];
dict_table_t* table;
ulint i;
@@ -5757,8 +5814,24 @@ dict_table_schema_check(
table = dict_table_get_low(req_schema->table_name);
- if (table == NULL || table->ibd_file_missing) {
- /* no such table or missing tablespace */
+ if (table == NULL) {
+ /* no such table */
+
+ ut_snprintf(errstr, errstr_sz,
+ "Table %s not found.",
+ ut_format_name(req_schema->table_name,
+ TRUE, buf, sizeof(buf)));
+
+ return(DB_TABLE_NOT_FOUND);
+ }
+
+ if (table->ibd_file_missing) {
+ /* missing tablespace */
+
+ ut_snprintf(errstr, errstr_sz,
+ "Tablespace for table %s is missing.",
+ ut_format_name(req_schema->table_name,
+ TRUE, buf, sizeof(buf)));
return(DB_TABLE_NOT_FOUND);
}
@@ -5769,7 +5842,8 @@ dict_table_schema_check(
ut_snprintf(errstr, errstr_sz,
"%s has %d columns but should have %lu.",
- req_schema->table_name,
+ ut_format_name(req_schema->table_name,
+ TRUE, buf, sizeof(buf)),
table->n_def - DATA_N_SYS_COLS,
req_schema->n_cols);
@@ -5814,9 +5888,12 @@ dict_table_schema_check(
if (j == table->n_def) {
ut_snprintf(errstr, errstr_sz,
- "required column %s.%s not found.",
- req_schema->table_name,
- req_schema->columns[i].name);
+ "required column %s "
+ "not found in table %s.",
+ req_schema->columns[i].name,
+ ut_format_name(
+ req_schema->table_name,
+ TRUE, buf, sizeof(buf)));
return(DB_ERROR);
}
@@ -5839,10 +5916,11 @@ dict_table_schema_check(
if (req_schema->columns[i].len != table->cols[j].len) {
ut_snprintf(errstr, errstr_sz,
- "Column %s.%s is %s but should be %s "
- "(length mismatch).",
- req_schema->table_name,
+ "Column %s in table %s is %s "
+ "but should be %s (length mismatch).",
req_schema->columns[i].name,
+ ut_format_name(req_schema->table_name,
+ TRUE, buf, sizeof(buf)),
actual_type, req_type);
return(DB_ERROR);
@@ -5852,10 +5930,11 @@ dict_table_schema_check(
if (req_schema->columns[i].mtype != table->cols[j].mtype) {
ut_snprintf(errstr, errstr_sz,
- "Column %s.%s is %s but should be %s "
- "(type mismatch).",
- req_schema->table_name,
+ "Column %s in table %s is %s "
+ "but should be %s (type mismatch).",
req_schema->columns[i].name,
+ ut_format_name(req_schema->table_name,
+ TRUE, buf, sizeof(buf)),
actual_type, req_type);
return(DB_ERROR);
@@ -5868,20 +5947,110 @@ dict_table_schema_check(
!= req_schema->columns[i].prtype_mask) {
ut_snprintf(errstr, errstr_sz,
- "Column %s.%s is %s but should be %s "
- "(flags mismatch).",
- req_schema->table_name,
+ "Column %s in table %s is %s "
+ "but should be %s (flags mismatch).",
req_schema->columns[i].name,
+ ut_format_name(req_schema->table_name,
+ TRUE, buf, sizeof(buf)),
actual_type, req_type);
return(DB_ERROR);
}
}
+ if (req_schema->n_foreign != UT_LIST_GET_LEN(table->foreign_list)) {
+ ut_snprintf(
+ errstr, errstr_sz,
+ "Table %s has %lu foreign key(s) pointing to other "
+ "tables, but it must have %lu.",
+ ut_format_name(req_schema->table_name,
+ TRUE, buf, sizeof(buf)),
+ UT_LIST_GET_LEN(table->foreign_list),
+ req_schema->n_foreign);
+ return(DB_ERROR);
+ }
+
+ if (req_schema->n_referenced != UT_LIST_GET_LEN(table->referenced_list)) {
+ ut_snprintf(
+ errstr, errstr_sz,
+ "There are %lu foreign key(s) pointing to %s, "
+ "but there must be %lu.",
+ UT_LIST_GET_LEN(table->referenced_list),
+ ut_format_name(req_schema->table_name,
+ TRUE, buf, sizeof(buf)),
+ req_schema->n_referenced);
+ return(DB_ERROR);
+ }
+
return(DB_SUCCESS);
}
/* @} */
+/*********************************************************************//**
+Converts a database and table name from filesystem encoding
+(e.g. d@i1b/a@q1b@1Kc, same format as used in dict_table_t::name) in two
+strings in UTF8 encoding (e.g. dцb and aюbØc). The output buffers must be
+at least MAX_DB_UTF8_LEN and MAX_TABLE_UTF8_LEN bytes. */
+UNIV_INTERN
+void
+dict_fs2utf8(
+/*=========*/
+ const char* db_and_table, /*!< in: database and table names,
+ e.g. d@i1b/a@q1b@1Kc */
+ char* db_utf8, /*!< out: database name, e.g. dцb */
+ size_t db_utf8_size, /*!< in: dbname_utf8 size */
+ char* table_utf8, /*!< out: table name, e.g. aюbØc */
+ size_t table_utf8_size)/*!< in: table_utf8 size */
+{
+ char db[MAX_DATABASE_NAME_LEN + 1];
+ ulint db_len;
+ uint errors;
+
+ db_len = dict_get_db_name_len(db_and_table);
+
+ ut_a(db_len <= sizeof(db));
+
+ memcpy(db, db_and_table, db_len);
+ db[db_len] = '\0';
+
+ strconvert(
+ &my_charset_filename, db,
+ system_charset_info, db_utf8, db_utf8_size,
+ &errors);
+
+ /* convert each # to @0023 in table name and store the result in buf */
+ const char* table = dict_remove_db_name(db_and_table);
+ const char* table_p;
+ char buf[MAX_TABLE_NAME_LEN * 5 + 1];
+ char* buf_p;
+ for (table_p = table, buf_p = buf; table_p[0] != '\0'; table_p++) {
+ if (table_p[0] != '#') {
+ buf_p[0] = table_p[0];
+ buf_p++;
+ } else {
+ buf_p[0] = '@';
+ buf_p[1] = '0';
+ buf_p[2] = '0';
+ buf_p[3] = '2';
+ buf_p[4] = '3';
+ buf_p += 5;
+ }
+ ut_a((size_t) (buf_p - buf) < sizeof(buf));
+ }
+ buf_p[0] = '\0';
+
+ errors = 0;
+ strconvert(
+ &my_charset_filename, buf,
+ system_charset_info, table_utf8, table_utf8_size,
+ &errors);
+
+ if (errors != 0) {
+ ut_snprintf(table_utf8, table_utf8_size, "%s%s",
+ srv_mysql50_table_name_prefix, table);
+ }
+}
+
/**********************************************************************//**
Closes the data dictionary module. */
UNIV_INTERN
@@ -5929,7 +6098,9 @@ dict_close(void)
rw_lock_free(&dict_operation_lock);
memset(&dict_operation_lock, 0x0, sizeof(dict_operation_lock));
- mutex_free(&dict_foreign_err_mutex);
+ if (!srv_read_only_mode) {
+ mutex_free(&dict_foreign_err_mutex);
+ }
mem_free(dict_sys);
dict_sys = NULL;
@@ -5943,7 +6114,7 @@ dict_close(void)
/**********************************************************************//**
Validate the dictionary table LRU list.
@return TRUE if valid */
-UNIV_INTERN
+static
ibool
dict_lru_validate(void)
/*===================*/
@@ -5972,7 +6143,7 @@ dict_lru_validate(void)
/**********************************************************************//**
Check if a table exists in the dict table LRU list.
@return TRUE if table found in LRU list */
-UNIV_INTERN
+static
ibool
dict_lru_find_table(
/*================*/
@@ -6025,4 +6196,279 @@ dict_non_lru_find_table(
return(FALSE);
}
# endif /* UNIV_DEBUG */
+/*********************************************************************//**
+Check an index to see whether its first fields are the columns in the array,
+in the same order and is not marked for deletion and is not the same
+as types_idx.
+@return true if the index qualifies, otherwise false */
+UNIV_INTERN
+bool
+dict_foreign_qualify_index(
+/*=======================*/
+ const dict_table_t* table, /*!< in: table */
+ const char** columns,/*!< in: array of column names */
+ ulint n_cols, /*!< in: number of columns */
+ const dict_index_t* index, /*!< in: index to check */
+ const dict_index_t* types_idx,
+ /*!< in: NULL or an index
+ whose types the column types
+ must match */
+ ibool check_charsets,
+ /*!< in: whether to check
+ charsets. only has an effect
+ if types_idx != NULL */
+ ulint check_null)
+ /*!< in: nonzero if none of
+ the columns must be declared
+ NOT NULL */
+{
+ ulint i;
+
+ if (dict_index_get_n_fields(index) < n_cols) {
+ return(false);
+ }
+
+ for (i= 0; i < n_cols; i++) {
+ dict_field_t* field;
+ const char* col_name;
+
+ field = dict_index_get_nth_field(index, i);
+
+ col_name = dict_table_get_col_name(
+ table, dict_col_get_no(field->col));
+
+ if (field->prefix_len != 0) {
+ /* We do not accept column prefix
+ indexes here */
+
+ break;
+ }
+
+ if (0 != innobase_strcasecmp(columns[i],
+ col_name)) {
+ break;
+ }
+
+ if (check_null
+ && (field->col->prtype & DATA_NOT_NULL)) {
+
+ break;
+ }
+
+ if (types_idx && !cmp_cols_are_equal(
+ dict_index_get_nth_col(index, i),
+ dict_index_get_nth_col(types_idx,
+ i),
+ check_charsets)) {
+
+ break;
+ }
+ }
+
+ return((i == n_cols) ? true : false);
+}
+
+/*********************************************************************//**
+Update the state of compression failure padding heuristics. This is
+called whenever a compression operation succeeds or fails.
+The caller must be holding info->mutex */
+static
+void
+dict_index_zip_pad_update(
+/*======================*/
+ zip_pad_info_t* info, /*<! in/out: info to be updated */
+ ulint zip_threshold) /*<! in: zip threshold value */
+{
+ ulint total;
+ ulint fail_pct;
+
+ ut_ad(info);
+
+ total = info->success + info->failure;
+
+ ut_ad(total > 0);
+
+ if(zip_threshold == 0) {
+ /* User has just disabled the padding. */
+ return;
+ }
+
+ if (total < ZIP_PAD_ROUND_LEN) {
+ /* We are in middle of a round. Do nothing. */
+ return;
+ }
+
+ /* We are at a 'round' boundary. Reset the values but first
+ calculate fail rate for our heuristic. */
+ fail_pct = (info->failure * 100) / total;
+ info->failure = 0;
+ info->success = 0;
+
+ if (fail_pct > zip_threshold) {
+ /* Compression failures are more then user defined
+ threshold. Increase the pad size to reduce chances of
+ compression failures. */
+ ut_ad(info->pad % ZIP_PAD_INCR == 0);
+
+ /* Only do increment if it won't increase padding
+ beyond max pad size. */
+ if (info->pad + ZIP_PAD_INCR
+ < (UNIV_PAGE_SIZE * zip_pad_max) / 100) {
+#ifdef HAVE_ATOMIC_BUILTINS
+ /* Use atomics even though we have the mutex.
+ This is to ensure that we are able to read
+ info->pad atomically where atomics are
+ supported. */
+ os_atomic_increment_ulint(&info->pad, ZIP_PAD_INCR);
+#else /* HAVE_ATOMIC_BUILTINS */
+ info->pad += ZIP_PAD_INCR;
+#endif /* HAVE_ATOMIC_BUILTINS */
+
+ MONITOR_INC(MONITOR_PAD_INCREMENTS);
+ }
+
+ info->n_rounds = 0;
+
+ } else {
+ /* Failure rate was OK. Another successful round
+ completed. */
+ ++info->n_rounds;
+
+ /* If enough successful rounds are completed with
+ compression failure rate in control, decrease the
+ padding. */
+ if (info->n_rounds >= ZIP_PAD_SUCCESSFUL_ROUND_LIMIT
+ && info->pad > 0) {
+
+ ut_ad(info->pad % ZIP_PAD_INCR == 0);
+#ifdef HAVE_ATOMIC_BUILTINS
+ /* Use atomics even though we have the mutex.
+ This is to ensure that we are able to read
+ info->pad atomically where atomics are
+ supported. */
+ os_atomic_decrement_ulint(&info->pad, ZIP_PAD_INCR);
+#else /* HAVE_ATOMIC_BUILTINS */
+ info->pad -= ZIP_PAD_INCR;
+#endif /* HAVE_ATOMIC_BUILTINS */
+
+ info->n_rounds = 0;
+
+ MONITOR_INC(MONITOR_PAD_DECREMENTS);
+ }
+ }
+}
+
+/*********************************************************************//**
+This function should be called whenever a page is successfully
+compressed. Updates the compression padding information. */
+UNIV_INTERN
+void
+dict_index_zip_success(
+/*===================*/
+ dict_index_t* index) /*!< in/out: index to be updated. */
+{
+ ut_ad(index);
+
+ ulint zip_threshold = zip_failure_threshold_pct;
+ if (!zip_threshold) {
+ /* Disabled by user. */
+ return;
+ }
+
+ os_fast_mutex_lock(&index->zip_pad.mutex);
+ ++index->zip_pad.success;
+ dict_index_zip_pad_update(&index->zip_pad, zip_threshold);
+ os_fast_mutex_unlock(&index->zip_pad.mutex);
+}
+
+/*********************************************************************//**
+This function should be called whenever a page compression attempt
+fails. Updates the compression padding information. */
+UNIV_INTERN
+void
+dict_index_zip_failure(
+/*===================*/
+ dict_index_t* index) /*!< in/out: index to be updated. */
+{
+ ut_ad(index);
+
+ ulint zip_threshold = zip_failure_threshold_pct;
+ if (!zip_threshold) {
+ /* Disabled by user. */
+ return;
+ }
+
+ os_fast_mutex_lock(&index->zip_pad.mutex);
+ ++index->zip_pad.failure;
+ dict_index_zip_pad_update(&index->zip_pad, zip_threshold);
+ os_fast_mutex_unlock(&index->zip_pad.mutex);
+}
+
+
+/*********************************************************************//**
+Return the optimal page size, for which page will likely compress.
+@return page size beyond which page might not compress */
+UNIV_INTERN
+ulint
+dict_index_zip_pad_optimal_page_size(
+/*=================================*/
+ dict_index_t* index) /*!< in: index for which page size
+ is requested */
+{
+ ulint pad;
+ ulint min_sz;
+ ulint sz;
+
+ ut_ad(index);
+
+ if (!zip_failure_threshold_pct) {
+ /* Disabled by user. */
+ return(UNIV_PAGE_SIZE);
+ }
+
+ /* We use atomics to read index->zip_pad.pad. Here we use zero
+ as increment as are not changing the value of the 'pad'. On
+ platforms where atomics are not available we grab the mutex. */
+
+#ifdef HAVE_ATOMIC_BUILTINS
+ pad = os_atomic_increment_ulint(&index->zip_pad.pad, 0);
+#else /* HAVE_ATOMIC_BUILTINS */
+ os_fast_mutex_lock(&index->zip_pad.mutex);
+ pad = index->zip_pad.pad;
+ os_fast_mutex_unlock(&index->zip_pad.mutex);
+#endif /* HAVE_ATOMIC_BUILTINS */
+
+ ut_ad(pad < UNIV_PAGE_SIZE);
+ sz = UNIV_PAGE_SIZE - pad;
+
+ /* Min size allowed by user. */
+ ut_ad(zip_pad_max < 100);
+ min_sz = (UNIV_PAGE_SIZE * (100 - zip_pad_max)) / 100;
+
+ return(ut_max(sz, min_sz));
+}
+
+/*************************************************************//**
+Convert table flag to row format string.
+@return row format name. */
+UNIV_INTERN
+const char*
+dict_tf_to_row_format_string(
+/*=========================*/
+ ulint table_flag) /*!< in: row format setting */
+{
+ switch (dict_tf_get_rec_format(table_flag)) {
+ case REC_FORMAT_REDUNDANT:
+ return("ROW_TYPE_REDUNDANT");
+ case REC_FORMAT_COMPACT:
+ return("ROW_TYPE_COMPACT");
+ case REC_FORMAT_COMPRESSED:
+ return("ROW_TYPE_COMPRESSED");
+ case REC_FORMAT_DYNAMIC:
+ return("ROW_TYPE_DYNAMIC");
+ }
+
+ ut_error;
+ return(0);
+}
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/dict/dict0load.cc b/storage/innobase/dict/dict0load.cc
index ff93be3e76a..46d72786ac6 100644
--- a/storage/innobase/dict/dict0load.cc
+++ b/storage/innobase/dict/dict0load.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -41,18 +41,22 @@ Created 4/24/1996 Heikki Tuuri
#include "rem0cmp.h"
#include "srv0start.h"
#include "srv0srv.h"
+#include "dict0crea.h"
#include "dict0priv.h"
#include "ha_prototypes.h" /* innobase_casedn_str() */
#include "fts0priv.h"
-/** Following are six InnoDB system tables */
+/** Following are the InnoDB system tables. The positions in
+this array are referenced by enum dict_system_table_id. */
static const char* SYSTEM_TABLE_NAME[] = {
"SYS_TABLES",
"SYS_INDEXES",
"SYS_COLUMNS",
"SYS_FIELDS",
"SYS_FOREIGN",
- "SYS_FOREIGN_COLS"
+ "SYS_FOREIGN_COLS",
+ "SYS_TABLESPACES",
+ "SYS_DATAFILES"
};
/* If this flag is TRUE, then we will load the cluster index's (and tables')
@@ -183,7 +187,8 @@ dict_print(void)
os_increment_counter_by_amount(
server_mutex,
- srv_fatal_semaphore_wait_threshold, 7200/*2 hours*/);
+ srv_fatal_semaphore_wait_threshold,
+ SRV_SEMAPHORE_WAIT_EXTENSION);
heap = mem_heap_create(1000);
mutex_enter(&(dict_sys->mutex));
@@ -196,13 +201,11 @@ dict_print(void)
err_msg = static_cast<const char*>(
dict_process_sys_tables_rec_and_mtr_commit(
- heap, rec, &table,
- static_cast<dict_table_info_t>(
- DICT_TABLE_LOAD_FROM_CACHE
- | DICT_TABLE_UPDATE_STATS), &mtr));
+ heap, rec, &table, DICT_TABLE_LOAD_FROM_CACHE,
+ &mtr));
if (!err_msg) {
- dict_table_print_low(table);
+ dict_table_print(table);
} else {
ut_print_timestamp(stderr);
fprintf(stderr, " InnoDB: %s\n", err_msg);
@@ -221,7 +224,8 @@ dict_print(void)
/* Restore the fatal semaphore wait timeout */
os_decrement_counter_by_amount(
server_mutex,
- srv_fatal_semaphore_wait_threshold, 7200/*2 hours*/);
+ srv_fatal_semaphore_wait_threshold,
+ SRV_SEMAPHORE_WAIT_EXTENSION);
}
/********************************************************************//**
@@ -278,8 +282,8 @@ dict_startscan_system(
clust_index = UT_LIST_GET_FIRST(system_table->indexes);
- btr_pcur_open_at_index_side(TRUE, clust_index, BTR_SEARCH_LEAF, pcur,
- TRUE, mtr);
+ btr_pcur_open_at_index_side(true, clust_index, BTR_SEARCH_LEAF, pcur,
+ true, 0, mtr);
rec = dict_getnext_system_low(pcur, mtr);
@@ -307,6 +311,7 @@ dict_getnext_system(
return(rec);
}
+
/********************************************************************//**
This function processes one SYS_TABLES record and populate the dict_table_t
struct for the table. Extracted out of dict_print() to be used by
@@ -362,15 +367,6 @@ dict_process_sys_tables_rec_and_mtr_commit(
return(err_msg);
}
- if ((status & DICT_TABLE_UPDATE_STATS)
- && dict_table_get_first_index(*table)) {
-
- /* Update statistics member fields in *table if
- DICT_TABLE_UPDATE_STATS is set */
- ut_ad(mutex_own(&dict_sys->mutex));
- dict_stats_update(*table, DICT_STATS_FETCH, TRUE);
- }
-
return(NULL);
}
@@ -401,6 +397,7 @@ dict_process_sys_indexes_rec(
return(err_msg);
}
+
/********************************************************************//**
This function parses a SYS_COLUMNS record and populate a dict_column_t
structure with the information from the record.
@@ -423,6 +420,7 @@ dict_process_sys_columns_rec(
return(err_msg);
}
+
/********************************************************************//**
This function parses a SYS_FIELDS record and populates a dict_field_t
structure with the information from the record.
@@ -475,7 +473,7 @@ dict_process_sys_foreign_rec(
const byte* field;
ulint n_fields_and_type;
- if (UNIV_UNLIKELY(rec_get_deleted_flag(rec, 0))) {
+ if (rec_get_deleted_flag(rec, 0)) {
return("delete-marked record in SYS_FOREIGN");
}
@@ -485,7 +483,7 @@ dict_process_sys_foreign_rec(
field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_FOREIGN__ID, &len);
- if (UNIV_UNLIKELY(len < 1 || len == UNIV_SQL_NULL)) {
+ if (len == 0 || len == UNIV_SQL_NULL) {
err_len:
return("incorrect column length in SYS_FOREIGN");
}
@@ -512,7 +510,7 @@ err_len:
field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_FOREIGN__FOR_NAME, &len);
- if (len < 1 || len == UNIV_SQL_NULL) {
+ if (len == 0 || len == UNIV_SQL_NULL) {
goto err_len;
}
foreign->foreign_table_name = mem_heap_strdupl(
@@ -520,7 +518,7 @@ err_len:
field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_FOREIGN__REF_NAME, &len);
- if (len < 1 || len == UNIV_SQL_NULL) {
+ if (len == 0 || len == UNIV_SQL_NULL) {
goto err_len;
}
foreign->referenced_table_name = mem_heap_strdupl(
@@ -568,7 +566,7 @@ dict_process_sys_foreign_col_rec(
field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_FOREIGN_COLS__ID, &len);
- if (len < 1 || len == UNIV_SQL_NULL) {
+ if (len == 0 || len == UNIV_SQL_NULL) {
err_len:
return("incorrect column length in SYS_FOREIGN_COLS");
}
@@ -594,14 +592,14 @@ err_len:
field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_FOREIGN_COLS__FOR_COL_NAME, &len);
- if (len < 1 || len == UNIV_SQL_NULL) {
+ if (len == 0 || len == UNIV_SQL_NULL) {
goto err_len;
}
*for_col_name = mem_heap_strdupl(heap, (char*) field, len);
field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_FOREIGN_COLS__REF_COL_NAME, &len);
- if (len < 1 || len == UNIV_SQL_NULL) {
+ if (len == 0 || len == UNIV_SQL_NULL) {
goto err_len;
}
*ref_col_name = mem_heap_strdupl(heap, (char*) field, len);
@@ -610,6 +608,127 @@ err_len:
}
/********************************************************************//**
+This function parses a SYS_TABLESPACES record, extracts necessary
+information from the record and returns to caller.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_process_sys_tablespaces(
+/*=========================*/
+ mem_heap_t* heap, /*!< in/out: heap memory */
+ const rec_t* rec, /*!< in: current SYS_TABLESPACES rec */
+ ulint* space, /*!< out: space id */
+ const char** name, /*!< out: tablespace name */
+ ulint* flags) /*!< out: tablespace flags */
+{
+ ulint len;
+ const byte* field;
+
+ /* Initialize the output values */
+ *space = ULINT_UNDEFINED;
+ *name = NULL;
+ *flags = ULINT_UNDEFINED;
+
+ if (rec_get_deleted_flag(rec, 0)) {
+ return("delete-marked record in SYS_TABLESPACES");
+ }
+
+ if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_TABLESPACES) {
+ return("wrong number of columns in SYS_TABLESPACES record");
+ }
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLESPACES__SPACE, &len);
+ if (len != DICT_FLD_LEN_SPACE) {
+err_len:
+ return("incorrect column length in SYS_TABLESPACES");
+ }
+ *space = mach_read_from_4(field);
+
+ rec_get_nth_field_offs_old(
+ rec, DICT_FLD__SYS_TABLESPACES__DB_TRX_ID, &len);
+ if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) {
+ goto err_len;
+ }
+
+ rec_get_nth_field_offs_old(
+ rec, DICT_FLD__SYS_TABLESPACES__DB_ROLL_PTR, &len);
+ if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) {
+ goto err_len;
+ }
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLESPACES__NAME, &len);
+ if (len == 0 || len == UNIV_SQL_NULL) {
+ goto err_len;
+ }
+ *name = mem_heap_strdupl(heap, (char*) field, len);
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLESPACES__FLAGS, &len);
+ if (len != DICT_FLD_LEN_FLAGS) {
+ goto err_len;
+ }
+ *flags = mach_read_from_4(field);
+
+ return(NULL);
+}
+
+/********************************************************************//**
+This function parses a SYS_DATAFILES record, extracts necessary
+information from the record and returns it to the caller.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_process_sys_datafiles(
+/*=======================*/
+ mem_heap_t* heap, /*!< in/out: heap memory */
+ const rec_t* rec, /*!< in: current SYS_DATAFILES rec */
+ ulint* space, /*!< out: space id */
+ const char** path) /*!< out: datafile path */
+{
+ ulint len;
+ const byte* field;
+
+ if (rec_get_deleted_flag(rec, 0)) {
+ return("delete-marked record in SYS_DATAFILES");
+ }
+
+ if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_DATAFILES) {
+ return("wrong number of columns in SYS_DATAFILES record");
+ }
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_DATAFILES__SPACE, &len);
+ if (len != DICT_FLD_LEN_SPACE) {
+err_len:
+ return("incorrect column length in SYS_DATAFILES");
+ }
+ *space = mach_read_from_4(field);
+
+ rec_get_nth_field_offs_old(
+ rec, DICT_FLD__SYS_DATAFILES__DB_TRX_ID, &len);
+ if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) {
+ goto err_len;
+ }
+
+ rec_get_nth_field_offs_old(
+ rec, DICT_FLD__SYS_DATAFILES__DB_ROLL_PTR, &len);
+ if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) {
+ goto err_len;
+ }
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_DATAFILES__PATH, &len);
+ if (len == 0 || len == UNIV_SQL_NULL) {
+ goto err_len;
+ }
+ *path = mem_heap_strdupl(heap, (char*) field, len);
+
+ return(NULL);
+}
+
+/********************************************************************//**
Determine the flags of a table as stored in SYS_TABLES.TYPE and N_COLS.
@return ULINT_UNDEFINED if error, else a valid dict_table_t::flags. */
static
@@ -629,11 +748,9 @@ dict_sys_tables_get_flags(
ut_a(len == 4);
type = mach_read_from_4(field);
- /* The low order bit of SYS_TABLES.TYPE is always set to 1. If no
- other bits are used, that is defined as SYS_TABLE_TYPE_ANTELOPE.
- But in dict_table_t::flags the low order bit is used to determine
- if the row format is Redundant or Compact when the format is
- Antelope.
+ /* The low order bit of SYS_TABLES.TYPE is always set to 1. But in
+ dict_table_t::flags the low order bit is used to determine if the
+ row format is Redundant or Compact when the format is Antelope.
Read the 4 byte N_COLS field and look at the high order bit. It
should be set for COMPACT and later. It should not be set for
REDUNDANT. */
@@ -645,10 +762,193 @@ dict_sys_tables_get_flags(
/* This validation function also combines the DICT_N_COLS_COMPACT
flag in n_cols into the type field to effectively make it a
dict_table_t::flags. */
- return(dict_sys_tables_type_validate(type, n_cols));
+
+ if (ULINT_UNDEFINED == dict_sys_tables_type_validate(type, n_cols)) {
+ return(ULINT_UNDEFINED);
+ }
+
+ return(dict_sys_tables_type_to_tf(type, n_cols));
}
/********************************************************************//**
+Gets the filepath for a spaceid from SYS_DATAFILES and checks it against
+the contents of a link file. This function is called when there is no
+fil_node_t entry for this space ID so both durable locations on disk
+must be checked and compared.
+We use a temporary heap here for the table lookup, but not for the path
+returned which the caller must free.
+This function can return NULL if the space ID is not found in SYS_DATAFILES,
+then the caller will assume that the ibd file is in the normal datadir.
+@return own: A copy of the first datafile found in SYS_DATAFILES.PATH for
+the given space ID. NULL if space ID is zero or not found. */
+UNIV_INTERN
+char*
+dict_get_first_path(
+/*================*/
+ ulint space, /*!< in: space id */
+ const char* name) /*!< in: tablespace name */
+{
+ mtr_t mtr;
+ dict_table_t* sys_datafiles;
+ dict_index_t* sys_index;
+ dtuple_t* tuple;
+ dfield_t* dfield;
+ byte* buf;
+ btr_pcur_t pcur;
+ const rec_t* rec;
+ const byte* field;
+ ulint len;
+ char* dict_filepath = NULL;
+ mem_heap_t* heap = mem_heap_create(1024);
+
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+
+ mtr_start(&mtr);
+
+ sys_datafiles = dict_table_get_low("SYS_DATAFILES");
+ sys_index = UT_LIST_GET_FIRST(sys_datafiles->indexes);
+ ut_ad(!dict_table_is_comp(sys_datafiles));
+ ut_ad(name_of_col_is(sys_datafiles, sys_index,
+ DICT_FLD__SYS_DATAFILES__SPACE, "SPACE"));
+ ut_ad(name_of_col_is(sys_datafiles, sys_index,
+ DICT_FLD__SYS_DATAFILES__PATH, "PATH"));
+
+ tuple = dtuple_create(heap, 1);
+ dfield = dtuple_get_nth_field(tuple, DICT_FLD__SYS_DATAFILES__SPACE);
+
+ buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
+ mach_write_to_4(buf, space);
+
+ dfield_set_data(dfield, buf, 4);
+ dict_index_copy_types(tuple, sys_index, 1);
+
+ btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
+ BTR_SEARCH_LEAF, &pcur, &mtr);
+
+ rec = btr_pcur_get_rec(&pcur);
+
+ /* If the file-per-table tablespace was created with
+ an earlier version of InnoDB, then this record is not
+ in SYS_DATAFILES. But a link file still might exist. */
+
+ if (btr_pcur_is_on_user_rec(&pcur)) {
+ /* A record for this space ID was found. */
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_DATAFILES__PATH, &len);
+ ut_a(len > 0 || len == UNIV_SQL_NULL);
+ ut_a(len < OS_FILE_MAX_PATH);
+ dict_filepath = mem_strdupl((char*) field, len);
+ ut_a(dict_filepath);
+ }
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+ mem_heap_free(heap);
+
+ return(dict_filepath);
+}
+
+/********************************************************************//**
+Update the record for space_id in SYS_TABLESPACES to this filepath.
+@return DB_SUCCESS if OK, dberr_t if the update failed */
+UNIV_INTERN
+dberr_t
+dict_update_filepath(
+/*=================*/
+ ulint space_id, /*!< in: space id */
+ const char* filepath) /*!< in: filepath */
+{
+ dberr_t err = DB_SUCCESS;
+ trx_t* trx;
+
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+
+ trx = trx_allocate_for_background();
+ trx->op_info = "update filepath";
+ trx->dict_operation_lock_mode = RW_X_LATCH;
+ trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
+
+ pars_info_t* info = pars_info_create();
+
+ pars_info_add_int4_literal(info, "space", space_id);
+ pars_info_add_str_literal(info, "path", filepath);
+
+ err = que_eval_sql(info,
+ "PROCEDURE UPDATE_FILEPATH () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_DATAFILES"
+ " SET PATH = :path\n"
+ " WHERE SPACE = :space;\n"
+ "END;\n", FALSE, trx);
+
+ trx_commit_for_mysql(trx);
+ trx->dict_operation_lock_mode = 0;
+ trx_free_for_background(trx);
+
+ if (err == DB_SUCCESS) {
+ /* We just updated SYS_DATAFILES due to the contents in
+ a link file. Make a note that we did this. */
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "The InnoDB data dictionary table SYS_DATAFILES "
+ "for tablespace ID %lu was updated to use file %s.",
+ (ulong) space_id, filepath);
+ } else {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Problem updating InnoDB data dictionary table "
+ "SYS_DATAFILES for tablespace ID %lu to file %s.",
+ (ulong) space_id, filepath);
+ }
+
+ return(err);
+}
+
+/********************************************************************//**
+Insert records into SYS_TABLESPACES and SYS_DATAFILES.
+@return DB_SUCCESS if OK, dberr_t if the insert failed */
+UNIV_INTERN
+dberr_t
+dict_insert_tablespace_and_filepath(
+/*================================*/
+ ulint space, /*!< in: space id */
+ const char* name, /*!< in: tablespace name */
+ const char* filepath, /*!< in: filepath */
+ ulint fsp_flags) /*!< in: tablespace flags */
+{
+ dberr_t err = DB_SUCCESS;
+ trx_t* trx;
+
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_ad(filepath);
+
+ trx = trx_allocate_for_background();
+ trx->op_info = "insert tablespace and filepath";
+ trx->dict_operation_lock_mode = RW_X_LATCH;
+ trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
+
+ /* A record for this space ID was not found in
+ SYS_DATAFILES. Assume the record is also missing in
+ SYS_TABLESPACES. Insert records onto them both. */
+ err = dict_create_add_tablespace_to_dictionary(
+ space, name, fsp_flags, filepath, trx, false);
+
+ trx_commit_for_mysql(trx);
+ trx->dict_operation_lock_mode = 0;
+ trx_free_for_background(trx);
+
+ return(err);
+}
+
+/********************************************************************//**
+This function looks at each table defined in SYS_TABLES. It checks the
+tablespace for any table with a space_id > 0. It looks up the tablespace
+in SYS_DATAFILES to ensure the correct path.
+
In a crash recovery we already have all the tablespace objects created.
This function compares the space id information in the InnoDB data dictionary
to what we already read with fil_load_single_table_tablespaces().
@@ -669,6 +969,7 @@ dict_check_tablespaces_and_store_max_id(
ulint max_space_id;
mtr_t mtr;
+ rw_lock_x_lock(&dict_operation_lock);
mutex_enter(&(dict_sys->mutex));
mtr_start(&mtr);
@@ -682,8 +983,8 @@ dict_check_tablespaces_and_store_max_id(
MLOG_4BYTES, &mtr);
fil_set_max_space_id_if_bigger(max_space_id);
- btr_pcur_open_at_index_side(TRUE, sys_index, BTR_SEARCH_LEAF, &pcur,
- TRUE, &mtr);
+ btr_pcur_open_at_index_side(true, sys_index, BTR_SEARCH_LEAF, &pcur,
+ true, 0, &mtr);
loop:
btr_pcur_move_to_next_user_rec(&pcur, &mtr);
@@ -703,6 +1004,7 @@ loop:
fil_set_max_space_id_if_bigger(max_space_id);
mutex_exit(&(dict_sys->mutex));
+ rw_lock_x_unlock(&dict_operation_lock);
return;
}
@@ -718,8 +1020,14 @@ loop:
field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_TABLES__NAME, &len);
+
name = mem_strdupl((char*) field, len);
+ char table_name[MAX_FULL_NAME_LEN + 1];
+
+ innobase_format_name(
+ table_name, sizeof(table_name), name, FALSE);
+
flags = dict_sys_tables_get_flags(rec);
if (UNIV_UNLIKELY(flags == ULINT_UNDEFINED)) {
/* Read again the 4 bytes from rec. */
@@ -728,13 +1036,9 @@ loop:
ut_ad(len == 4); /* this was checked earlier */
flags = mach_read_from_4(field);
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_filename(stderr, name);
- fprintf(stderr, "\n"
- "InnoDB: in InnoDB data dictionary"
- " has unknown type %lx.\n",
- (ulong) flags);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Table '%s' in InnoDB data dictionary"
+ " has unknown type %lx", table_name, flags);
goto loop;
}
@@ -749,43 +1053,84 @@ loop:
mtr_commit(&mtr);
+ /* For tables created with old versions of InnoDB,
+ SYS_TABLES.MIX_LEN may contain garbage. Such tables
+ would always be in ROW_FORMAT=REDUNDANT. Pretend that
+ all such tables are non-temporary. That is, do not
+ suppress error printouts about temporary or discarded
+ tablespaces not being found. */
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLES__MIX_LEN, &len);
+
+ bool is_temp = false;
+ bool discarded = false;
+ ib_uint32_t flags2 = mach_read_from_4(field);
+
+ /* Check that the tablespace (the .ibd file) really
+ exists; print a warning to the .err log if not.
+ Do not print warnings for temporary tables or for
+ tablespaces that have been discarded. */
+
+ field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLES__N_COLS, &len);
+
+ /* MIX_LEN valid only for ROW_FORMAT > REDUNDANT. */
+ if (mach_read_from_4(field) & DICT_N_COLS_COMPACT) {
+
+ is_temp = !!(flags2 & DICT_TF2_TEMPORARY);
+ discarded = !!(flags2 & DICT_TF2_DISCARDED);
+ }
+
if (space_id == 0) {
/* The system tablespace always exists. */
+ ut_ad(!discarded);
} else if (in_crash_recovery) {
- /* Check that the tablespace (the .ibd file) really
- exists; print a warning to the .err log if not.
- Do not print warnings for temporary tables. */
- ibool is_temp;
+ /* All tablespaces should have been found in
+ fil_load_single_table_tablespaces(). */
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_TABLES__N_COLS, &len);
- if (mach_read_from_4(field) & DICT_N_COLS_COMPACT) {
- /* ROW_FORMAT=COMPACT: read the is_temp
- flag from SYS_TABLES.MIX_LEN. */
- field = rec_get_nth_field_old(
- rec, 7/*MIX_LEN*/, &len);
- is_temp = !!(mach_read_from_4(field)
- & DICT_TF2_TEMPORARY);
- } else {
- /* For tables created with old versions
- of InnoDB, SYS_TABLES.MIX_LEN may contain
- garbage. Such tables would always be
- in ROW_FORMAT=REDUNDANT. Pretend that
- all such tables are non-temporary. That is,
- do not suppress error printouts about
- temporary tables not being found. */
- is_temp = FALSE;
+ fil_space_for_table_exists_in_mem(
+ space_id, name, TRUE, !(is_temp || discarded),
+ false, NULL, 0);
+
+ } else if (!discarded) {
+
+ /* It is a normal database startup: create the
+ space object and check that the .ibd file exists.
+ If the table uses a remote tablespace, look for the
+ space_id in SYS_DATAFILES to find the filepath */
+
+ /* Use the remote filepath if known. */
+ char* filepath = NULL;
+ if (DICT_TF_HAS_DATA_DIR(flags)) {
+ filepath = dict_get_first_path(
+ space_id, name);
}
- fil_space_for_table_exists_in_mem(
- space_id, name, TRUE, !is_temp);
- } else {
- /* It is a normal database startup: create the space
- object and check that the .ibd file exists. */
+ /* We set the 2nd param (fix_dict = true)
+ here because we already have an x-lock on
+ dict_operation_lock and dict_sys->mutex. Besides,
+ this is at startup and we are now single threaded.
+ If the filepath is not known, it will need to
+ be discovered. */
+ dberr_t err = fil_open_single_table_tablespace(
+ false, srv_read_only_mode ? false : true,
+ space_id, dict_tf_to_fsp_flags(flags),
+ name, filepath);
+
+ if (err != DB_SUCCESS) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Tablespace open failed for '%s', "
+ "ignored.", table_name);
+ }
- fil_open_single_table_tablespace(
- FALSE, space_id,
- dict_tf_to_fsp_flags(flags), name);
+ if (filepath) {
+ mem_free(filepath);
+ }
+ } else {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "DISCARD flag set for table '%s', ignored.",
+ table_name);
}
mem_free(name);
@@ -879,7 +1224,7 @@ err_len:
field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_COLUMNS__NAME, &len);
- if (len < 1 || len == UNIV_SQL_NULL) {
+ if (len == 0 || len == UNIV_SQL_NULL) {
goto err_len;
}
@@ -1003,6 +1348,11 @@ dict_load_columns(
err_msg = dict_load_column_low(table, heap, NULL, NULL,
&name, rec);
+ if (err_msg) {
+ fprintf(stderr, "InnoDB: %s\n", err_msg);
+ ut_error;
+ }
+
/* Note: Currently we have one DOC_ID column that is
shared by all FTS indexes on a table. */
if (innobase_strcasecmp(name,
@@ -1037,11 +1387,6 @@ dict_load_columns(
table->fts->doc_col = i;
}
- if (err_msg) {
- fprintf(stderr, "InnoDB: %s\n", err_msg);
- ut_error;
- }
-
btr_pcur_move_to_next_user_rec(&pcur, &mtr);
}
@@ -1154,7 +1499,7 @@ err_len:
field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_FIELDS__COL_NAME, &len);
- if (len < 1 || len == UNIV_SQL_NULL) {
+ if (len == 0 || len == UNIV_SQL_NULL) {
goto err_len;
}
@@ -1194,7 +1539,7 @@ dict_load_fields(
byte* buf;
ulint i;
mtr_t mtr;
- ulint error;
+ dberr_t error;
ut_ad(mutex_own(&(dict_sys->mutex)));
@@ -1394,8 +1739,8 @@ Loads definitions for table indexes. Adds them to the data dictionary
cache.
@return DB_SUCCESS if ok, DB_CORRUPTION if corruption of dictionary
table or DB_UNSUPPORTED if table has unknown index type */
-static
-ulint
+static __attribute__((nonnull))
+dberr_t
dict_load_indexes(
/*==============*/
dict_table_t* table, /*!< in/out: table */
@@ -1412,7 +1757,7 @@ dict_load_indexes(
const rec_t* rec;
byte* buf;
mtr_t mtr;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
ut_ad(mutex_own(&(dict_sys->mutex)));
@@ -1443,6 +1788,21 @@ dict_load_indexes(
if (!btr_pcur_is_on_user_rec(&pcur)) {
+ /* We should allow the table to open even
+ without index when DICT_ERR_IGNORE_CORRUPT is set.
+ DICT_ERR_IGNORE_CORRUPT is currently only set
+ for drop table */
+ if (dict_table_get_first_index(table) == NULL
+ && !(ignore_err & DICT_ERR_IGNORE_CORRUPT)) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Cannot load table %s "
+ "because it has no indexes in "
+ "InnoDB internal data dictionary.",
+ table->name);
+ error = DB_CORRUPTION;
+ goto func_exit;
+ }
+
break;
}
@@ -1456,6 +1816,20 @@ dict_load_indexes(
if (err_msg == dict_load_index_id_err) {
/* TABLE_ID mismatch means that we have
run out of index definitions for the table. */
+
+ if (dict_table_get_first_index(table) == NULL
+ && !(ignore_err & DICT_ERR_IGNORE_CORRUPT)) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Failed to load the "
+ "clustered index for table %s "
+ "because of the following error: %s. "
+ "Refusing to load the rest of the "
+ "indexes (if any) and the whole table "
+ "altogether.", table->name, err_msg);
+ error = DB_CORRUPTION;
+ goto func_exit;
+ }
+
break;
} else if (err_msg == dict_load_index_del) {
/* Skip delete-marked records. */
@@ -1510,15 +1884,15 @@ dict_load_indexes(
subsequent checks are relevant for the supported types. */
if (index->type & ~(DICT_CLUSTERED | DICT_UNIQUE
| DICT_CORRUPT | DICT_FTS)) {
- fprintf(stderr,
- "InnoDB: Error: unknown type %lu"
- " of index %s of table %s\n",
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unknown type %lu of index %s of table %s",
(ulong) index->type, index->name, table->name);
error = DB_UNSUPPORTED;
dict_mem_index_free(index);
goto func_exit;
} else if (index->page == FIL_NULL
+ && !table->ibd_file_missing
&& (!(index->type & DICT_FTS))) {
fprintf(stderr,
@@ -1560,7 +1934,7 @@ corrupted:
" is not clustered!\n", stderr);
goto corrupted;
- } else if (table->id < DICT_HDR_FIRST_ID
+ } else if (dict_is_sys_table(table->id)
&& (dict_index_is_clust(index)
|| ((table == dict_sys->sys_tables)
&& !strcmp("ID_IND", index->name)))) {
@@ -1570,8 +1944,10 @@ corrupted:
dict_mem_index_free(index);
} else {
dict_load_fields(index, heap);
- error = dict_index_add_to_cache(table, index,
- index->page, FALSE);
+
+ error = dict_index_add_to_cache(
+ table, index, index->page, FALSE);
+
/* The data dictionary tables should never contain
invalid index definitions. If we ignored this error
and simply did not load this index definition, the
@@ -1629,7 +2005,7 @@ dict_load_table_low(
rec_get_nth_field_offs_old(
rec, DICT_FLD__SYS_TABLES__NAME, &len);
- if (len < 1 || len == UNIV_SQL_NULL) {
+ if (len == 0 || len == UNIV_SQL_NULL) {
err_len:
return("incorrect column length in SYS_TABLES");
}
@@ -1751,6 +2127,77 @@ err_len:
}
/********************************************************************//**
+Using the table->heap, copy the null-terminated filepath into
+table->data_dir_path and replace the 'databasename/tablename.ibd'
+portion with 'tablename'.
+This allows SHOW CREATE TABLE to return the correct DATA DIRECTORY path.
+Make this data directory path only if it has not yet been saved. */
+UNIV_INTERN
+void
+dict_save_data_dir_path(
+/*====================*/
+ dict_table_t* table, /*!< in/out: table */
+ char* filepath) /*!< in: filepath of tablespace */
+{
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+ ut_a(DICT_TF_HAS_DATA_DIR(table->flags));
+
+ ut_a(!table->data_dir_path);
+ ut_a(filepath);
+
+ /* Be sure this filepath is not the default filepath. */
+ char* default_filepath = fil_make_ibd_name(table->name, false);
+ if (strcmp(filepath, default_filepath)) {
+ ulint pathlen = strlen(filepath);
+ ut_a(pathlen < OS_FILE_MAX_PATH);
+ ut_a(0 == strcmp(filepath + pathlen - 4, ".ibd"));
+
+ table->data_dir_path = mem_heap_strdup(table->heap, filepath);
+ os_file_make_data_dir_path(table->data_dir_path);
+ } else {
+ /* This does not change SYS_DATAFILES or SYS_TABLES
+ or FSP_FLAGS on the header page of the tablespace,
+ but it makes dict_table_t consistent */
+ table->flags &= ~DICT_TF_MASK_DATA_DIR;
+ }
+ mem_free(default_filepath);
+}
+
+/*****************************************************************//**
+Make sure the data_file_name is saved in dict_table_t if needed. Try to
+read it from the file dictionary first, then from SYS_DATAFILES. */
+UNIV_INTERN
+void
+dict_get_and_save_data_dir_path(
+/*============================*/
+ dict_table_t* table, /*!< in/out: table */
+ bool dict_mutex_own) /*!< in: true if dict_sys->mutex
+ is owned already */
+{
+ if (DICT_TF_HAS_DATA_DIR(table->flags)
+ && (!table->data_dir_path)) {
+ char* path = fil_space_get_first_path(table->space);
+
+ if (!dict_mutex_own) {
+ dict_mutex_enter_for_mysql();
+ }
+ if (!path) {
+ path = dict_get_first_path(
+ table->space, table->name);
+ }
+
+ if (path) {
+ dict_save_data_dir_path(table, path);
+ mem_free(path);
+ }
+
+ if (!dict_mutex_own) {
+ dict_mutex_exit_for_mysql();
+ }
+ }
+}
+
+/********************************************************************//**
Loads a table definition and also all its index definitions, and also
the cluster definition if the table is a member in a cluster. Also loads
all foreign key constraints where the foreign key is in the table or where
@@ -1770,6 +2217,7 @@ dict_load_table(
/*!< in: error to be ignored when loading
table and its indexes' definition */
{
+ dberr_t err;
dict_table_t* table;
dict_table_t* sys_tables;
btr_pcur_t pcur;
@@ -1780,7 +2228,7 @@ dict_load_table(
const rec_t* rec;
const byte* field;
ulint len;
- ulint err;
+ char* filepath = NULL;
const char* err_msg;
mtr_t mtr;
@@ -1843,39 +2291,71 @@ err_exit:
goto err_exit;
}
+ char table_name[MAX_FULL_NAME_LEN + 1];
+
+ innobase_format_name(table_name, sizeof(table_name), name, FALSE);
+
+ btr_pcur_close(&pcur);
+ mtr_commit(&mtr);
+
if (table->space == 0) {
/* The system tablespace is always available. */
+ } else if (table->flags2 & DICT_TF2_DISCARDED) {
+
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Table '%s' tablespace is set as discarded.",
+ table_name);
+
+ table->ibd_file_missing = TRUE;
+
} else if (!fil_space_for_table_exists_in_mem(
- table->space, name, FALSE, FALSE)) {
+ table->space, name, FALSE, FALSE, true, heap,
+ table->id)) {
- if (table->flags2 & DICT_TF2_TEMPORARY) {
+ if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY)) {
/* Do not bother to retry opening temporary tables. */
table->ibd_file_missing = TRUE;
+
} else {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: error: space object of table ");
- ut_print_filename(stderr, name);
- fprintf(stderr, ",\n"
- "InnoDB: space id %lu did not exist in memory."
- " Retrying an open.\n",
- (ulong) table->space);
- /* Try to open the tablespace */
- if (!fil_open_single_table_tablespace(
- TRUE, table->space,
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Failed to find tablespace for table '%s' "
+ "in the cache. Attempting to load the "
+ "tablespace with space id %lu.",
+ table_name, (ulong) table->space);
+
+ /* Use the remote filepath if needed. */
+ if (DICT_TF_HAS_DATA_DIR(table->flags)) {
+ /* This needs to be added to the table
+ from SYS_DATAFILES */
+ dict_get_and_save_data_dir_path(table, true);
+
+ if (table->data_dir_path) {
+ filepath = os_file_make_remote_pathname(
+ table->data_dir_path,
+ table->name, "ibd");
+ }
+ }
+
+ /* Try to open the tablespace. We set the
+ 2nd param (fix_dict = false) here because we
+ do not have an x-lock on dict_operation_lock */
+ err = fil_open_single_table_tablespace(
+ true, false, table->space,
dict_tf_to_fsp_flags(table->flags),
- name)) {
+ name, filepath);
+
+ if (err != DB_SUCCESS) {
/* We failed to find a sensible
tablespace file */
table->ibd_file_missing = TRUE;
}
+ if (filepath) {
+ mem_free(filepath);
+ }
}
}
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
dict_load_columns(table, heap);
if (cached) {
@@ -1886,7 +2366,15 @@ err_exit:
mem_heap_empty(heap);
- err = dict_load_indexes(table, heap, ignore_err);
+ /* If there is no tablespace for the table then we only need to
+ load the index definitions. So that we can IMPORT the tablespace
+ later. */
+ if (table->ibd_file_missing) {
+ err = dict_load_indexes(
+ table, heap, DICT_ERR_IGNORE_ALL);
+ } else {
+ err = dict_load_indexes(table, heap, ignore_err);
+ }
if (err == DB_INDEX_CORRUPT) {
/* Refuse to load the table if the table has a corrupted
@@ -1920,7 +2408,8 @@ err_exit:
of the error condition, since the user may want to dump data from the
clustered index. However we load the foreign key information only if
all indexes were loaded. */
- if (!cached) {
+ if (!cached || table->ibd_file_missing) {
+ /* Don't attempt to load the indexes from disk. */
} else if (err == DB_SUCCESS) {
err = dict_load_foreigns(table->name, TRUE, TRUE);
@@ -1937,11 +2426,15 @@ err_exit:
Otherwise refuse to load the table */
index = dict_table_get_first_index(table);
- if (!srv_force_recovery || !index
+ if (!srv_force_recovery
+ || !index
|| !dict_index_is_clust(index)) {
+
dict_table_remove_from_cache(table);
table = NULL;
- } else if (dict_index_is_corrupted(index)) {
+
+ } else if (dict_index_is_corrupted(index)
+ && !table->ibd_file_missing) {
/* It is possible we force to load a corrupted
clustered index if srv_load_corrupted is set.
@@ -1949,36 +2442,28 @@ err_exit:
table->corrupted = TRUE;
}
}
-#if 0
- if (err != DB_SUCCESS && table != NULL) {
- mutex_enter(&dict_foreign_err_mutex);
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Error: could not make a foreign key"
- " definition to match\n"
- "InnoDB: the foreign key table"
- " or the referenced table!\n"
- "InnoDB: The data dictionary of InnoDB is corrupt."
- " You may need to drop\n"
- "InnoDB: and recreate the foreign key table"
- " or the referenced table.\n"
- "InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com\n"
- "InnoDB: Latest foreign key error printout:\n%s\n",
- dict_foreign_err_buf);
-
- mutex_exit(&dict_foreign_err_mutex);
- }
-#endif /* 0 */
func_exit:
mem_heap_free(heap);
- ut_ad(!table || ignore_err != DICT_ERR_IGNORE_NONE
+ ut_ad(!table
+ || ignore_err != DICT_ERR_IGNORE_NONE
+ || table->ibd_file_missing
|| !table->corrupted);
+ if (table && table->fts) {
+ if (!(dict_table_has_fts_index(table)
+ || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)
+ || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID))) {
+ /* the table->fts could be created in dict_load_column
+ when a user defined FTS_DOC_ID is present, but no
+ FTS */
+ fts_free(table);
+ } else {
+ fts_optimize_add_table(table);
+ }
+ }
+
return(table);
}
@@ -2019,6 +2504,7 @@ dict_load_table_on_id(
sys_table_ids = dict_table_get_next_index(
dict_table_get_first_index(sys_tables));
ut_ad(!dict_table_is_comp(sys_tables));
+ ut_ad(!dict_index_is_clust(sys_table_ids));
heap = mem_heap_create(256);
tuple = dtuple_create(heap, 1);
@@ -2099,15 +2585,20 @@ dict_load_sys_table(
}
/********************************************************************//**
-Loads foreign key constraint col names (also for the referenced table). */
+Loads foreign key constraint col names (also for the referenced table).
+Members that must be set (and valid) in foreign:
+foreign->heap
+foreign->n_fields
+foreign->id ('\0'-terminated)
+Members that will be created and set by this function:
+foreign->foreign_col_names[i]
+foreign->referenced_col_names[i]
+(for i=0..foreign->n_fields-1) */
static
void
dict_load_foreign_cols(
/*===================*/
- const char* id, /*!< in: foreign constraint id, not
- necessary '\0'-terminated */
- ulint id_len, /*!< in: id length */
- dict_foreign_t* foreign)/*!< in: foreign constraint object */
+ dict_foreign_t* foreign)/*!< in/out: foreign constraint object */
{
dict_table_t* sys_foreign_cols;
dict_index_t* sys_index;
@@ -2119,9 +2610,12 @@ dict_load_foreign_cols(
ulint len;
ulint i;
mtr_t mtr;
+ size_t id_len;
ut_ad(mutex_own(&(dict_sys->mutex)));
+ id_len = strlen(foreign->id);
+
foreign->foreign_col_names = static_cast<const char**>(
mem_heap_alloc(foreign->heap,
foreign->n_fields * sizeof(void*)));
@@ -2140,7 +2634,7 @@ dict_load_foreign_cols(
tuple = dtuple_create(foreign->heap, 1);
dfield = dtuple_get_nth_field(tuple, 0);
- dfield_set_data(dfield, id, id_len);
+ dfield_set_data(dfield, foreign->id, id_len);
dict_index_copy_types(tuple, sys_index, 1);
btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
@@ -2154,8 +2648,42 @@ dict_load_foreign_cols(
field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_FOREIGN_COLS__ID, &len);
- ut_a(len == id_len);
- ut_a(ut_memcmp(id, field, len) == 0);
+
+ if (len != id_len || ut_memcmp(foreign->id, field, len) != 0) {
+ const rec_t* pos;
+ ulint pos_len;
+ const rec_t* for_col_name;
+ ulint for_col_name_len;
+ const rec_t* ref_col_name;
+ ulint ref_col_name_len;
+
+ pos = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_FOREIGN_COLS__POS,
+ &pos_len);
+
+ for_col_name = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_FOREIGN_COLS__FOR_COL_NAME,
+ &for_col_name_len);
+
+ ref_col_name = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_FOREIGN_COLS__REF_COL_NAME,
+ &ref_col_name_len);
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unable to load columns names for foreign "
+ "key '%s' because it was not found in "
+ "InnoDB internal table SYS_FOREIGN_COLS. The "
+ "closest entry we found is: "
+ "(ID='%.*s', POS=%lu, FOR_COL_NAME='%.*s', "
+ "REF_COL_NAME='%.*s')",
+ foreign->id,
+ (int) len, field,
+ mach_read_from_4(pos),
+ (int) for_col_name_len, for_col_name,
+ (int) ref_col_name_len, ref_col_name);
+
+ ut_error;
+ }
field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_FOREIGN_COLS__POS, &len);
@@ -2182,13 +2710,12 @@ dict_load_foreign_cols(
/***********************************************************************//**
Loads a foreign key constraint to the dictionary cache.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
dict_load_foreign(
/*==============*/
- const char* id, /*!< in: foreign constraint id, not
- necessary '\0'-terminated */
- ulint id_len, /*!< in: id length */
+ const char* id, /*!< in: foreign constraint id, must be
+ '\0'-terminated */
ibool check_charsets,
/*!< in: TRUE=check charset compatibility */
ibool check_recursive)
@@ -2210,9 +2737,12 @@ dict_load_foreign(
mtr_t mtr;
dict_table_t* for_table;
dict_table_t* ref_table;
+ size_t id_len;
ut_ad(mutex_own(&(dict_sys->mutex)));
+ id_len = strlen(id);
+
heap2 = mem_heap_create(1000);
mtr_start(&mtr);
@@ -2238,8 +2768,8 @@ dict_load_foreign(
fprintf(stderr,
"InnoDB: Error: cannot load foreign constraint "
- "%.*s: could not find the relevant record in "
- "SYS_FOREIGN\n", (int) id_len, id);
+ "%s: could not find the relevant record in "
+ "SYS_FOREIGN\n", id);
btr_pcur_close(&pcur);
mtr_commit(&mtr);
@@ -2255,8 +2785,8 @@ dict_load_foreign(
fprintf(stderr,
"InnoDB: Error: cannot load foreign constraint "
- "%.*s: found %.*s instead in SYS_FOREIGN\n",
- (int) id_len, id, (int) len, field);
+ "%s: found %.*s instead in SYS_FOREIGN\n",
+ id, (int) len, field);
btr_pcur_close(&pcur);
mtr_commit(&mtr);
@@ -2301,7 +2831,7 @@ dict_load_foreign(
btr_pcur_close(&pcur);
mtr_commit(&mtr);
- dict_load_foreign_cols(id, id_len, foreign);
+ dict_load_foreign_cols(foreign);
ref_table = dict_table_check_if_in_cache_low(
foreign->referenced_table_name_lookup);
@@ -2371,7 +2901,7 @@ cache already contains all constraints where the other relevant table is
already in the dictionary cache.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
dict_load_foreigns(
/*===============*/
const char* table_name, /*!< in: table name */
@@ -2389,7 +2919,7 @@ dict_load_foreigns(
const rec_t* rec;
const byte* field;
ulint len;
- ulint err;
+ dberr_t err;
mtr_t mtr;
ut_ad(mutex_own(&(dict_sys->mutex)));
@@ -2414,6 +2944,7 @@ dict_load_foreigns(
sec_index = dict_table_get_next_index(
dict_table_get_first_index(sys_foreign));
+ ut_ad(!dict_index_is_clust(sec_index));
start_load:
tuple = dtuple_create_from_mem(tuple_buf, sizeof(tuple_buf), 1);
@@ -2436,7 +2967,6 @@ loop:
/* Now we have the record in the secondary index containing a table
name and a foreign constraint ID */
- rec = btr_pcur_get_rec(&pcur);
field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_FOREIGN_FOR_NAME__NAME, &len);
@@ -2475,14 +3005,21 @@ loop:
field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_FOREIGN_FOR_NAME__ID, &len);
+ /* Copy the string because the page may be modified or evicted
+ after mtr_commit() below. */
+ char fk_id[MAX_TABLE_NAME_LEN + 1];
+
+ ut_a(len <= MAX_TABLE_NAME_LEN);
+ memcpy(fk_id, field, len);
+ fk_id[len] = '\0';
+
btr_pcur_store_position(&pcur, &mtr);
mtr_commit(&mtr);
/* Load the foreign constraint definition to the dictionary cache */
- err = dict_load_foreign((char*) field, len, check_charsets,
- check_recursive);
+ err = dict_load_foreign(fk_id, check_charsets, check_recursive);
if (err != DB_SUCCESS) {
btr_pcur_close(&pcur);
diff --git a/storage/innobase/dict/dict0mem.cc b/storage/innobase/dict/dict0mem.cc
index 28b935d2e58..116a6a6d96a 100644
--- a/storage/innobase/dict/dict0mem.cc
+++ b/storage/innobase/dict/dict0mem.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -35,8 +36,9 @@ Created 1/8/1996 Heikki Tuuri
#include "dict0dict.h"
#include "fts0priv.h"
#ifndef UNIV_HOTBACKUP
-#include "ha_prototypes.h" /* innobase_casedn_str(),
+# include "ha_prototypes.h" /* innobase_casedn_str(),
innobase_get_lower_case_table_names */
+# include "mysql_com.h" /* NAME_LEN */
# include "lock0lock.h"
#endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_BLOB_DEBUG
@@ -51,6 +53,10 @@ Created 1/8/1996 Heikki Tuuri
UNIV_INTERN mysql_pfs_key_t autoinc_mutex_key;
#endif /* UNIV_PFS_MUTEX */
+/** Prefix for tmp tables, adopted from sql/table.h */
+#define tmp_file_prefix "#sql"
+#define tmp_file_prefix_length 4
+
/**********************************************************************//**
Creates a table memory object.
@return own: table object */
@@ -60,9 +66,7 @@ dict_mem_table_create(
/*==================*/
const char* name, /*!< in: table name */
ulint space, /*!< in: space where the clustered index of
- the table is placed; this parameter is
- ignored if the table is made a member of
- a cluster */
+ the table is placed */
ulint n_cols, /*!< in: number of columns */
ulint flags, /*!< in: table flags */
ulint flags2) /*!< in: table flags2 */
@@ -71,7 +75,7 @@ dict_mem_table_create(
mem_heap_t* heap;
ut_ad(name);
- dict_tf_validate(flags);
+ ut_a(dict_tf_is_valid(flags));
ut_a(!(flags2 & ~DICT_TF2_BIT_MASK));
heap = mem_heap_create(DICT_HEAP_SIZE);
@@ -115,7 +119,6 @@ dict_mem_table_create(
|| DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) {
table->fts = fts_create(table);
table->fts->cache = fts_cache_create(table);
- fts_optimize_add_table(table);
} else {
table->fts = NULL;
}
@@ -243,6 +246,156 @@ dict_mem_table_add_col(
dict_mem_fill_column_struct(col, i, mtype, prtype, len);
}
+/**********************************************************************//**
+Renames a column of a table in the data dictionary cache. */
+static __attribute__((nonnull))
+void
+dict_mem_table_col_rename_low(
+/*==========================*/
+ dict_table_t* table, /*!< in/out: table */
+ unsigned i, /*!< in: column offset corresponding to s */
+ const char* to, /*!< in: new column name */
+ const char* s) /*!< in: pointer to table->col_names */
+{
+ size_t from_len = strlen(s), to_len = strlen(to);
+
+ ut_ad(i < table->n_def);
+ ut_ad(from_len <= NAME_LEN);
+ ut_ad(to_len <= NAME_LEN);
+
+ if (from_len == to_len) {
+ /* The easy case: simply replace the column name in
+ table->col_names. */
+ strcpy(const_cast<char*>(s), to);
+ } else {
+ /* We need to adjust all affected index->field
+ pointers, as in dict_index_add_col(). First, copy
+ table->col_names. */
+ ulint prefix_len = s - table->col_names;
+
+ for (; i < table->n_def; i++) {
+ s += strlen(s) + 1;
+ }
+
+ ulint full_len = s - table->col_names;
+ char* col_names;
+
+ if (to_len > from_len) {
+ col_names = static_cast<char*>(
+ mem_heap_alloc(
+ table->heap,
+ full_len + to_len - from_len));
+
+ memcpy(col_names, table->col_names, prefix_len);
+ } else {
+ col_names = const_cast<char*>(table->col_names);
+ }
+
+ memcpy(col_names + prefix_len, to, to_len);
+ memmove(col_names + prefix_len + to_len,
+ table->col_names + (prefix_len + from_len),
+ full_len - (prefix_len + from_len));
+
+ /* Replace the field names in every index. */
+ for (dict_index_t* index = dict_table_get_first_index(table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+ ulint n_fields = dict_index_get_n_fields(index);
+
+ for (ulint i = 0; i < n_fields; i++) {
+ dict_field_t* field
+ = dict_index_get_nth_field(
+ index, i);
+ ulint name_ofs
+ = field->name - table->col_names;
+ if (name_ofs <= prefix_len) {
+ field->name = col_names + name_ofs;
+ } else {
+ ut_a(name_ofs < full_len);
+ field->name = col_names
+ + name_ofs + to_len - from_len;
+ }
+ }
+ }
+
+ table->col_names = col_names;
+ }
+
+ /* Replace the field names in every foreign key constraint. */
+ for (dict_foreign_t* foreign = UT_LIST_GET_FIRST(table->foreign_list);
+ foreign != NULL;
+ foreign = UT_LIST_GET_NEXT(foreign_list, foreign)) {
+ for (unsigned f = 0; f < foreign->n_fields; f++) {
+ /* These can point straight to
+ table->col_names, because the foreign key
+ constraints will be freed at the same time
+ when the table object is freed. */
+ foreign->foreign_col_names[f]
+ = dict_index_get_nth_field(
+ foreign->foreign_index, f)->name;
+ }
+ }
+
+ for (dict_foreign_t* foreign = UT_LIST_GET_FIRST(
+ table->referenced_list);
+ foreign != NULL;
+ foreign = UT_LIST_GET_NEXT(referenced_list, foreign)) {
+ for (unsigned f = 0; f < foreign->n_fields; f++) {
+ /* foreign->referenced_col_names[] need to be
+ copies, because the constraint may become
+ orphan when foreign_key_checks=0 and the
+ parent table is dropped. */
+
+ const char* col_name = dict_index_get_nth_field(
+ foreign->referenced_index, f)->name;
+
+ if (strcmp(foreign->referenced_col_names[f],
+ col_name)) {
+ char** rc = const_cast<char**>(
+ foreign->referenced_col_names + f);
+ size_t col_name_len_1 = strlen(col_name) + 1;
+
+ if (col_name_len_1 <= strlen(*rc) + 1) {
+ memcpy(*rc, col_name, col_name_len_1);
+ } else {
+ *rc = static_cast<char*>(
+ mem_heap_dup(
+ foreign->heap,
+ col_name,
+ col_name_len_1));
+ }
+ }
+ }
+ }
+}
+
+/**********************************************************************//**
+Renames a column of a table in the data dictionary cache. */
+UNIV_INTERN
+void
+dict_mem_table_col_rename(
+/*======================*/
+ dict_table_t* table, /*!< in/out: table */
+ unsigned nth_col,/*!< in: column index */
+ const char* from, /*!< in: old column name */
+ const char* to) /*!< in: new column name */
+{
+ const char* s = table->col_names;
+
+ ut_ad(nth_col < table->n_def);
+
+ for (unsigned i = 0; i < nth_col; i++) {
+ size_t len = strlen(s);
+ ut_ad(len > 0);
+ s += len + 1;
+ }
+
+ /* This could fail if the data dictionaries are out of sync.
+ Proceed with the renaming anyway. */
+ ut_ad(!strcmp(from, s));
+
+ dict_mem_table_col_rename_low(table, nth_col, to, s);
+}
/**********************************************************************//**
This function populates a dict_col_t memory structure with
@@ -304,6 +457,8 @@ dict_mem_index_create(
dict_mem_fill_index_struct(index, heap, table_name, index_name,
space, type, n_fields);
+ os_fast_mutex_init(zip_pad_mutex_key, &index->zip_pad.mutex);
+
return(index);
}
@@ -436,5 +591,31 @@ dict_mem_index_free(
}
#endif /* UNIV_BLOB_DEBUG */
+ os_fast_mutex_free(&index->zip_pad.mutex);
+
mem_heap_free(index->heap);
}
+
+/*******************************************************************//**
+Create a temporary tablename.
+@return temporary tablename suitable for InnoDB use */
+UNIV_INTERN
+char*
+dict_mem_create_temporary_tablename(
+/*================================*/
+ mem_heap_t* heap, /*!< in: memory heap */
+ const char* dbtab, /*!< in: database/table name */
+ table_id_t id) /*!< in: InnoDB table id */
+{
+ const char* dbend = strchr(dbtab, '/');
+ ut_ad(dbend);
+ size_t dblen = dbend - dbtab + 1;
+ size_t size = tmp_file_prefix_length + 4 + 9 + 9 + dblen;
+
+ char* name = static_cast<char*>(mem_heap_alloc(heap, size));
+ memcpy(name, dbtab, dblen);
+ ut_snprintf(name + dblen, size - dblen,
+ tmp_file_prefix "-ib" UINT64PF, id);
+ return(name);
+}
+
diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc
index eebf6b1ec26..ff7e1ce642c 100644
--- a/storage/innobase/dict/dict0stats.cc
+++ b/storage/innobase/dict/dict0stats.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2009, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2009, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -29,27 +29,27 @@ Created Jan 06, 2010 Vasil Dimov
#include "btr0btr.h" /* btr_get_size() */
#include "btr0cur.h" /* btr_estimate_number_of_different_key_vals() */
-#include "dict0dict.h" /* dict_table_get_first_index() */
+#include "dict0dict.h" /* dict_table_get_first_index(), dict_fs2utf8() */
#include "dict0mem.h" /* DICT_TABLE_MAGIC_N */
#include "dict0stats.h"
#include "data0type.h" /* dtype_t */
-#include "db0err.h" /* db_err */
+#include "db0err.h" /* dberr_t */
#include "dyn0dyn.h" /* dyn_array* */
+#include "page0page.h" /* page_align() */
#include "pars0pars.h" /* pars_info_create() */
#include "pars0types.h" /* pars_info_t */
#include "que0que.h" /* que_eval_sql() */
#include "rem0cmp.h" /* REC_MAX_N_FIELDS,cmp_rec_rec_with_match() */
-#include "row0sel.h" /* sel_node_struct */
+#include "row0sel.h" /* sel_node_t */
#include "row0types.h" /* sel_node_t */
#include "trx0trx.h" /* trx_create() */
#include "trx0roll.h" /* trx_rollback_to_savepoint() */
#include "ut0rnd.h" /* ut_rnd_interval() */
-
-#include "ha_prototypes.h" /* innobase_strcasecmp() */
+#include "ut0ut.h" /* ut_format_name(), ut_time() */
/* Sampling algorithm description @{
-The algorithm is controlled by one number - srv_stats_persistent_sample_pages,
+The algorithm is controlled by one number - N_SAMPLE_PAGES(index),
let it be A, which is the number of leaf pages to analyze for a given index
for each n-prefix (if the index is on 3 columns, then 3*A leaf pages will be
analyzed).
@@ -124,126 +124,34 @@ where n=1..n_uniq.
#define DEBUG_PRINTF(fmt, ...) /* noop */
#endif /* UNIV_STATS_DEBUG */
-/* number of distinct records on a given level that are required to stop
-descending to lower levels and fetch
-srv_stats_persistent_sample_pages records from that level */
-#define N_DIFF_REQUIRED (srv_stats_persistent_sample_pages * 10)
+/* Gets the number of leaf pages to sample in persistent stats estimation */
+#define N_SAMPLE_PAGES(index) \
+ ((index)->table->stats_sample_pages != 0 ? \
+ (index)->table->stats_sample_pages : \
+ srv_stats_persistent_sample_pages)
-/** Open handles on the stats tables. Currently this is used to increase the
-reference count of the stats tables. */
-typedef struct dict_stats_struct {
- dict_table_t* table_stats; /*!< Handle to open TABLE_STATS_NAME */
- dict_table_t* index_stats; /*!< Handle to open INDEX_STATS_NAME */
-} dict_stats_t;
+/* number of distinct records on a given level that are required to stop
+descending to lower levels and fetch N_SAMPLE_PAGES(index) records
+from that level */
+#define N_DIFF_REQUIRED(index) (N_SAMPLE_PAGES(index) * 10)
/*********************************************************************//**
-Calculates new estimates for table and index statistics. This function
-is relatively quick and is used to calculate transient statistics that
-are not saved on disk.
-This was the only way to calculate statistics before the
-Persistent Statistics feature was introduced.
-dict_stats_update_transient() @{ */
-static
-void
-dict_stats_update_transient(
-/*========================*/
- dict_table_t* table) /*!< in/out: table */
+Checks whether an index should be ignored in stats manipulations:
+* stats fetch
+* stats recalc
+* stats save
+dict_stats_should_ignore_index() @{
+@return true if exists and all tables are ok */
+UNIV_INLINE
+bool
+dict_stats_should_ignore_index(
+/*===========================*/
+ const dict_index_t* index) /*!< in: index */
{
- dict_index_t* index;
- ulint sum_of_index_sizes = 0;
-
- /* Find out the sizes of the indexes and how many different values
- for the key they approximately have */
-
- index = dict_table_get_first_index(table);
-
- if (index == NULL) {
- /* Table definition is corrupt */
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: table %s has no indexes. "
- "Cannot calculate statistics.\n", table->name);
- return;
- }
-
- do {
-
- if (index->type & DICT_FTS) {
- index = dict_table_get_next_index(index);
- continue;
- }
-
- if (UNIV_LIKELY
- (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE
- || (srv_force_recovery < SRV_FORCE_NO_LOG_REDO
- && dict_index_is_clust(index)))) {
- mtr_t mtr;
- ulint size;
-
- mtr_start(&mtr);
- mtr_s_lock(dict_index_get_lock(index), &mtr);
-
- size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);
-
- if (size != ULINT_UNDEFINED) {
- index->stat_index_size = size;
-
- size = btr_get_size(
- index, BTR_N_LEAF_PAGES, &mtr);
- }
-
- mtr_commit(&mtr);
-
- switch (size) {
- case ULINT_UNDEFINED:
- goto fake_statistics;
- case 0:
- /* The root node of the tree is a leaf */
- size = 1;
- }
-
- sum_of_index_sizes += index->stat_index_size;
-
- index->stat_n_leaf_pages = size;
-
- btr_estimate_number_of_different_key_vals(index);
- } else {
- /* If we have set a high innodb_force_recovery
- level, do not calculate statistics, as a badly
- corrupted index can cause a crash in it.
- Initialize some bogus index cardinality
- statistics, so that the data can be queried in
- various means, also via secondary indexes. */
- ulint i;
-
-fake_statistics:
- sum_of_index_sizes++;
- index->stat_index_size = index->stat_n_leaf_pages = 1;
-
- for (i = dict_index_get_n_unique(index); i; ) {
- index->stat_n_diff_key_vals[i--] = 1;
- }
-
- memset(index->stat_n_non_null_key_vals, 0,
- (1 + dict_index_get_n_unique(index))
- * sizeof(*index->stat_n_non_null_key_vals));
- }
-
- index = dict_table_get_next_index(index);
- } while (index);
-
- index = dict_table_get_first_index(table);
-
- table->stat_n_rows = index->stat_n_diff_key_vals[
- dict_index_get_n_unique(index)];
-
- table->stat_clustered_index_size = index->stat_index_size;
-
- table->stat_sum_of_other_index_sizes = sum_of_index_sizes
- - index->stat_index_size;
-
- table->stat_modified_counter = 0;
-
- table->stat_initialized = TRUE;
+ return((index->type & DICT_FTS)
+ || dict_index_is_corrupted(index)
+ || index->to_be_dropped
+ || *index->name == TEMP_INDEX_PREFIX);
}
/* @} */
@@ -251,24 +159,24 @@ fake_statistics:
Checks whether the persistent statistics storage exists and that all
tables have the proper structure.
dict_stats_persistent_storage_check() @{
-@return TRUE if exists and all tables are ok */
+@return true if exists and all tables are ok */
static
-ibool
+bool
dict_stats_persistent_storage_check(
/*================================*/
- ibool caller_has_dict_sys_mutex) /*!< in: TRUE if the caller
+ bool caller_has_dict_sys_mutex) /*!< in: true if the caller
owns dict_sys->mutex */
{
/* definition for the table TABLE_STATS_NAME */
dict_col_meta_t table_stats_columns[] = {
{"database_name", DATA_VARMYSQL,
- DATA_NOT_NULL, 192 /* NAME_LEN from mysql_com.h */},
+ DATA_NOT_NULL, 192},
{"table_name", DATA_VARMYSQL,
- DATA_NOT_NULL, 192 /* NAME_LEN from mysql_com.h */},
+ DATA_NOT_NULL, 192},
- {"last_update", DATA_INT,
- DATA_NOT_NULL | DATA_UNSIGNED, 4},
+ {"last_update", DATA_FIXBINARY,
+ DATA_NOT_NULL, 4},
{"n_rows", DATA_INT,
DATA_NOT_NULL | DATA_UNSIGNED, 8},
@@ -282,22 +190,24 @@ dict_stats_persistent_storage_check(
dict_table_schema_t table_stats_schema = {
TABLE_STATS_NAME,
UT_ARR_SIZE(table_stats_columns),
- table_stats_columns
+ table_stats_columns,
+ 0 /* n_foreign */,
+ 0 /* n_referenced */
};
/* definition for the table INDEX_STATS_NAME */
dict_col_meta_t index_stats_columns[] = {
{"database_name", DATA_VARMYSQL,
- DATA_NOT_NULL, 192 /* NAME_LEN from mysql_com.h */},
+ DATA_NOT_NULL, 192},
{"table_name", DATA_VARMYSQL,
- DATA_NOT_NULL, 192 /* NAME_LEN from mysql_com.h */},
+ DATA_NOT_NULL, 192},
{"index_name", DATA_VARMYSQL,
- DATA_NOT_NULL, 192 /* NAME_LEN from mysql_com.h */},
+ DATA_NOT_NULL, 192},
- {"last_update", DATA_INT,
- DATA_NOT_NULL | DATA_UNSIGNED, 4},
+ {"last_update", DATA_FIXBINARY,
+ DATA_NOT_NULL, 4},
{"stat_name", DATA_VARMYSQL,
DATA_NOT_NULL, 64*3},
@@ -314,11 +224,13 @@ dict_stats_persistent_storage_check(
dict_table_schema_t index_stats_schema = {
INDEX_STATS_NAME,
UT_ARR_SIZE(index_stats_columns),
- index_stats_columns
+ index_stats_columns,
+ 0 /* n_foreign */,
+ 0 /* n_referenced */
};
char errstr[512];
- enum db_err ret;
+ dberr_t ret;
if (!caller_has_dict_sys_mutex) {
mutex_enter(&(dict_sys->mutex));
@@ -339,24 +251,660 @@ dict_stats_persistent_storage_check(
mutex_exit(&(dict_sys->mutex));
}
- if (ret != DB_SUCCESS && ret != DB_TABLE_NOT_FOUND) {
+ if (ret != DB_SUCCESS) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr, " InnoDB: Error: %s\n", errstr);
+ return(false);
+ }
+ /* else */
+
+ return(true);
+}
+/* @} */
+
+/*********************************************************************//**
+Executes a given SQL statement using the InnoDB internal SQL parser
+in its own transaction and commits it.
+This function will free the pinfo object.
+@return DB_SUCCESS or error code */
+static
+dberr_t
+dict_stats_exec_sql(
+/*================*/
+ pars_info_t* pinfo, /*!< in/out: pinfo to pass to que_eval_sql()
+ must already have any literals bound to it */
+ const char* sql) /*!< in: SQL string to execute */
+{
+ trx_t* trx;
+ dberr_t err;
+
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(mutex_own(&dict_sys->mutex));
+
+ if (!dict_stats_persistent_storage_check(true)) {
+ pars_info_free(pinfo);
+ return(DB_STATS_DO_NOT_EXIST);
+ }
+
+ trx = trx_allocate_for_background();
+ trx_start_if_not_started(trx);
+
+ err = que_eval_sql(pinfo, sql, FALSE, trx); /* pinfo is freed here */
+
+ if (err == DB_SUCCESS) {
+ trx_commit_for_mysql(trx);
+ } else {
+ trx->op_info = "rollback of internal trx on stats tables";
+ trx->dict_operation_lock_mode = RW_X_LATCH;
+ trx_rollback_to_savepoint(trx, NULL);
+ trx->dict_operation_lock_mode = 0;
+ trx->op_info = "";
+ ut_a(trx->error_state == DB_SUCCESS);
+ }
+
+ trx_free_for_background(trx);
+
+ return(err);
+}
+
+/*********************************************************************//**
+Duplicate a table object and its indexes.
+This function creates a dummy dict_table_t object and initializes the
+following table and index members:
+dict_table_t::id (copied)
+dict_table_t::heap (newly created)
+dict_table_t::name (copied)
+dict_table_t::corrupted (copied)
+dict_table_t::indexes<> (newly created)
+dict_table_t::magic_n
+for each entry in dict_table_t::indexes, the following are initialized:
+(indexes that have DICT_FTS set in index->type are skipped)
+dict_index_t::id (copied)
+dict_index_t::name (copied)
+dict_index_t::table_name (points to the copied table name)
+dict_index_t::table (points to the above semi-initialized object)
+dict_index_t::type (copied)
+dict_index_t::to_be_dropped (copied)
+dict_index_t::online_status (copied)
+dict_index_t::n_uniq (copied)
+dict_index_t::fields[] (newly created, only first n_uniq, only fields[i].name)
+dict_index_t::indexes<> (newly created)
+dict_index_t::stat_n_diff_key_vals[] (only allocated, left uninitialized)
+dict_index_t::stat_n_sample_sizes[] (only allocated, left uninitialized)
+dict_index_t::stat_n_non_null_key_vals[] (only allocated, left uninitialized)
+dict_index_t::magic_n
+The returned object should be freed with dict_stats_table_clone_free()
+when no longer needed.
+@return incomplete table object */
+static
+dict_table_t*
+dict_stats_table_clone_create(
+/*==========================*/
+ const dict_table_t* table) /*!< in: table whose stats to copy */
+{
+ size_t heap_size;
+ dict_index_t* index;
+
+ /* Estimate the size needed for the table and all of its indexes */
+
+ heap_size = 0;
+ heap_size += sizeof(dict_table_t);
+ heap_size += strlen(table->name) + 1;
+
+ for (index = dict_table_get_first_index(table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+
+ if (dict_stats_should_ignore_index(index)) {
+ continue;
+ }
+
+ ut_ad(!dict_index_is_univ(index));
+
+ ulint n_uniq = dict_index_get_n_unique(index);
+
+ heap_size += sizeof(dict_index_t);
+ heap_size += strlen(index->name) + 1;
+ heap_size += n_uniq * sizeof(index->fields[0]);
+ for (ulint i = 0; i < n_uniq; i++) {
+ heap_size += strlen(index->fields[i].name) + 1;
+ }
+ heap_size += n_uniq * sizeof(index->stat_n_diff_key_vals[0]);
+ heap_size += n_uniq * sizeof(index->stat_n_sample_sizes[0]);
+ heap_size += n_uniq * sizeof(index->stat_n_non_null_key_vals[0]);
+ }
+
+ /* Allocate the memory and copy the members */
+
+ mem_heap_t* heap;
+
+ heap = mem_heap_create(heap_size);
+
+ dict_table_t* t;
+
+ t = (dict_table_t*) mem_heap_alloc(heap, sizeof(*t));
+
+ UNIV_MEM_ASSERT_RW_ABORT(&table->id, sizeof(table->id));
+ t->id = table->id;
+
+ t->heap = heap;
+
+ UNIV_MEM_ASSERT_RW_ABORT(table->name, strlen(table->name) + 1);
+ t->name = (char*) mem_heap_strdup(heap, table->name);
+
+ t->corrupted = table->corrupted;
+
+ UT_LIST_INIT(t->indexes);
+
+ for (index = dict_table_get_first_index(table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+
+ if (dict_stats_should_ignore_index(index)) {
+ continue;
+ }
+
+ ut_ad(!dict_index_is_univ(index));
+
+ dict_index_t* idx;
+
+ idx = (dict_index_t*) mem_heap_alloc(heap, sizeof(*idx));
+
+ UNIV_MEM_ASSERT_RW_ABORT(&index->id, sizeof(index->id));
+ idx->id = index->id;
+
+ UNIV_MEM_ASSERT_RW_ABORT(index->name, strlen(index->name) + 1);
+ idx->name = (char*) mem_heap_strdup(heap, index->name);
+
+ idx->table_name = t->name;
+
+ idx->table = t;
+
+ idx->type = index->type;
+
+ idx->to_be_dropped = 0;
+
+ idx->online_status = ONLINE_INDEX_COMPLETE;
+
+ idx->n_uniq = index->n_uniq;
+
+ idx->fields = (dict_field_t*) mem_heap_alloc(
+ heap, idx->n_uniq * sizeof(idx->fields[0]));
+
+ for (ulint i = 0; i < idx->n_uniq; i++) {
+ UNIV_MEM_ASSERT_RW_ABORT(index->fields[i].name, strlen(index->fields[i].name) + 1);
+ idx->fields[i].name = (char*) mem_heap_strdup(
+ heap, index->fields[i].name);
+ }
+
+ /* hook idx into t->indexes */
+ UT_LIST_ADD_LAST(indexes, t->indexes, idx);
+
+ idx->stat_n_diff_key_vals = (ib_uint64_t*) mem_heap_alloc(
+ heap,
+ idx->n_uniq * sizeof(idx->stat_n_diff_key_vals[0]));
+
+ idx->stat_n_sample_sizes = (ib_uint64_t*) mem_heap_alloc(
+ heap,
+ idx->n_uniq * sizeof(idx->stat_n_sample_sizes[0]));
+
+ idx->stat_n_non_null_key_vals = (ib_uint64_t*) mem_heap_alloc(
+ heap,
+ idx->n_uniq * sizeof(idx->stat_n_non_null_key_vals[0]));
+ ut_d(idx->magic_n = DICT_INDEX_MAGIC_N);
+ }
+
+ ut_d(t->magic_n = DICT_TABLE_MAGIC_N);
+
+ return(t);
+}
+
+/*********************************************************************//**
+Free the resources occupied by an object returned by
+dict_stats_table_clone_create().
+dict_stats_table_clone_free() @{ */
+static
+void
+dict_stats_table_clone_free(
+/*========================*/
+ dict_table_t* t) /*!< in: dummy table object to free */
+{
+ mem_heap_free(t->heap);
+}
+/* @} */
+
+/*********************************************************************//**
+Write all zeros (or 1 where it makes sense) into an index
+statistics members. The resulting stats correspond to an empty index.
+The caller must own index's table stats latch in X mode
+(dict_table_stats_lock(table, RW_X_LATCH))
+dict_stats_empty_index() @{ */
+static
+void
+dict_stats_empty_index(
+/*===================*/
+ dict_index_t* index) /*!< in/out: index */
+{
+ ut_ad(!(index->type & DICT_FTS));
+ ut_ad(!dict_index_is_univ(index));
+
+ ulint n_uniq = index->n_uniq;
+
+ for (ulint i = 0; i < n_uniq; i++) {
+ index->stat_n_diff_key_vals[i] = 0;
+ index->stat_n_sample_sizes[i] = 1;
+ index->stat_n_non_null_key_vals[i] = 0;
+ }
+
+ index->stat_index_size = 1;
+ index->stat_n_leaf_pages = 1;
+}
+/* @} */
+
+/*********************************************************************//**
+Write all zeros (or 1 where it makes sense) into a table and its indexes'
+statistics members. The resulting stats correspond to an empty table.
+dict_stats_empty_table() @{ */
+static
+void
+dict_stats_empty_table(
+/*===================*/
+ dict_table_t* table) /*!< in/out: table */
+{
+ /* Zero the stats members */
+
+ dict_table_stats_lock(table, RW_X_LATCH);
+
+ table->stat_n_rows = 0;
+ table->stat_clustered_index_size = 1;
+ /* 1 page for each index, not counting the clustered */
+ table->stat_sum_of_other_index_sizes
+ = UT_LIST_GET_LEN(table->indexes) - 1;
+ table->stat_modified_counter = 0;
+
+ dict_index_t* index;
+
+ for (index = dict_table_get_first_index(table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+
+ if (index->type & DICT_FTS) {
+ continue;
+ }
+
+ ut_ad(!dict_index_is_univ(index));
+
+ dict_stats_empty_index(index);
+ }
+
+ table->stat_initialized = TRUE;
+
+ dict_table_stats_unlock(table, RW_X_LATCH);
+}
+/* @} */
+
+/*********************************************************************//**
+Check whether index's stats are initialized (assert if they are not). */
+static
+void
+dict_stats_assert_initialized_index(
+/*================================*/
+ const dict_index_t* index) /*!< in: index */
+{
+ UNIV_MEM_ASSERT_RW_ABORT(
+ index->stat_n_diff_key_vals,
+ index->n_uniq * sizeof(index->stat_n_diff_key_vals[0]));
+
+ UNIV_MEM_ASSERT_RW_ABORT(
+ index->stat_n_sample_sizes,
+ index->n_uniq * sizeof(index->stat_n_sample_sizes[0]));
+
+ UNIV_MEM_ASSERT_RW_ABORT(
+ index->stat_n_non_null_key_vals,
+ index->n_uniq * sizeof(index->stat_n_non_null_key_vals[0]));
+
+ UNIV_MEM_ASSERT_RW_ABORT(
+ &index->stat_index_size,
+ sizeof(index->stat_index_size));
+
+ UNIV_MEM_ASSERT_RW_ABORT(
+ &index->stat_n_leaf_pages,
+ sizeof(index->stat_n_leaf_pages));
+}
+/*********************************************************************//**
+Check whether table's stats are initialized (assert if they are not). */
+static
+void
+dict_stats_assert_initialized(
+/*==========================*/
+ const dict_table_t* table) /*!< in: table */
+{
+ ut_a(table->stat_initialized);
+
+ UNIV_MEM_ASSERT_RW_ABORT(&table->stats_last_recalc,
+ sizeof(table->stats_last_recalc));
+
+ UNIV_MEM_ASSERT_RW_ABORT(&table->stat_persistent,
+ sizeof(table->stat_persistent));
+
+ UNIV_MEM_ASSERT_RW_ABORT(&table->stats_auto_recalc,
+ sizeof(table->stats_auto_recalc));
+
+ UNIV_MEM_ASSERT_RW_ABORT(&table->stats_sample_pages,
+ sizeof(table->stats_sample_pages));
+
+ UNIV_MEM_ASSERT_RW_ABORT(&table->stat_n_rows,
+ sizeof(table->stat_n_rows));
+
+ UNIV_MEM_ASSERT_RW_ABORT(&table->stat_clustered_index_size,
+ sizeof(table->stat_clustered_index_size));
+
+ UNIV_MEM_ASSERT_RW_ABORT(&table->stat_sum_of_other_index_sizes,
+ sizeof(table->stat_sum_of_other_index_sizes));
+
+ UNIV_MEM_ASSERT_RW_ABORT(&table->stat_modified_counter,
+ sizeof(table->stat_modified_counter));
+
+ UNIV_MEM_ASSERT_RW_ABORT(&table->stats_bg_flag,
+ sizeof(table->stats_bg_flag));
+
+ for (dict_index_t* index = dict_table_get_first_index(table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+
+ if (!dict_stats_should_ignore_index(index)) {
+ dict_stats_assert_initialized_index(index);
+ }
+ }
+}
+
+#define INDEX_EQ(i1, i2) \
+ ((i1) != NULL \
+ && (i2) != NULL \
+ && (i1)->id == (i2)->id \
+ && strcmp((i1)->name, (i2)->name) == 0)
+/*********************************************************************//**
+Copy table and index statistics from one table to another, including index
+stats. Extra indexes in src are ignored and extra indexes in dst are
+initialized to correspond to an empty index. */
+static
+void
+dict_stats_copy(
+/*============*/
+ dict_table_t* dst, /*!< in/out: destination table */
+ const dict_table_t* src) /*!< in: source table */
+{
+ dst->stats_last_recalc = src->stats_last_recalc;
+ dst->stat_n_rows = src->stat_n_rows;
+ dst->stat_clustered_index_size = src->stat_clustered_index_size;
+ dst->stat_sum_of_other_index_sizes = src->stat_sum_of_other_index_sizes;
+ dst->stat_modified_counter = src->stat_modified_counter;
+
+ dict_index_t* dst_idx;
+ dict_index_t* src_idx;
+
+ for (dst_idx = dict_table_get_first_index(dst),
+ src_idx = dict_table_get_first_index(src);
+ dst_idx != NULL;
+ dst_idx = dict_table_get_next_index(dst_idx),
+ (src_idx != NULL
+ && (src_idx = dict_table_get_next_index(src_idx)))) {
+
+ if (dict_stats_should_ignore_index(dst_idx)) {
+ continue;
+ }
+
+ ut_ad(!dict_index_is_univ(dst_idx));
+
+ if (!INDEX_EQ(src_idx, dst_idx)) {
+ for (src_idx = dict_table_get_first_index(src);
+ src_idx != NULL;
+ src_idx = dict_table_get_next_index(src_idx)) {
+
+ if (INDEX_EQ(src_idx, dst_idx)) {
+ break;
+ }
+ }
+ }
+
+ if (!INDEX_EQ(src_idx, dst_idx)) {
+ dict_stats_empty_index(dst_idx);
+ continue;
+ }
+
+ ulint n_copy_el;
+
+ if (dst_idx->n_uniq > src_idx->n_uniq) {
+ n_copy_el = src_idx->n_uniq;
+ /* Since src is smaller some elements in dst
+ will remain untouched by the following memmove(),
+ thus we init all of them here. */
+ dict_stats_empty_index(dst_idx);
+ } else {
+ n_copy_el = dst_idx->n_uniq;
+ }
+
+ memmove(dst_idx->stat_n_diff_key_vals,
+ src_idx->stat_n_diff_key_vals,
+ n_copy_el * sizeof(dst_idx->stat_n_diff_key_vals[0]));
+
+ memmove(dst_idx->stat_n_sample_sizes,
+ src_idx->stat_n_sample_sizes,
+ n_copy_el * sizeof(dst_idx->stat_n_sample_sizes[0]));
+
+ memmove(dst_idx->stat_n_non_null_key_vals,
+ src_idx->stat_n_non_null_key_vals,
+ n_copy_el * sizeof(dst_idx->stat_n_non_null_key_vals[0]));
+
+ dst_idx->stat_index_size = src_idx->stat_index_size;
+
+ dst_idx->stat_n_leaf_pages = src_idx->stat_n_leaf_pages;
+ }
+
+ dst->stat_initialized = TRUE;
+}
+
+/*********************************************************************//**
+Duplicate the stats of a table and its indexes.
+This function creates a dummy dict_table_t object and copies the input
+table's stats into it. The returned table object is not in the dictionary
+cache and cannot be accessed by any other threads. In addition to the
+members copied in dict_stats_table_clone_create() this function initializes
+the following:
+dict_table_t::stat_initialized
+dict_table_t::stat_persistent
+dict_table_t::stat_n_rows
+dict_table_t::stat_clustered_index_size
+dict_table_t::stat_sum_of_other_index_sizes
+dict_table_t::stat_modified_counter
+dict_index_t::stat_n_diff_key_vals[]
+dict_index_t::stat_n_sample_sizes[]
+dict_index_t::stat_n_non_null_key_vals[]
+dict_index_t::stat_index_size
+dict_index_t::stat_n_leaf_pages
+The returned object should be freed with dict_stats_snapshot_free()
+when no longer needed.
+@return incomplete table object */
+static
+dict_table_t*
+dict_stats_snapshot_create(
+/*=======================*/
+ const dict_table_t* table) /*!< in: table whose stats to copy */
+{
+ mutex_enter(&dict_sys->mutex);
+
+ dict_table_stats_lock(table, RW_S_LATCH);
+
+ dict_stats_assert_initialized(table);
+
+ dict_table_t* t;
+
+ t = dict_stats_table_clone_create(table);
+
+ dict_stats_copy(t, table);
+
+ t->stat_persistent = table->stat_persistent;
+ t->stats_auto_recalc = table->stats_auto_recalc;
+ t->stats_sample_pages = table->stats_sample_pages;
+ t->stats_bg_flag = table->stats_bg_flag;
+
+ dict_table_stats_unlock(table, RW_S_LATCH);
+
+ mutex_exit(&dict_sys->mutex);
+
+ return(t);
+}
+
+/*********************************************************************//**
+Free the resources occupied by an object returned by
+dict_stats_snapshot_create().
+dict_stats_snapshot_free() @{ */
+static
+void
+dict_stats_snapshot_free(
+/*=====================*/
+ dict_table_t* t) /*!< in: dummy table object to free */
+{
+ dict_stats_table_clone_free(t);
+}
+/* @} */
+
+/*********************************************************************//**
+Calculates new estimates for index statistics. This function is
+relatively quick and is used to calculate transient statistics that
+are not saved on disk. This was the only way to calculate statistics
+before the Persistent Statistics feature was introduced.
+dict_stats_update_transient_for_index() @{ */
+static
+void
+dict_stats_update_transient_for_index(
+/*==================================*/
+ dict_index_t* index) /*!< in/out: index */
+{
+ if (UNIV_LIKELY
+ (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE
+ || (srv_force_recovery < SRV_FORCE_NO_LOG_REDO
+ && dict_index_is_clust(index)))) {
+ mtr_t mtr;
+ ulint size;
+ mtr_start(&mtr);
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
+
+ size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);
+
+ if (size != ULINT_UNDEFINED) {
+ index->stat_index_size = size;
+
+ size = btr_get_size(
+ index, BTR_N_LEAF_PAGES, &mtr);
+ }
+
+ mtr_commit(&mtr);
+
+ switch (size) {
+ case ULINT_UNDEFINED:
+ dict_stats_empty_index(index);
+ return;
+ case 0:
+ /* The root node of the tree is a leaf */
+ size = 1;
+ }
+
+ index->stat_n_leaf_pages = size;
+
+ btr_estimate_number_of_different_key_vals(index);
+ } else {
+ /* If we have set a high innodb_force_recovery
+ level, do not calculate statistics, as a badly
+ corrupted index can cause a crash in it.
+ Initialize some bogus index cardinality
+ statistics, so that the data can be queried in
+ various means, also via secondary indexes. */
+ dict_stats_empty_index(index);
+ }
+}
+/* @} */
+
+/*********************************************************************//**
+Calculates new estimates for table and index statistics. This function
+is relatively quick and is used to calculate transient statistics that
+are not saved on disk.
+This was the only way to calculate statistics before the
+Persistent Statistics feature was introduced.
+dict_stats_update_transient() @{ */
+UNIV_INTERN
+void
+dict_stats_update_transient(
+/*========================*/
+ dict_table_t* table) /*!< in/out: table */
+{
+ dict_index_t* index;
+ ulint sum_of_index_sizes = 0;
+
+ /* Find out the sizes of the indexes and how many different values
+ for the key they approximately have */
+
+ index = dict_table_get_first_index(table);
+
+ if (dict_table_is_discarded(table)) {
+ /* Nothing to do. */
+ dict_stats_empty_table(table);
+ return;
+ } else if (index == NULL) {
+ /* Table definition is corrupt */
+
+ char buf[MAX_FULL_NAME_LEN];
ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: %s\n", errstr);
+ fprintf(stderr, " InnoDB: table %s has no indexes. "
+ "Cannot calculate statistics.\n",
+ ut_format_name(table->name, TRUE, buf, sizeof(buf)));
+ dict_stats_empty_table(table);
+ return;
+ }
+
+ for (; index != NULL; index = dict_table_get_next_index(index)) {
+
+ ut_ad(!dict_index_is_univ(index));
+
+ if (index->type & DICT_FTS) {
+ continue;
+ }
+
+ dict_stats_empty_index(index);
+
+ if (dict_stats_should_ignore_index(index)) {
+ continue;
+ }
+
+ dict_stats_update_transient_for_index(index);
+
+ sum_of_index_sizes += index->stat_index_size;
}
- /* We return silently if some of the tables are not present because
- this code is executed during open table. By design we check if the
- persistent statistics storage is present and whether there are stats
- for the table being opened and if so, then we use them, otherwise we
- silently switch back to using the transient stats. */
- return(ret == DB_SUCCESS);
+ index = dict_table_get_first_index(table);
+
+ table->stat_n_rows = index->stat_n_diff_key_vals[
+ dict_index_get_n_unique(index) - 1];
+
+ table->stat_clustered_index_size = index->stat_index_size;
+
+ table->stat_sum_of_other_index_sizes = sum_of_index_sizes
+ - index->stat_index_size;
+
+ table->stats_last_recalc = ut_time();
+
+ table->stat_modified_counter = 0;
+
+ table->stat_initialized = TRUE;
}
/* @} */
/* @{ Pseudo code about the relation between the following functions
-let N = srv_stats_persistent_sample_pages
+let N = N_SAMPLE_PAGES(index)
dict_stats_analyze_index()
for each n_prefix
@@ -375,14 +923,11 @@ dict_stats_analyze_index()
/*********************************************************************//**
Find the total number and the number of distinct keys on a given level in
an index. Each of the 1..n_uniq prefixes are looked up and the results are
-saved in the array n_diff[]. Notice that n_diff[] must be able to store
-n_uniq+1 numbers because the results are saved in
-n_diff[1] .. n_diff[n_uniq]. The total number of records on the level is
-saved in total_recs.
+saved in the array n_diff[0] .. n_diff[n_uniq - 1]. The total number of
+records on the level is saved in total_recs.
Also, the index of the last record in each group of equal records is saved
-in n_diff_boundaries[1..n_uniq], records indexing starts from the leftmost
-record on the level and continues cross pages boundaries, counting from 0.
-dict_stats_analyze_index_level() @{ */
+in n_diff_boundaries[0..n_uniq - 1], records indexing starts from the leftmost
+record on the level and continues cross pages boundaries, counting from 0. */
static
void
dict_stats_analyze_index_level(
@@ -393,78 +938,87 @@ dict_stats_analyze_index_level(
distinct keys for all prefixes */
ib_uint64_t* total_recs, /*!< out: total number of records */
ib_uint64_t* total_pages, /*!< out: total number of pages */
- dyn_array_t* n_diff_boundaries)/*!< out: boundaries of the groups
+ dyn_array_t* n_diff_boundaries,/*!< out: boundaries of the groups
of distinct keys */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
ulint n_uniq;
mem_heap_t* heap;
- dtuple_t* dtuple;
btr_pcur_t pcur;
- mtr_t mtr;
const page_t* page;
const rec_t* rec;
const rec_t* prev_rec;
+ bool prev_rec_is_copied;
byte* prev_rec_buf = NULL;
ulint prev_rec_buf_size = 0;
+ ulint* rec_offsets;
+ ulint* prev_rec_offsets;
ulint i;
DEBUG_PRINTF(" %s(table=%s, index=%s, level=%lu)\n", __func__,
index->table->name, index->name, level);
+ ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
+ MTR_MEMO_S_LOCK));
+
n_uniq = dict_index_get_n_unique(index);
- /* elements in the n_diff array are 1..n_uniq (inclusive) */
- memset(n_diff, 0x0, (n_uniq + 1) * sizeof(*n_diff));
+ /* elements in the n_diff array are 0..n_uniq-1 (inclusive) */
+ memset(n_diff, 0x0, n_uniq * sizeof(n_diff[0]));
+
+ /* Allocate space for the offsets header (the allocation size at
+ offsets[0] and the REC_OFFS_HEADER_SIZE bytes), and n_fields + 1,
+ so that this will never be less than the size calculated in
+ rec_get_offsets_func(). */
+ i = (REC_OFFS_HEADER_SIZE + 1 + 1) + index->n_fields;
- heap = mem_heap_create(256);
+ heap = mem_heap_create((2 * sizeof *rec_offsets) * i);
+ rec_offsets = static_cast<ulint*>(
+ mem_heap_alloc(heap, i * sizeof *rec_offsets));
+ prev_rec_offsets = static_cast<ulint*>(
+ mem_heap_alloc(heap, i * sizeof *prev_rec_offsets));
+ rec_offs_set_n_alloc(rec_offsets, i);
+ rec_offs_set_n_alloc(prev_rec_offsets, i);
- /* reset the dynamic arrays n_diff_boundaries[1..n_uniq];
- n_diff_boundaries[0] is ignored to follow the same convention
- as n_diff[] */
+ /* reset the dynamic arrays n_diff_boundaries[0..n_uniq-1] */
if (n_diff_boundaries != NULL) {
- for (i = 1; i <= n_uniq; i++) {
+ for (i = 0; i < n_uniq; i++) {
dyn_array_free(&n_diff_boundaries[i]);
dyn_array_create(&n_diff_boundaries[i]);
}
}
- /* craft a record that is always smaller than the others,
- this way we are sure that the cursor pcur will be positioned
- on the leftmost record on the leftmost page on the desired level */
- dtuple = dtuple_create(heap, dict_index_get_n_unique(index));
- dict_table_copy_types(dtuple, index->table);
- dtuple_set_info_bits(dtuple, REC_INFO_MIN_REC_FLAG);
-
- mtr_start(&mtr);
+ /* Position pcur on the leftmost record on the leftmost page
+ on the desired level. */
- btr_pcur_open_low(index, level, dtuple, PAGE_CUR_LE, BTR_SEARCH_LEAF,
- &pcur, __FILE__, __LINE__, &mtr);
+ btr_pcur_open_at_index_side(
+ true, index, BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED,
+ &pcur, true, level, mtr);
+ btr_pcur_move_to_next_on_page(&pcur);
page = btr_pcur_get_page(&pcur);
+ /* The page must not be empty, except when
+ it is the root page (and the whole index is empty). */
+ ut_ad(btr_pcur_is_on_user_rec(&pcur) || page_is_leaf(page));
+ ut_ad(btr_pcur_get_rec(&pcur)
+ == page_rec_get_next_const(page_get_infimum_rec(page)));
+
/* check that we are indeed on the desired level */
- ut_a(btr_page_get_level(page, &mtr) == level);
+ ut_a(btr_page_get_level(page, mtr) == level);
/* there should not be any pages on the left */
- ut_a(btr_page_get_prev(page, &mtr) == FIL_NULL);
+ ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
/* check whether the first record on the leftmost page is marked
as such, if we are on a non-leaf level */
- ut_a(level == 0
- || (REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
- page_rec_get_next_const(page_get_infimum_rec(page)),
- page_is_comp(page))));
-
- if (btr_pcur_is_before_first_on_page(&pcur)) {
- btr_pcur_move_to_next_on_page(&pcur);
- }
-
- if (btr_pcur_is_after_last_on_page(&pcur)) {
- btr_pcur_move_to_prev_on_page(&pcur);
- }
+ ut_a((level == 0)
+ == !(REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
+ btr_pcur_get_rec(&pcur), page_is_comp(page))));
prev_rec = NULL;
+ prev_rec_is_copied = false;
/* no records by default */
*total_recs = 0;
@@ -476,56 +1030,83 @@ dict_stats_analyze_index_level(
X and the fist on page X+1 */
for (;
btr_pcur_is_on_user_rec(&pcur);
- btr_pcur_move_to_next_user_rec(&pcur, &mtr)) {
+ btr_pcur_move_to_next_user_rec(&pcur, mtr)) {
ulint matched_fields = 0;
ulint matched_bytes = 0;
- ulint offsets_rec_onstack[REC_OFFS_NORMAL_SIZE];
- ulint* offsets_rec;
-
- rec_offs_init(offsets_rec_onstack);
+ bool rec_is_last_on_page;
rec = btr_pcur_get_rec(&pcur);
+ /* If rec and prev_rec are on different pages, then prev_rec
+ must have been copied, because we hold latch only on the page
+ where rec resides. */
+ if (prev_rec != NULL
+ && page_align(rec) != page_align(prev_rec)) {
+
+ ut_a(prev_rec_is_copied);
+ }
+
+ rec_is_last_on_page =
+ page_rec_is_supremum(page_rec_get_next_const(rec));
+
/* increment the pages counter at the end of each page */
- if (page_rec_is_supremum(page_rec_get_next_const(rec))) {
+ if (rec_is_last_on_page) {
(*total_pages)++;
}
- /* skip delete-marked records */
- if (rec_get_deleted_flag(rec, page_is_comp(
- btr_pcur_get_page(&pcur)))) {
+ /* Skip delete-marked records on the leaf level. If we
+ do not skip them, then ANALYZE quickly after DELETE
+ could count them or not (purge may have already wiped
+ them away) which brings non-determinism. We skip only
+ leaf-level delete marks because delete marks on
+ non-leaf level do not make sense. */
+ if (level == 0 &&
+ rec_get_deleted_flag(
+ rec,
+ page_is_comp(btr_pcur_get_page(&pcur)))) {
+
+ if (rec_is_last_on_page
+ && !prev_rec_is_copied
+ && prev_rec != NULL) {
+ /* copy prev_rec */
+
+ prev_rec_offsets = rec_get_offsets(
+ prev_rec, index, prev_rec_offsets,
+ n_uniq, &heap);
+
+ prev_rec = rec_copy_prefix_to_buf(
+ prev_rec, index,
+ rec_offs_n_fields(prev_rec_offsets),
+ &prev_rec_buf, &prev_rec_buf_size);
+
+ prev_rec_is_copied = true;
+ }
continue;
}
- offsets_rec = rec_get_offsets(rec, index, offsets_rec_onstack,
- n_uniq, &heap);
+ rec_offsets = rec_get_offsets(
+ rec, index, rec_offsets, n_uniq, &heap);
(*total_recs)++;
if (prev_rec != NULL) {
-
- ulint offsets_prev_rec_onstack[REC_OFFS_NORMAL_SIZE];
- ulint* offsets_prev_rec;
-
- rec_offs_init(offsets_prev_rec_onstack);
-
- offsets_prev_rec = rec_get_offsets(
- prev_rec, index, offsets_prev_rec_onstack,
+ prev_rec_offsets = rec_get_offsets(
+ prev_rec, index, prev_rec_offsets,
n_uniq, &heap);
cmp_rec_rec_with_match(rec,
prev_rec,
- offsets_rec,
- offsets_prev_rec,
+ rec_offsets,
+ prev_rec_offsets,
index,
FALSE,
&matched_fields,
&matched_bytes);
- for (i = matched_fields + 1; i <= n_uniq; i++) {
+ for (i = matched_fields; i < n_uniq; i++) {
if (n_diff_boundaries != NULL) {
/* push the index of the previous
@@ -553,17 +1134,18 @@ dict_stats_analyze_index_level(
}
/* increment the number of different keys
- for n_prefix=i */
+ for n_prefix=i+1 (e.g. if i=0 then we increment
+ for n_prefix=1 which is stored in n_diff[0]) */
n_diff[i]++;
}
} else {
/* this is the first non-delete marked record */
- for (i = 1; i <= n_uniq; i++) {
+ for (i = 0; i < n_uniq; i++) {
n_diff[i] = 1;
}
}
- if (page_rec_is_supremum(page_rec_get_next_const(rec))) {
+ if (rec_is_last_on_page) {
/* end of a page has been reached */
/* we need to copy the record instead of assigning
@@ -574,8 +1156,9 @@ dict_stats_analyze_index_level(
btr_pcur_move_to_next_user_rec() will release the
latch on the page that prev_rec is on */
prev_rec = rec_copy_prefix_to_buf(
- rec, index, rec_offs_n_fields(offsets_rec),
+ rec, index, rec_offs_n_fields(rec_offsets),
&prev_rec_buf, &prev_rec_buf_size);
+ prev_rec_is_copied = true;
} else {
/* still on the same page, the next call to
@@ -584,12 +1167,14 @@ dict_stats_analyze_index_level(
instead of copying the records like above */
prev_rec = rec;
+ prev_rec_is_copied = false;
}
}
/* if *total_pages is left untouched then the above loop was not
entered at all and there is one page in the whole tree which is
- empty */
+ empty or the loop was entered but this is level 0, contains one page
+ and all records are delete-marked */
if (*total_pages == 0) {
ut_ad(level == 0);
@@ -605,7 +1190,7 @@ dict_stats_analyze_index_level(
/* remember the index of the last record on the level as the
last one from the last group of equal keys; this holds for
all possible prefixes */
- for (i = 1; i <= n_uniq; i++) {
+ for (i = 0; i < n_uniq; i++) {
void* p;
ib_uint64_t idx;
@@ -619,10 +1204,10 @@ dict_stats_analyze_index_level(
}
/* now in n_diff_boundaries[i] there are exactly n_diff[i] integers,
- for i=1..n_uniq */
+ for i=0..n_uniq-1 */
#ifdef UNIV_STATS_DEBUG
- for (i = 1; i <= n_uniq; i++) {
+ for (i = 0; i < n_uniq; i++) {
DEBUG_PRINTF(" %s(): total recs: " UINT64PF
", total pages: " UINT64PF
@@ -654,9 +1239,11 @@ dict_stats_analyze_index_level(
}
#endif /* UNIV_STATS_DEBUG */
- btr_pcur_close(&pcur);
+ /* Release the latch on the last page, because that is not done by
+ btr_pcur_close(). This function works also for non-leaf pages. */
+ btr_leaf_page_release(btr_pcur_get_block(&pcur), BTR_SEARCH_LEAF, mtr);
- mtr_commit(&mtr);
+ btr_pcur_close(&pcur);
if (prev_rec_buf != NULL) {
@@ -665,15 +1252,16 @@ dict_stats_analyze_index_level(
mem_heap_free(heap);
}
-/* @} */
/* aux enum for controlling the behavior of dict_stats_scan_page() @{ */
-typedef enum page_scan_method_enum {
- COUNT_ALL_NON_BORING, /* scan all records on the given page
- and count the number of distinct ones */
+enum page_scan_method_t {
+ COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED,/* scan all records on
+ the given page and count the number of
+ distinct ones, also ignore delete marked
+ records */
QUIT_ON_FIRST_NON_BORING/* quit when the first record that differs
from its right neighbor is found */
-} page_scan_method_t;
+};
/* @} */
/*********************************************************************//**
@@ -715,11 +1303,18 @@ dict_stats_scan_page(
Because offsets1,offsets2 should be big enough,
this memory heap should never be used. */
mem_heap_t* heap = NULL;
+ const rec_t* (*get_next)(const rec_t*);
- rec = page_rec_get_next_const(page_get_infimum_rec(page));
+ if (scan_method == COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED) {
+ get_next = page_rec_get_next_non_del_marked;
+ } else {
+ get_next = page_rec_get_next_const;
+ }
+
+ rec = get_next(page_get_infimum_rec(page));
if (page_rec_is_supremum(rec)) {
- /* the page is empty */
+ /* the page is empty or contains only delete-marked records */
*n_diff = 0;
*out_rec = NULL;
return(NULL);
@@ -728,7 +1323,7 @@ dict_stats_scan_page(
offsets_rec = rec_get_offsets(rec, index, offsets_rec,
ULINT_UNDEFINED, &heap);
- next_rec = page_rec_get_next_const(rec);
+ next_rec = get_next(rec);
*n_diff = 1;
@@ -777,7 +1372,8 @@ dict_stats_scan_page(
offsets_rec = offsets_next_rec;
offsets_next_rec = offsets_tmp;
}
- next_rec = page_rec_get_next_const(next_rec);
+
+ next_rec = get_next(next_rec);
}
func_exit:
@@ -814,7 +1410,6 @@ dict_stats_analyze_index_below_cur(
ulint* offsets1;
ulint* offsets2;
ulint* offsets_rec;
- ulint root_height;
ib_uint64_t n_diff; /* the result */
ulint size;
@@ -841,8 +1436,6 @@ dict_stats_analyze_index_below_cur(
rec_offs_set_n_alloc(offsets1, size);
rec_offs_set_n_alloc(offsets2, size);
- root_height = btr_page_get_level(btr_root_get(index, mtr), mtr);
-
space = dict_index_get_space(index);
zip_size = dict_table_zip_size(index->table);
@@ -907,14 +1500,7 @@ dict_stats_analyze_index_below_cur(
offsets_rec = dict_stats_scan_page(
&rec, offsets1, offsets2, index, page, n_prefix,
- COUNT_ALL_NON_BORING, &n_diff);
-
- if (root_height > 0) {
-
- /* empty pages are allowed only if the whole B-tree is empty
- and contains a single empty page */
- ut_a(offsets_rec != NULL);
- }
+ COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED, &n_diff);
#if 0
DEBUG_PRINTF(" %s(): n_diff below page_no=%lu: " UINT64PF "\n",
@@ -928,42 +1514,40 @@ dict_stats_analyze_index_below_cur(
/* @} */
/*********************************************************************//**
-For a given level in an index select srv_stats_persistent_sample_pages
+For a given level in an index select N_SAMPLE_PAGES(index)
(or less) records from that level and dive below them to the corresponding
leaf pages, then scan those leaf pages and save the sampling results in
-index->stat_n_diff_key_vals[n_prefix] and the number of pages scanned in
-index->stat_n_sample_sizes[n_prefix].
-dict_stats_analyze_index_for_n_prefix() @{ */
+index->stat_n_diff_key_vals[n_prefix - 1] and the number of pages scanned in
+index->stat_n_sample_sizes[n_prefix - 1]. */
static
void
dict_stats_analyze_index_for_n_prefix(
/*==================================*/
- dict_index_t* index, /*!< in/out: index */
- ulint level, /*!< in: level,
- must be >= 1 */
- ib_uint64_t total_recs_on_level, /*!< in: total number of
- records on the given level */
- ulint n_prefix, /*!< in: look at first
- n_prefix columns when
- comparing records */
- ib_uint64_t n_diff_for_this_prefix, /*!< in: number of distinct
- records on the given level,
- when looking at the first
- n_prefix columns */
- dyn_array_t* boundaries) /*!< in: array that contains
- n_diff_for_this_prefix
- integers each of which
- represents the index (on the
- level, counting from
- left/smallest to right/biggest
- from 0) of the last record
- from each group of distinct
- keys */
+ dict_index_t* index, /*!< in/out: index */
+ ulint level, /*!< in: level, must be >= 1 */
+ ib_uint64_t total_recs_on_level,
+ /*!< in: total number of
+ records on the given level */
+ ulint n_prefix, /*!< in: look at first
+ n_prefix columns when
+ comparing records */
+ ib_uint64_t n_diff_for_this_prefix,
+ /*!< in: number of distinct
+ records on the given level,
+ when looking at the first
+ n_prefix columns */
+ dyn_array_t* boundaries, /*!< in: array that contains
+ n_diff_for_this_prefix
+ integers each of which
+ represents the index (on the
+ level, counting from
+ left/smallest to right/biggest
+ from 0) of the last record
+ from each group of distinct
+ keys */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
- mem_heap_t* heap;
- dtuple_t* dtuple;
btr_pcur_t pcur;
- mtr_t mtr;
const page_t* page;
ib_uint64_t rec_idx;
ib_uint64_t last_idx_on_level;
@@ -978,51 +1562,45 @@ dict_stats_analyze_index_for_n_prefix(
n_prefix, n_diff_for_this_prefix);
#endif
+ ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
+ MTR_MEMO_S_LOCK));
+
/* if some of those is 0 then this means that there is exactly one
page in the B-tree and it is empty and we should have done full scan
and should not be here */
ut_ad(total_recs_on_level > 0);
ut_ad(n_diff_for_this_prefix > 0);
- /* this is configured to be min 1, someone has changed the code */
- ut_ad(srv_stats_persistent_sample_pages > 0);
+ /* this must be at least 1 */
+ ut_ad(N_SAMPLE_PAGES(index) > 0);
- heap = mem_heap_create(256);
+ /* Position pcur on the leftmost record on the leftmost page
+ on the desired level. */
- /* craft a record that is always smaller than the others,
- this way we are sure that the cursor pcur will be positioned
- on the leftmost record on the leftmost page on the desired level */
- dtuple = dtuple_create(heap, dict_index_get_n_unique(index));
- dict_table_copy_types(dtuple, index->table);
- dtuple_set_info_bits(dtuple, REC_INFO_MIN_REC_FLAG);
-
- mtr_start(&mtr);
-
- btr_pcur_open_low(index, level, dtuple, PAGE_CUR_LE, BTR_SEARCH_LEAF,
- &pcur, __FILE__, __LINE__, &mtr);
+ btr_pcur_open_at_index_side(
+ true, index, BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED,
+ &pcur, true, level, mtr);
+ btr_pcur_move_to_next_on_page(&pcur);
page = btr_pcur_get_page(&pcur);
+ /* The page must not be empty, except when
+ it is the root page (and the whole index is empty). */
+ ut_ad(btr_pcur_is_on_user_rec(&pcur) || page_is_leaf(page));
+ ut_ad(btr_pcur_get_rec(&pcur)
+ == page_rec_get_next_const(page_get_infimum_rec(page)));
+
/* check that we are indeed on the desired level */
- ut_a(btr_page_get_level(page, &mtr) == level);
+ ut_a(btr_page_get_level(page, mtr) == level);
/* there should not be any pages on the left */
- ut_a(btr_page_get_prev(page, &mtr) == FIL_NULL);
+ ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
/* check whether the first record on the leftmost page is marked
as such, if we are on a non-leaf level */
- ut_a(level == 0 || REC_INFO_MIN_REC_FLAG
- & rec_get_info_bits(page_rec_get_next_const(
- page_get_infimum_rec(page)),
- page_is_comp(page)));
-
- if (btr_pcur_is_before_first_on_page(&pcur)) {
- btr_pcur_move_to_next_on_page(&pcur);
- }
-
- if (btr_pcur_is_after_last_on_page(&pcur)) {
- btr_pcur_move_to_prev_on_page(&pcur);
- }
+ ut_a((level == 0)
+ == !(REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
+ btr_pcur_get_rec(&pcur), page_is_comp(page))));
last_idx_on_level = *(ib_uint64_t*) dyn_array_get_element(boundaries,
(ulint) ((n_diff_for_this_prefix - 1) * sizeof(ib_uint64_t)));
@@ -1031,7 +1609,7 @@ dict_stats_analyze_index_for_n_prefix(
n_diff_sum_of_all_analyzed_pages = 0;
- n_recs_to_dive_below = ut_min(srv_stats_persistent_sample_pages,
+ n_recs_to_dive_below = ut_min(N_SAMPLE_PAGES(index),
n_diff_for_this_prefix);
for (i = 0; i < n_recs_to_dive_below; i++) {
@@ -1093,7 +1671,7 @@ dict_stats_analyze_index_for_n_prefix(
while (rec_idx < dive_below_idx
&& btr_pcur_is_on_user_rec(&pcur)) {
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+ btr_pcur_move_to_next_user_rec(&pcur, mtr);
rec_idx++;
}
@@ -1107,12 +1685,20 @@ dict_stats_analyze_index_for_n_prefix(
break;
}
+ /* it could be that the tree has changed in such a way that
+ the record under dive_below_idx is the supremum record, in
+ this case rec_idx == dive_below_idx and pcur is positioned
+ on the supremum, we do not want to dive below it */
+ if (!btr_pcur_is_on_user_rec(&pcur)) {
+ break;
+ }
+
ut_a(rec_idx == dive_below_idx);
ib_uint64_t n_diff_on_leaf_page;
n_diff_on_leaf_page = dict_stats_analyze_index_below_cur(
- btr_pcur_get_btr_cur(&pcur), n_prefix, &mtr);
+ btr_pcur_get_btr_cur(&pcur), n_prefix, mtr);
/* We adjust n_diff_on_leaf_page here to avoid counting
one record twice - once as the last on some page and once
@@ -1135,12 +1721,13 @@ dict_stats_analyze_index_for_n_prefix(
n_diff_sum_of_all_analyzed_pages += n_diff_on_leaf_page;
}
- if (n_diff_sum_of_all_analyzed_pages == 0) {
- n_diff_sum_of_all_analyzed_pages = 1;
- }
+ /* n_diff_sum_of_all_analyzed_pages can be 0 here if all the leaf
+ pages sampled contained only delete-marked records. In this case
+ we should assign 0 to index->stat_n_diff_key_vals[n_prefix - 1], which
+ the formula below does. */
/* See REF01 for an explanation of the algorithm */
- index->stat_n_diff_key_vals[n_prefix]
+ index->stat_n_diff_key_vals[n_prefix - 1]
= index->stat_n_leaf_pages
* n_diff_for_this_prefix
@@ -1149,31 +1736,25 @@ dict_stats_analyze_index_for_n_prefix(
* n_diff_sum_of_all_analyzed_pages
/ n_recs_to_dive_below;
- index->stat_n_sample_sizes[n_prefix] = n_recs_to_dive_below;
+ index->stat_n_sample_sizes[n_prefix - 1] = n_recs_to_dive_below;
DEBUG_PRINTF(" %s(): n_diff=" UINT64PF " for n_prefix=%lu "
"(%lu"
" * " UINT64PF " / " UINT64PF
" * " UINT64PF " / " UINT64PF ")\n",
- __func__, index->stat_n_diff_key_vals[n_prefix],
+ __func__, index->stat_n_diff_key_vals[n_prefix - 1],
n_prefix,
index->stat_n_leaf_pages,
n_diff_for_this_prefix, total_recs_on_level,
n_diff_sum_of_all_analyzed_pages, n_recs_to_dive_below);
btr_pcur_close(&pcur);
-
- mtr_commit(&mtr);
-
- mem_heap_free(heap);
}
-/* @} */
/*********************************************************************//**
Calculates new statistics for a given index and saves them to the index
members stat_n_diff_key_vals[], stat_n_sample_sizes[], stat_index_size and
-stat_n_leaf_pages. This function could be slow.
-dict_stats_analyze_index() @{ */
+stat_n_leaf_pages. This function could be slow. */
static
void
dict_stats_analyze_index(
@@ -1182,7 +1763,7 @@ dict_stats_analyze_index(
{
ulint root_level;
ulint level;
- ibool level_is_analyzed;
+ bool level_is_analyzed;
ulint n_uniq;
ulint n_prefix;
ib_uint64_t* n_diff_on_level;
@@ -1191,10 +1772,11 @@ dict_stats_analyze_index(
dyn_array_t* n_diff_boundaries;
mtr_t mtr;
ulint size;
- ulint i;
DEBUG_PRINTF(" %s(index=%s)\n", __func__, index->name);
+ dict_stats_empty_index(index);
+
mtr_start(&mtr);
mtr_s_lock(dict_index_get_lock(index), &mtr);
@@ -1206,19 +1788,12 @@ dict_stats_analyze_index(
size = btr_get_size(index, BTR_N_LEAF_PAGES, &mtr);
}
+ /* Release the X locks on the root page taken by btr_get_size() */
+ mtr_commit(&mtr);
+
switch (size) {
case ULINT_UNDEFINED:
- mtr_commit(&mtr);
- /* Fake some statistics. */
- index->stat_index_size = index->stat_n_leaf_pages = 1;
-
- for (i = dict_index_get_n_unique(index); i; ) {
- index->stat_n_diff_key_vals[i--] = 1;
- }
-
- memset(index->stat_n_non_null_key_vals, 0,
- (1 + dict_index_get_n_unique(index))
- * sizeof(*index->stat_n_non_null_key_vals));
+ dict_stats_assert_initialized_index(index);
return;
case 0:
/* The root node of the tree is a leaf */
@@ -1227,23 +1802,25 @@ dict_stats_analyze_index(
index->stat_n_leaf_pages = size;
- root_level = btr_page_get_level(btr_root_get(index, &mtr), &mtr);
+ mtr_start(&mtr);
+
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
- mtr_commit(&mtr);
+ root_level = btr_height_get(index, &mtr);
n_uniq = dict_index_get_n_unique(index);
- /* if the tree has just one level (and one page) or if the user
- has requested to sample too many pages then do full scan */
+ /* If the tree has just one level (and one page) or if the user
+ has requested to sample too many pages then do full scan.
+
+ For each n-column prefix (for n=1..n_uniq) N_SAMPLE_PAGES(index)
+ will be sampled, so in total N_SAMPLE_PAGES(index) * n_uniq leaf
+ pages will be sampled. If that number is bigger than the total
+ number of leaf pages then do full scan of the leaf level instead
+ since it will be faster and will give better results. */
+
if (root_level == 0
- /* for each n-column prefix (for n=1..n_uniq)
- srv_stats_persistent_sample_pages will be sampled, so in total
- srv_stats_persistent_sample_pages * n_uniq leaf pages will be
- sampled. If that number is bigger than the total number of leaf
- pages then do full scan of the leaf level instead since it will
- be faster and will give better results. */
- || srv_stats_persistent_sample_pages * n_uniq
- > index->stat_n_leaf_pages) {
+ || N_SAMPLE_PAGES(index) * n_uniq > index->stat_n_leaf_pages) {
if (root_level == 0) {
DEBUG_PRINTF(" %s(): just one page, "
@@ -1261,27 +1838,28 @@ dict_stats_analyze_index(
index->stat_n_diff_key_vals,
&total_recs,
&total_pages,
- NULL /*boundaries not needed*/);
+ NULL /* boundaries not needed */,
+ &mtr);
- for (i = 1; i <= n_uniq; i++) {
+ for (ulint i = 0; i < n_uniq; i++) {
index->stat_n_sample_sizes[i] = total_pages;
}
+ mtr_commit(&mtr);
+
+ dict_stats_assert_initialized_index(index);
return;
}
- /* else */
/* set to zero */
- n_diff_on_level = (ib_uint64_t*) mem_zalloc((n_uniq + 1)
- * sizeof(ib_uint64_t));
+ n_diff_on_level = reinterpret_cast<ib_uint64_t*>
+ (mem_zalloc(n_uniq * sizeof(ib_uint64_t)));
- n_diff_boundaries = (dyn_array_t*) mem_alloc((n_uniq + 1)
- * sizeof(dyn_array_t));
+ n_diff_boundaries = reinterpret_cast<dyn_array_t*>
+ (mem_alloc(n_uniq * sizeof(dyn_array_t)));
- for (i = 1; i <= n_uniq; i++) {
- /* initialize the dynamic arrays, the first one
- (index=0) is ignored to follow the same indexing
- scheme as n_diff_on_level[] */
+ for (ulint i = 0; i < n_uniq; i++) {
+ /* initialize the dynamic arrays */
dyn_array_create(&n_diff_boundaries[i]);
}
@@ -1299,25 +1877,42 @@ dict_stats_analyze_index(
So if we find that the first level containing D distinct
keys (on n_prefix columns) is L, we continue from L when
searching for D distinct keys on n_prefix-1 columns. */
- level = (long) root_level;
- level_is_analyzed = FALSE;
+ level = root_level;
+ level_is_analyzed = false;
+
for (n_prefix = n_uniq; n_prefix >= 1; n_prefix--) {
DEBUG_PRINTF(" %s(): searching level with >=%llu "
"distinct records, n_prefix=%lu\n",
- __func__, N_DIFF_REQUIRED, n_prefix);
+ __func__, N_DIFF_REQUIRED(index), n_prefix);
+
+ /* Commit the mtr to release the tree S lock to allow
+ other threads to do some work too. */
+ mtr_commit(&mtr);
+ mtr_start(&mtr);
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
+ if (root_level != btr_height_get(index, &mtr)) {
+ /* Just quit if the tree has changed beyond
+ recognition here. The old stats from previous
+ runs will remain in the values that we have
+ not calculated yet. Initially when the index
+ object is created the stats members are given
+ some sensible values so leaving them untouched
+ here even the first time will not cause us to
+ read uninitialized memory later. */
+ break;
+ }
/* check whether we should pick the current level;
we pick level 1 even if it does not have enough
distinct records because we do not want to scan the
leaf level because it may contain too many records */
if (level_is_analyzed
- && (n_diff_on_level[n_prefix] >= N_DIFF_REQUIRED
+ && (n_diff_on_level[n_prefix - 1] >= N_DIFF_REQUIRED(index)
|| level == 1)) {
goto found_level;
}
- /* else */
/* search for a level that contains enough distinct records */
@@ -1325,12 +1920,14 @@ dict_stats_analyze_index(
/* if this does not hold we should be on
"found_level" instead of here */
- ut_ad(n_diff_on_level[n_prefix] < N_DIFF_REQUIRED);
+ ut_ad(n_diff_on_level[n_prefix - 1]
+ < N_DIFF_REQUIRED(index));
level--;
- level_is_analyzed = FALSE;
+ level_is_analyzed = false;
}
+ /* descend into the tree, searching for "good enough" level */
for (;;) {
/* make sure we do not scan the leaf level
@@ -1349,18 +1946,19 @@ dict_stats_analyze_index(
total_recs is left from the previous iteration when
we scanned one level upper or we have not scanned any
levels yet in which case total_recs is 1. */
- if (total_recs > srv_stats_persistent_sample_pages) {
+ if (total_recs > N_SAMPLE_PAGES(index)) {
- /* if the above cond is true then we are not
- at the root level since on the root level
- total_recs == 1 and cannot
- be > srv_stats_persistent_sample_pages */
+ /* if the above cond is true then we are
+ not at the root level since on the root
+ level total_recs == 1 (set before we
+ enter the n-prefix loop) and cannot
+ be > N_SAMPLE_PAGES(index) */
ut_a(level != root_level);
/* step one level back and be satisfied with
whatever it contains */
level++;
- level_is_analyzed = TRUE;
+ level_is_analyzed = true;
break;
}
@@ -1370,27 +1968,28 @@ dict_stats_analyze_index(
n_diff_on_level,
&total_recs,
&total_pages,
- n_diff_boundaries);
+ n_diff_boundaries,
+ &mtr);
- level_is_analyzed = TRUE;
+ level_is_analyzed = true;
- if (n_diff_on_level[n_prefix] >= N_DIFF_REQUIRED
+ if (n_diff_on_level[n_prefix - 1]
+ >= N_DIFF_REQUIRED(index)
|| level == 1) {
/* we found a good level with many distinct
records or we have reached the last level we
could scan */
break;
}
- /* else */
level--;
- level_is_analyzed = FALSE;
+ level_is_analyzed = false;
}
found_level:
DEBUG_PRINTF(" %s(): found level %lu that has " UINT64PF
" distinct records for n_prefix=%lu\n",
- __func__, level, n_diff_on_level[n_prefix],
+ __func__, level, n_diff_on_level[n_prefix - 1],
n_prefix);
/* here we are either on level 1 or the level that we are on
@@ -1406,28 +2005,30 @@ found_level:
dict_stats_analyze_index_for_n_prefix(
index, level, total_recs, n_prefix,
- n_diff_on_level[n_prefix],
- &n_diff_boundaries[n_prefix]);
+ n_diff_on_level[n_prefix - 1],
+ &n_diff_boundaries[n_prefix - 1], &mtr);
}
- for (i = 1; i <= n_uniq; i++) {
+ mtr_commit(&mtr);
+
+ for (ulint i = 0; i < n_uniq; i++) {
dyn_array_free(&n_diff_boundaries[i]);
}
mem_free(n_diff_boundaries);
mem_free(n_diff_on_level);
+
+ dict_stats_assert_initialized_index(index);
}
-/* @} */
/*********************************************************************//**
Calculates new estimates for table and index statistics. This function
is relatively slow and is used to calculate persistent statistics that
will be saved on disk.
-dict_stats_update_persistent() @{
@return DB_SUCCESS or error code */
static
-enum db_err
+dberr_t
dict_stats_update_persistent(
/*=========================*/
dict_table_t* table) /*!< in/out: table */
@@ -1436,21 +2037,30 @@ dict_stats_update_persistent(
DEBUG_PRINTF("%s(table=%s)\n", __func__, table->name);
- /* XXX quit if interrupted, e.g. SIGTERM */
+ dict_table_stats_lock(table, RW_X_LATCH);
/* analyze the clustered index first */
index = dict_table_get_first_index(table);
- if (index == NULL) {
+ if (index == NULL
+ || dict_index_is_corrupted(index)
+ || (index->type | DICT_UNIQUE) != (DICT_CLUSTERED | DICT_UNIQUE)) {
+
/* Table definition is corrupt */
+ dict_table_stats_unlock(table, RW_X_LATCH);
+ dict_stats_empty_table(table);
+
return(DB_CORRUPTION);
}
+ ut_ad(!dict_index_is_univ(index));
+
dict_stats_analyze_index(index);
- table->stat_n_rows
- = index->stat_n_diff_key_vals[dict_index_get_n_unique(index)];
+ ulint n_unique = dict_index_get_n_unique(index);
+
+ table->stat_n_rows = index->stat_n_diff_key_vals[n_unique - 1];
table->stat_clustered_index_size = index->stat_index_size;
@@ -1462,31 +2072,47 @@ dict_stats_update_persistent(
index != NULL;
index = dict_table_get_next_index(index)) {
+ ut_ad(!dict_index_is_univ(index));
+
if (index->type & DICT_FTS) {
continue;
}
- dict_stats_analyze_index(index);
+ dict_stats_empty_index(index);
+
+ if (dict_stats_should_ignore_index(index)) {
+ continue;
+ }
+
+ if (!(table->stats_bg_flag & BG_STAT_SHOULD_QUIT)) {
+ dict_stats_analyze_index(index);
+ }
table->stat_sum_of_other_index_sizes
+= index->stat_index_size;
}
+ table->stats_last_recalc = ut_time();
+
table->stat_modified_counter = 0;
table->stat_initialized = TRUE;
+ dict_stats_assert_initialized(table);
+
+ dict_table_stats_unlock(table, RW_X_LATCH);
+
return(DB_SUCCESS);
}
-/* @} */
+#include "mysql_com.h"
/*********************************************************************//**
Save an individual index's statistic into the persistent statistics
storage.
dict_stats_save_index_stat() @{
@return DB_SUCCESS or error code */
static
-enum db_err
+dberr_t
dict_stats_save_index_stat(
/*=======================*/
dict_index_t* index, /*!< in: index */
@@ -1494,95 +2120,114 @@ dict_stats_save_index_stat(
const char* stat_name, /*!< in: name of the stat */
ib_uint64_t stat_value, /*!< in: value of the stat */
ib_uint64_t* sample_size, /*!< in: n pages sampled or NULL */
- const char* stat_description,/*!< in: description of the stat */
- trx_t* trx, /*!< in/out: transaction to use */
- ibool caller_has_dict_sys_mutex)/*!< in: TRUE if the caller
- owns dict_sys->mutex */
+ const char* stat_description)/*!< in: description of the stat */
{
pars_info_t* pinfo;
- enum db_err ret;
+ dberr_t ret;
+ char db_utf8[MAX_DB_UTF8_LEN];
+ char table_utf8[MAX_TABLE_UTF8_LEN];
- pinfo = pars_info_create();
-
- pars_info_add_literal(pinfo, "database_name", index->table->name,
- dict_get_db_name_len(index->table->name),
- DATA_VARCHAR, 0);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(mutex_own(&dict_sys->mutex));
- pars_info_add_str_literal(pinfo, "table_name",
- dict_remove_db_name(index->table->name));
+ dict_fs2utf8(index->table->name, db_utf8, sizeof(db_utf8),
+ table_utf8, sizeof(table_utf8));
+ pinfo = pars_info_create();
+ pars_info_add_str_literal(pinfo, "database_name", db_utf8);
+ pars_info_add_str_literal(pinfo, "table_name", table_utf8);
+ UNIV_MEM_ASSERT_RW_ABORT(index->name, strlen(index->name));
pars_info_add_str_literal(pinfo, "index_name", index->name);
-
+ UNIV_MEM_ASSERT_RW_ABORT(&last_update, 4);
pars_info_add_int4_literal(pinfo, "last_update", last_update);
-
+ UNIV_MEM_ASSERT_RW_ABORT(stat_name, strlen(stat_name));
pars_info_add_str_literal(pinfo, "stat_name", stat_name);
-
+ UNIV_MEM_ASSERT_RW_ABORT(&stat_value, 8);
pars_info_add_ull_literal(pinfo, "stat_value", stat_value);
-
if (sample_size != NULL) {
+ UNIV_MEM_ASSERT_RW_ABORT(sample_size, 8);
pars_info_add_ull_literal(pinfo, "sample_size", *sample_size);
} else {
pars_info_add_literal(pinfo, "sample_size", NULL,
UNIV_SQL_NULL, DATA_FIXBINARY, 0);
}
-
+ UNIV_MEM_ASSERT_RW_ABORT(stat_description, strlen(stat_description));
pars_info_add_str_literal(pinfo, "stat_description",
stat_description);
- ret = que_eval_sql(pinfo,
- "PROCEDURE INDEX_STATS_SAVE () IS\n"
- "dummy CHAR;\n"
- "BEGIN\n"
-
- "SELECT database_name INTO dummy\n"
- "FROM \"" INDEX_STATS_NAME "\"\n"
- "WHERE\n"
- "database_name = :database_name AND\n"
- "table_name = :table_name AND\n"
- "index_name = :index_name AND\n"
- "stat_name = :stat_name\n"
- "FOR UPDATE;\n"
-
- "IF (SQL % NOTFOUND) THEN\n"
- " INSERT INTO \"" INDEX_STATS_NAME "\"\n"
- " VALUES\n"
- " (\n"
- " :database_name,\n"
- " :table_name,\n"
- " :index_name,\n"
- " :last_update,\n"
- " :stat_name,\n"
- " :stat_value,\n"
- " :sample_size,\n"
- " :stat_description\n"
- " );\n"
- "ELSE\n"
- " UPDATE \"" INDEX_STATS_NAME "\" SET\n"
- " last_update = :last_update,\n"
- " stat_value = :stat_value,\n"
- " sample_size = :sample_size,\n"
- " stat_description = :stat_description\n"
- " WHERE\n"
- " database_name = :database_name AND\n"
- " table_name = :table_name AND\n"
- " index_name = :index_name AND\n"
- " stat_name = :stat_name;\n"
- "END IF;\n"
- "END;",
- !caller_has_dict_sys_mutex, trx);
-
- /* pinfo is freed by que_eval_sql() */
+ ret = dict_stats_exec_sql(
+ pinfo,
+ "PROCEDURE INDEX_STATS_SAVE_INSERT () IS\n"
+ "BEGIN\n"
+ "INSERT INTO \"" INDEX_STATS_NAME "\"\n"
+ "VALUES\n"
+ "(\n"
+ ":database_name,\n"
+ ":table_name,\n"
+ ":index_name,\n"
+ ":last_update,\n"
+ ":stat_name,\n"
+ ":stat_value,\n"
+ ":sample_size,\n"
+ ":stat_description\n"
+ ");\n"
+ "END;");
+
+ if (ret == DB_DUPLICATE_KEY) {
+
+ pinfo = pars_info_create();
+ pars_info_add_str_literal(pinfo, "database_name", db_utf8);
+ pars_info_add_str_literal(pinfo, "table_name", table_utf8);
+ UNIV_MEM_ASSERT_RW_ABORT(index->name, strlen(index->name));
+ pars_info_add_str_literal(pinfo, "index_name", index->name);
+ UNIV_MEM_ASSERT_RW_ABORT(&last_update, 4);
+ pars_info_add_int4_literal(pinfo, "last_update", last_update);
+ UNIV_MEM_ASSERT_RW_ABORT(stat_name, strlen(stat_name));
+ pars_info_add_str_literal(pinfo, "stat_name", stat_name);
+ UNIV_MEM_ASSERT_RW_ABORT(&stat_value, 8);
+ pars_info_add_ull_literal(pinfo, "stat_value", stat_value);
+ if (sample_size != NULL) {
+ UNIV_MEM_ASSERT_RW_ABORT(sample_size, 8);
+ pars_info_add_ull_literal(pinfo, "sample_size", *sample_size);
+ } else {
+ pars_info_add_literal(pinfo, "sample_size", NULL,
+ UNIV_SQL_NULL, DATA_FIXBINARY, 0);
+ }
+ UNIV_MEM_ASSERT_RW_ABORT(stat_description, strlen(stat_description));
+ pars_info_add_str_literal(pinfo, "stat_description",
+ stat_description);
+
+ ret = dict_stats_exec_sql(
+ pinfo,
+ "PROCEDURE INDEX_STATS_SAVE_UPDATE () IS\n"
+ "BEGIN\n"
+ "UPDATE \"" INDEX_STATS_NAME "\" SET\n"
+ "last_update = :last_update,\n"
+ "stat_value = :stat_value,\n"
+ "sample_size = :sample_size,\n"
+ "stat_description = :stat_description\n"
+ "WHERE\n"
+ "database_name = :database_name AND\n"
+ "table_name = :table_name AND\n"
+ "index_name = :index_name AND\n"
+ "stat_name = :stat_name;\n"
+ "END;");
+ }
if (ret != DB_SUCCESS) {
+ char buf_table[MAX_FULL_NAME_LEN];
+ char buf_index[MAX_FULL_NAME_LEN];
ut_print_timestamp(stderr);
fprintf(stderr,
- " InnoDB: Error while trying to save index "
- "statistics for table %s, index %s, "
- "stat name %s: %s\n",
- index->table->name, index->name,
+ " InnoDB: Cannot save index statistics for table "
+ "%s, index %s, stat name \"%s\": %s\n",
+ ut_format_name(index->table->name, TRUE,
+ buf_table, sizeof(buf_table)),
+ ut_format_name(index->name, FALSE,
+ buf_index, sizeof(buf_index)),
stat_name, ut_strerr(ret));
-
- trx->error_state = DB_SUCCESS;
}
return(ret);
@@ -1594,196 +2239,165 @@ Save the table's statistics into the persistent statistics storage.
dict_stats_save() @{
@return DB_SUCCESS or error code */
static
-enum db_err
+dberr_t
dict_stats_save(
/*============*/
- dict_table_t* table, /*!< in: table */
- ibool caller_has_dict_sys_mutex)/*!< in: TRUE if the caller
- owns dict_sys->mutex */
+ dict_table_t* table_orig) /*!< in: table */
{
- trx_t* trx;
pars_info_t* pinfo;
- dict_index_t* index;
lint now;
- enum db_err ret;
+ dberr_t ret;
+ dict_table_t* table;
+ char db_utf8[MAX_DB_UTF8_LEN];
+ char table_utf8[MAX_TABLE_UTF8_LEN];
+
+ table = dict_stats_snapshot_create(table_orig);
+
+ dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8),
+ table_utf8, sizeof(table_utf8));
+
+ rw_lock_x_lock(&dict_operation_lock);
+ mutex_enter(&dict_sys->mutex);
/* MySQL's timestamp is 4 byte, so we use
pars_info_add_int4_literal() which takes a lint arg, so "now" is
lint */
now = (lint) ut_time();
- trx = trx_allocate_for_background();
-
- /* Use 'read-uncommitted' so that the SELECTs we execute
- do not get blocked in case some user has locked the rows we
- are SELECTing */
-
- trx->isolation_level = TRX_ISO_READ_UNCOMMITTED;
-
- trx_start_if_not_started(trx);
+#define PREPARE_PINFO_FOR_TABLE_SAVE(p, t, n) \
+ do { \
+ pars_info_add_str_literal((p), "database_name", db_utf8); \
+ pars_info_add_str_literal((p), "table_name", table_utf8); \
+ pars_info_add_int4_literal((p), "last_update", (n)); \
+ pars_info_add_ull_literal((p), "n_rows", (t)->stat_n_rows); \
+ pars_info_add_ull_literal((p), "clustered_index_size", \
+ (t)->stat_clustered_index_size); \
+ pars_info_add_ull_literal((p), "sum_of_other_index_sizes", \
+ (t)->stat_sum_of_other_index_sizes); \
+ } while(false);
pinfo = pars_info_create();
- pars_info_add_literal(pinfo, "database_name", table->name,
- dict_get_db_name_len(table->name),
- DATA_VARCHAR, 0);
-
- pars_info_add_str_literal(pinfo, "table_name",
- dict_remove_db_name(table->name));
-
- pars_info_add_int4_literal(pinfo, "last_update", now);
-
- pars_info_add_ull_literal(pinfo, "n_rows", table->stat_n_rows);
-
- pars_info_add_ull_literal(pinfo, "clustered_index_size",
- table->stat_clustered_index_size);
-
- pars_info_add_ull_literal(pinfo, "sum_of_other_index_sizes",
- table->stat_sum_of_other_index_sizes);
-
- ret = que_eval_sql(pinfo,
- "PROCEDURE TABLE_STATS_SAVE () IS\n"
- "dummy CHAR;\n"
- "BEGIN\n"
-
- "SELECT database_name INTO dummy\n"
- "FROM \"" TABLE_STATS_NAME "\"\n"
- "WHERE\n"
- "database_name = :database_name AND\n"
- "table_name = :table_name\n"
- "FOR UPDATE;\n"
-
- "IF (SQL % NOTFOUND) THEN\n"
- " INSERT INTO \"" TABLE_STATS_NAME "\"\n"
- " VALUES\n"
- " (\n"
- " :database_name,\n"
- " :table_name,\n"
- " :last_update,\n"
- " :n_rows,\n"
- " :clustered_index_size,\n"
- " :sum_of_other_index_sizes\n"
- " );\n"
- "ELSE\n"
- " UPDATE \"" TABLE_STATS_NAME "\" SET\n"
- " last_update = :last_update,\n"
- " n_rows = :n_rows,\n"
- " clustered_index_size = :clustered_index_size,\n"
- " sum_of_other_index_sizes = "
- " :sum_of_other_index_sizes\n"
- " WHERE\n"
- " database_name = :database_name AND\n"
- " table_name = :table_name;\n"
- "END IF;\n"
- "END;",
- !caller_has_dict_sys_mutex, trx);
-
- /* pinfo is freed by que_eval_sql() */
+ PREPARE_PINFO_FOR_TABLE_SAVE(pinfo, table, now);
+
+ ret = dict_stats_exec_sql(
+ pinfo,
+ "PROCEDURE TABLE_STATS_SAVE_INSERT () IS\n"
+ "BEGIN\n"
+ "INSERT INTO \"" TABLE_STATS_NAME "\"\n"
+ "VALUES\n"
+ "(\n"
+ ":database_name,\n"
+ ":table_name,\n"
+ ":last_update,\n"
+ ":n_rows,\n"
+ ":clustered_index_size,\n"
+ ":sum_of_other_index_sizes\n"
+ ");\n"
+ "END;");
+
+ if (ret == DB_DUPLICATE_KEY) {
+ pinfo = pars_info_create();
+
+ PREPARE_PINFO_FOR_TABLE_SAVE(pinfo, table, now);
+
+ ret = dict_stats_exec_sql(
+ pinfo,
+ "PROCEDURE TABLE_STATS_SAVE_UPDATE () IS\n"
+ "BEGIN\n"
+ "UPDATE \"" TABLE_STATS_NAME "\" SET\n"
+ "last_update = :last_update,\n"
+ "n_rows = :n_rows,\n"
+ "clustered_index_size = :clustered_index_size,\n"
+ "sum_of_other_index_sizes = "
+ " :sum_of_other_index_sizes\n"
+ "WHERE\n"
+ "database_name = :database_name AND\n"
+ "table_name = :table_name;\n"
+ "END;");
+ }
if (ret != DB_SUCCESS) {
-
+ char buf[MAX_FULL_NAME_LEN];
ut_print_timestamp(stderr);
fprintf(stderr,
- " InnoDB: Error while trying to save table "
- "statistics for table %s: %s\n",
- table->name, ut_strerr(ret));
-
- goto end_rollback;
+ " InnoDB: Cannot save table statistics for table "
+ "%s: %s\n",
+ ut_format_name(table->name, TRUE, buf, sizeof(buf)),
+ ut_strerr(ret));
+ goto end;
}
+ dict_index_t* index;
+
for (index = dict_table_get_first_index(table);
index != NULL;
index = dict_table_get_next_index(index)) {
- ib_uint64_t stat_n_diff_key_vals[REC_MAX_N_FIELDS];
- ib_uint64_t stat_n_sample_sizes[REC_MAX_N_FIELDS];
- ulint n_uniq;
- ulint i;
+ if (dict_stats_should_ignore_index(index)) {
+ continue;
+ }
+
+ ut_ad(!dict_index_is_univ(index));
ret = dict_stats_save_index_stat(index, now, "size",
index->stat_index_size,
NULL,
"Number of pages "
- "in the index",
- trx,
- caller_has_dict_sys_mutex);
+ "in the index");
if (ret != DB_SUCCESS) {
- goto end_rollback;
+ goto end;
}
ret = dict_stats_save_index_stat(index, now, "n_leaf_pages",
index->stat_n_leaf_pages,
NULL,
"Number of leaf pages "
- "in the index",
- trx,
- caller_has_dict_sys_mutex);
+ "in the index");
if (ret != DB_SUCCESS) {
- goto end_rollback;
+ goto end;
}
- n_uniq = dict_index_get_n_unique(index);
-
- ut_ad(n_uniq + 1 <= UT_ARR_SIZE(stat_n_diff_key_vals));
-
- memcpy(stat_n_diff_key_vals, index->stat_n_diff_key_vals,
- (n_uniq + 1) * sizeof(index->stat_n_diff_key_vals[0]));
-
- ut_ad(n_uniq + 1 <= UT_ARR_SIZE(stat_n_sample_sizes));
-
- memcpy(stat_n_sample_sizes, index->stat_n_sample_sizes,
- (n_uniq + 1) * sizeof(index->stat_n_sample_sizes[0]));
-
- for (i = 1; i <= n_uniq; i++) {
+ for (ulint i = 0; i < index->n_uniq; i++) {
char stat_name[16];
char stat_description[1024];
ulint j;
ut_snprintf(stat_name, sizeof(stat_name),
- "n_diff_pfx%02lu", i);
+ "n_diff_pfx%02lu", i + 1);
/* craft a string that contains the columns names */
ut_snprintf(stat_description,
sizeof(stat_description),
"%s", index->fields[0].name);
- for (j = 2; j <= i; j++) {
+ for (j = 1; j <= i; j++) {
size_t len;
len = strlen(stat_description);
ut_snprintf(stat_description + len,
sizeof(stat_description) - len,
- ",%s", index->fields[j - 1].name);
+ ",%s", index->fields[j].name);
}
ret = dict_stats_save_index_stat(
index, now, stat_name,
- stat_n_diff_key_vals[i],
- &stat_n_sample_sizes[i],
- stat_description, trx,
- caller_has_dict_sys_mutex);
+ index->stat_n_diff_key_vals[i],
+ &index->stat_n_sample_sizes[i],
+ stat_description);
if (ret != DB_SUCCESS) {
- goto end_rollback;
+ goto end;
}
}
}
- trx_commit_for_mysql(trx);
- ret = DB_SUCCESS;
- goto end_free;
-
-end_rollback:
-
- trx->op_info = "rollback of internal transaction on stats tables";
- trx_rollback_to_savepoint(trx, NULL);
- trx->op_info = "";
- ut_a(trx->error_state == DB_SUCCESS);
+end:
+ mutex_exit(&dict_sys->mutex);
+ rw_lock_x_unlock(&dict_operation_lock);
-end_free:
-
- trx_free_for_background(trx);
+ dict_stats_snapshot_free(table);
return(ret);
}
@@ -1875,11 +2489,11 @@ dict_stats_fetch_table_stats_step(
/** Aux struct used to pass a table and a boolean to
dict_stats_fetch_index_stats_step(). */
-typedef struct index_fetch_struct {
+struct index_fetch_t {
dict_table_t* table; /*!< table whose indexes are to be modified */
- ibool stats_were_modified; /*!< will be set to TRUE if at
+ bool stats_were_modified; /*!< will be set to true if at
least one index stats were modified */
-} index_fetch_t;
+};
/*********************************************************************//**
Called for the rows that are selected by
@@ -2036,12 +2650,12 @@ dict_stats_fetch_index_stats_step(
if (stat_name_len == 4 /* strlen("size") */
&& strncasecmp("size", stat_name, stat_name_len) == 0) {
index->stat_index_size = (ulint) stat_value;
- arg->stats_were_modified = TRUE;
+ arg->stats_were_modified = true;
} else if (stat_name_len == 12 /* strlen("n_leaf_pages") */
&& strncasecmp("n_leaf_pages", stat_name, stat_name_len)
== 0) {
index->stat_n_leaf_pages = (ulint) stat_value;
- arg->stats_were_modified = TRUE;
+ arg->stats_were_modified = true;
} else if (stat_name_len > PFX_LEN /* e.g. stat_name=="n_diff_pfx01" */
&& strncasecmp(PFX, stat_name, PFX_LEN) == 0) {
@@ -2057,19 +2671,24 @@ dict_stats_fetch_index_stats_step(
|| num_ptr[0] < '0' || num_ptr[0] > '9'
|| num_ptr[1] < '0' || num_ptr[1] > '9') {
+ char db_utf8[MAX_DB_UTF8_LEN];
+ char table_utf8[MAX_TABLE_UTF8_LEN];
+
+ dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8),
+ table_utf8, sizeof(table_utf8));
+
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Ignoring strange row from "
"%s WHERE "
- "database_name = '%.*s' AND "
+ "database_name = '%s' AND "
"table_name = '%s' AND "
"index_name = '%s' AND "
"stat_name = '%.*s'; because stat_name "
"is malformed\n",
INDEX_STATS_NAME_PRINT,
- (int) dict_get_db_name_len(table->name),
- table->name,
- dict_remove_db_name(table->name),
+ db_utf8,
+ table_utf8,
index->name,
(int) stat_name_len,
stat_name);
@@ -2081,41 +2700,50 @@ dict_stats_fetch_index_stats_step(
note that stat_name does not have a terminating '\0' */
n_pfx = (num_ptr[0] - '0') * 10 + (num_ptr[1] - '0');
- if (n_pfx == 0 || n_pfx > dict_index_get_n_unique(index)) {
+ ulint n_uniq = index->n_uniq;
+
+ if (n_pfx == 0 || n_pfx > n_uniq) {
+
+ char db_utf8[MAX_DB_UTF8_LEN];
+ char table_utf8[MAX_TABLE_UTF8_LEN];
+
+ dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8),
+ table_utf8, sizeof(table_utf8));
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Ignoring strange row from "
"%s WHERE "
- "database_name = '%.*s' AND "
+ "database_name = '%s' AND "
"table_name = '%s' AND "
"index_name = '%s' AND "
"stat_name = '%.*s'; because stat_name is "
"out of range, the index has %lu unique "
"columns\n",
INDEX_STATS_NAME_PRINT,
- (int) dict_get_db_name_len(table->name),
- table->name,
- dict_remove_db_name(table->name),
+ db_utf8,
+ table_utf8,
index->name,
(int) stat_name_len,
stat_name,
- dict_index_get_n_unique(index));
+ n_uniq);
return(TRUE);
}
/* else */
- index->stat_n_diff_key_vals[n_pfx] = stat_value;
+ index->stat_n_diff_key_vals[n_pfx - 1] = stat_value;
if (sample_size != UINT64_UNDEFINED) {
- index->stat_n_sample_sizes[n_pfx] = sample_size;
+ index->stat_n_sample_sizes[n_pfx - 1] = sample_size;
} else {
/* hmm, strange... the user must have UPDATEd the
table manually and SET sample_size = NULL */
- index->stat_n_sample_sizes[n_pfx] = 0;
+ index->stat_n_sample_sizes[n_pfx - 1] = 0;
}
- arg->stats_were_modified = TRUE;
+ index->stat_n_non_null_key_vals[n_pfx - 1] = 0;
+
+ arg->stats_were_modified = true;
} else {
/* silently ignore rows with unknown stat_name, the
user may have developed her own stats */
@@ -2131,19 +2759,25 @@ Read table's statistics from the persistent statistics storage.
dict_stats_fetch_from_ps() @{
@return DB_SUCCESS or error code */
static
-enum db_err
+dberr_t
dict_stats_fetch_from_ps(
/*=====================*/
- dict_table_t* table, /*!< in/out: table */
- ibool caller_has_dict_sys_mutex)/*!< in: TRUE if the caller
- owns dict_sys->mutex */
+ dict_table_t* table) /*!< in/out: table */
{
index_fetch_t index_fetch_arg;
trx_t* trx;
pars_info_t* pinfo;
- enum db_err ret;
+ dberr_t ret;
+ char db_utf8[MAX_DB_UTF8_LEN];
+ char table_utf8[MAX_TABLE_UTF8_LEN];
+
+ ut_ad(!mutex_own(&dict_sys->mutex));
- ut_ad(mutex_own(&dict_sys->mutex) == caller_has_dict_sys_mutex);
+ /* Initialize all stats to dummy values before fetching because if
+ the persistent storage contains incomplete stats (e.g. missing stats
+ for some index) then we would end up with (partially) uninitialized
+ stats. */
+ dict_stats_empty_table(table);
trx = trx_allocate_for_background();
@@ -2155,14 +2789,14 @@ dict_stats_fetch_from_ps(
trx_start_if_not_started(trx);
+ dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8),
+ table_utf8, sizeof(table_utf8));
+
pinfo = pars_info_create();
- pars_info_add_literal(pinfo, "database_name", table->name,
- dict_get_db_name_len(table->name),
- DATA_VARCHAR, 0);
+ pars_info_add_str_literal(pinfo, "database_name", db_utf8);
- pars_info_add_str_literal(pinfo, "table_name",
- dict_remove_db_name(table->name));
+ pars_info_add_str_literal(pinfo, "table_name", table_utf8);
pars_info_bind_function(pinfo,
"fetch_table_stats_step",
@@ -2170,7 +2804,7 @@ dict_stats_fetch_from_ps(
table);
index_fetch_arg.table = table;
- index_fetch_arg.stats_were_modified = FALSE;
+ index_fetch_arg.stats_were_modified = false;
pars_info_bind_function(pinfo,
"fetch_index_stats_step",
dict_stats_fetch_index_stats_step,
@@ -2230,19 +2864,9 @@ dict_stats_fetch_from_ps(
"CLOSE index_stats_cur;\n"
"END;",
- !caller_has_dict_sys_mutex, trx);
-
+ TRUE, trx);
/* pinfo is freed by que_eval_sql() */
- /* XXX If mysql.innodb_index_stats contained less rows than the number
- of indexes in the table, then some of the indexes of the table
- were left uninitialized. Currently this is ignored and those
- indexes are left with uninitialized stats until ANALYZE TABLE is
- run. This condition happens when the user creates a new index
- on a table. We could return DB_STATS_DO_NOT_EXIST from here,
- forcing the usage of transient stats until mysql.innodb_index_stats
- is complete. */
-
trx_commit_for_mysql(trx);
trx_free_for_background(trx);
@@ -2256,32 +2880,67 @@ dict_stats_fetch_from_ps(
/* @} */
/*********************************************************************//**
+Fetches or calculates new estimates for index statistics.
+dict_stats_update_for_index() @{ */
+UNIV_INTERN
+void
+dict_stats_update_for_index(
+/*========================*/
+ dict_index_t* index) /*!< in/out: index */
+{
+ ut_ad(!mutex_own(&dict_sys->mutex));
+
+ if (dict_stats_is_persistent_enabled(index->table)) {
+
+ if (dict_stats_persistent_storage_check(false)) {
+ dict_table_stats_lock(index->table, RW_X_LATCH);
+ dict_stats_analyze_index(index);
+ dict_table_stats_unlock(index->table, RW_X_LATCH);
+ dict_stats_save(index->table);
+ return;
+ }
+ /* else */
+
+ /* Fall back to transient stats since the persistent
+ storage is not present or is corrupted */
+ char buf_table[MAX_FULL_NAME_LEN];
+ char buf_index[MAX_FULL_NAME_LEN];
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Recalculation of persistent statistics "
+ "requested for table %s index %s but the required "
+ "persistent statistics storage is not present or is "
+ "corrupted. Using transient stats instead.\n",
+ ut_format_name(index->table->name, TRUE,
+ buf_table, sizeof(buf_table)),
+ ut_format_name(index->name, FALSE,
+ buf_index, sizeof(buf_index)));
+ }
+
+ dict_table_stats_lock(index->table, RW_X_LATCH);
+ dict_stats_update_transient_for_index(index);
+ dict_table_stats_unlock(index->table, RW_X_LATCH);
+}
+/* @} */
+
+/*********************************************************************//**
Calculates new estimates for table and index statistics. The statistics
are used in query optimization.
-dict_stats_update() @{
-@return DB_* error code or DB_SUCCESS */
+@return DB_SUCCESS or error code */
UNIV_INTERN
-enum db_err
+dberr_t
dict_stats_update(
/*==============*/
dict_table_t* table, /*!< in/out: table */
- dict_stats_upd_option_t stats_upd_option,
+ dict_stats_upd_option_t stats_upd_option)
/*!< in: whether to (re) calc
the stats or to fetch them from
the persistent statistics
storage */
- ibool caller_has_dict_sys_mutex)
- /*!< in: TRUE if the caller
- owns dict_sys->mutex */
{
- enum db_err ret = DB_ERROR;
+ char buf[MAX_FULL_NAME_LEN];
- /* check whether caller_has_dict_sys_mutex is set correctly;
- note that mutex_own() is not implemented in non-debug code so
- we cannot avoid having this extra param to the current function */
- ut_ad(caller_has_dict_sys_mutex
- ? mutex_own(&dict_sys->mutex)
- : !mutex_own(&dict_sys->mutex));
+ ut_ad(!mutex_own(&dict_sys->mutex));
if (table->ibd_file_missing) {
ut_print_timestamp(stderr);
@@ -2289,83 +2948,61 @@ dict_stats_update(
" InnoDB: cannot calculate statistics for table %s "
"because the .ibd file is missing. For help, please "
"refer to " REFMAN "innodb-troubleshooting.html\n",
- table->name);
-
+ ut_format_name(table->name, TRUE, buf, sizeof(buf)));
+ dict_stats_empty_table(table);
return(DB_TABLESPACE_DELETED);
- }
-
- /* If we have set a high innodb_force_recovery level, do not calculate
- statistics, as a badly corrupted index can cause a crash in it. */
-
- if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
-
+ } else if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
+ /* If we have set a high innodb_force_recovery level, do
+ not calculate statistics, as a badly corrupted index can
+ cause a crash in it. */
+ dict_stats_empty_table(table);
return(DB_SUCCESS);
}
switch (stats_upd_option) {
case DICT_STATS_RECALC_PERSISTENT:
- case DICT_STATS_RECALC_PERSISTENT_SILENT:
+
+ ut_ad(!srv_read_only_mode);
+
/* Persistent recalculation requested, called from
- ANALYZE TABLE or from TRUNCATE TABLE */
-
- /* FTS auxiliary tables do not need persistent stats */
- if ((ut_strcount(table->name, "FTS") > 0
- && (ut_strcount(table->name, "CONFIG") > 0
- || ut_strcount(table->name, "INDEX") > 0
- || ut_strcount(table->name, "DELETED") > 0
- || ut_strcount(table->name, "DOC_ID") > 0
- || ut_strcount(table->name, "ADDED") > 0))) {
- goto transient;
- }
+ 1) ANALYZE TABLE, or
+ 2) the auto recalculation background thread, or
+ 3) open table if stats do not exist on disk and auto recalc
+ is enabled */
+
+ /* InnoDB internal tables (e.g. SYS_TABLES) cannot have
+ persistent stats enabled */
+ ut_a(strchr(table->name, '/') != NULL);
/* check if the persistent statistics storage exists
before calling the potentially slow function
dict_stats_update_persistent(); that is a
prerequisite for dict_stats_save() succeeding */
- if (dict_stats_persistent_storage_check(
- caller_has_dict_sys_mutex)) {
-
- dict_table_stats_lock(table, RW_X_LATCH);
+ if (dict_stats_persistent_storage_check(false)) {
- ret = dict_stats_update_persistent(table);
+ dberr_t err;
- /* XXX Currently dict_stats_save() would read the
- stats from the table without dict_table_stats_lock()
- which means it could save inconsistent data on the
- disk. This is because we must call
- dict_table_stats_lock() after locking dict_sys->mutex.
- A solution is to copy here the stats to a temporary
- buffer while holding the _stats_lock(), release it,
- and pass that buffer to dict_stats_save(). */
+ err = dict_stats_update_persistent(table);
- dict_table_stats_unlock(table, RW_X_LATCH);
-
- if (ret == DB_SUCCESS) {
- ret = dict_stats_save(
- table,
- caller_has_dict_sys_mutex);
+ if (err != DB_SUCCESS) {
+ return(err);
}
- return(ret);
+ err = dict_stats_save(table);
+
+ return(err);
}
- /* else */
/* Fall back to transient stats since the persistent
storage is not present or is corrupted */
- if (stats_upd_option == DICT_STATS_RECALC_PERSISTENT) {
-
- ut_print_timestamp(stderr);
- /* XXX add link to the doc about storage
- creation */
- fprintf(stderr,
- " InnoDB: Recalculation of persistent "
- "statistics requested but the required "
- "persistent statistics storage is not "
- "present or is corrupted. "
- "Using quick transient stats "
- "instead.\n");
- }
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Recalculation of persistent statistics "
+ "requested for table %s but the required persistent "
+ "statistics storage is not present or is corrupted. "
+ "Using transient stats instead.\n",
+ ut_format_name(table->name, TRUE, buf, sizeof(buf)));
goto transient;
@@ -2373,265 +3010,317 @@ dict_stats_update(
goto transient;
- case DICT_STATS_FETCH:
- case DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY:
- /* fetch requested, either fetch from persistent statistics
- storage or use the old method */
+ case DICT_STATS_EMPTY_TABLE:
- dict_table_stats_lock(table, RW_X_LATCH);
+ dict_stats_empty_table(table);
- if (stats_upd_option == DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY
- && table->stat_initialized) {
+ /* If table is using persistent stats,
+ then save the stats on disk */
- dict_table_stats_unlock(table, RW_X_LATCH);
- return(DB_SUCCESS);
+ if (dict_stats_is_persistent_enabled(table)) {
+
+ if (dict_stats_persistent_storage_check(false)) {
+
+ return(dict_stats_save(table));
+ }
+
+ return(DB_STATS_DO_NOT_EXIST);
}
- /* else */
- /* Must unlock because otherwise there is a lock order
- violation with dict_sys->mutex below. Declare stats to be
- initialized before unlocking. */
- table->stat_initialized = TRUE;
- dict_table_stats_unlock(table, RW_X_LATCH);
+ return(DB_SUCCESS);
+
+ case DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY:
- if (strchr(table->name, '/') == NULL
- || strcmp(table->name, INDEX_STATS_NAME) == 0
- || strcmp(table->name, TABLE_STATS_NAME) == 0
- || (ut_strcount(table->name, "FTS") > 0
- && (ut_strcount(table->name, "CONFIG") > 0
- || ut_strcount(table->name, "INDEX") > 0
- || ut_strcount(table->name, "DELETED") > 0
- || ut_strcount(table->name, "DOC_ID") > 0
- || ut_strcount(table->name, "ADDED") > 0))) {
- /* Use the quick transient stats method for
- InnoDB internal tables, because we know the
- persistent stats storage does not contain data
- for them */
+ /* fetch requested, either fetch from persistent statistics
+ storage or use the old method */
- goto transient;
+ if (table->stat_initialized) {
+ return(DB_SUCCESS);
}
- /* else */
- if (dict_stats_persistent_storage_check(
- caller_has_dict_sys_mutex)) {
+ /* InnoDB internal tables (e.g. SYS_TABLES) cannot have
+ persistent stats enabled */
+ ut_a(strchr(table->name, '/') != NULL);
- ret = dict_stats_fetch_from_ps(table,
- caller_has_dict_sys_mutex);
+ if (!dict_stats_persistent_storage_check(false)) {
+ /* persistent statistics storage does not exist
+ or is corrupted, calculate the transient stats */
- if (ret == DB_STATS_DO_NOT_EXIST
- || (ret != DB_SUCCESS && stats_upd_option
- == DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY)) {
- /* Stats for this particular table do not
- exist or we have been called from open table
- which needs to initialize the stats,
- calculate the quick transient statistics */
- goto transient;
- }
- /* else */
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Error: Fetch of persistent "
+ "statistics requested for table %s but the "
+ "required system tables %s and %s are not "
+ "present or have unexpected structure. "
+ "Using transient stats instead.\n",
+ ut_format_name(table->name, TRUE,
+ buf, sizeof(buf)),
+ TABLE_STATS_NAME_PRINT,
+ INDEX_STATS_NAME_PRINT);
- return(ret);
- } else {
- /* persistent statistics storage does not exist,
- calculate the transient stats */
goto transient;
}
- break;
+ dict_table_t* t;
- /* no "default:" in order to produce a compilation warning
- about unhandled enumeration value */
- }
+ ut_ad(!srv_read_only_mode);
-transient:
+ /* Create a dummy table object with the same name and
+ indexes, suitable for fetching the stats into it. */
+ t = dict_stats_table_clone_create(table);
- dict_table_stats_lock(table, RW_X_LATCH);
+ dberr_t err = dict_stats_fetch_from_ps(t);
- dict_stats_update_transient(table);
+ t->stats_last_recalc = table->stats_last_recalc;
+ t->stat_modified_counter = 0;
- dict_table_stats_unlock(table, RW_X_LATCH);
+ switch (err) {
+ case DB_SUCCESS:
- return(DB_SUCCESS);
-}
-/* @} */
+ dict_table_stats_lock(table, RW_X_LATCH);
-/*********************************************************************//**
-Close the stats tables. Should always be called after successful
-dict_stats_open(). It will free the dict_stats handle.
-dict_stats_close() @{ */
-UNIV_INLINE
-void
-dict_stats_close(
-/*=============*/
- dict_stats_t* dict_stats) /*!< in/own: Handle to open
- statistics tables */
-{
- if (dict_stats->table_stats != NULL) {
- dict_table_close(dict_stats->table_stats, FALSE);
- dict_stats->table_stats = NULL;
- }
+ /* Initialize all stats to dummy values before
+ copying because dict_stats_table_clone_create() does
+ skip corrupted indexes so our dummy object 't' may
+ have less indexes than the real object 'table'. */
+ dict_stats_empty_table(table);
- if (dict_stats->index_stats != NULL) {
- dict_table_close(dict_stats->index_stats, FALSE);
- dict_stats->index_stats = NULL;
- }
+ dict_stats_copy(table, t);
- mem_free(dict_stats);
-}
-/* @} */
+ dict_stats_assert_initialized(table);
-/*********************************************************************//**
-Open stats tables to prevent these tables from being DROPped.
-Also check whether they have the correct structure. The caller
-must call dict_stats_close() when he has finished DMLing the tables.
-dict_stats_open() @{
-@return pointer to open tables or NULL on failure */
-UNIV_INLINE
-dict_stats_t*
-dict_stats_open(void)
-/*=================*/
-{
- dict_stats_t* dict_stats;
+ dict_table_stats_unlock(table, RW_X_LATCH);
+
+ dict_stats_table_clone_free(t);
+
+ return(DB_SUCCESS);
+ case DB_STATS_DO_NOT_EXIST:
+
+ dict_stats_table_clone_free(t);
- dict_stats = static_cast<dict_stats_t*>(
- mem_zalloc(sizeof(*dict_stats)));
+ if (dict_stats_auto_recalc_is_enabled(table)) {
+ return(dict_stats_update(
+ table,
+ DICT_STATS_RECALC_PERSISTENT));
+ }
- dict_stats->table_stats = dict_table_open_on_name_no_stats(
- TABLE_STATS_NAME, FALSE, DICT_ERR_IGNORE_NONE);
+ ut_format_name(table->name, TRUE, buf, sizeof(buf));
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Trying to use table %s which has "
+ "persistent statistics enabled, but auto "
+ "recalculation turned off and the statistics "
+ "do not exist in %s and %s. Please either run "
+ "\"ANALYZE TABLE %s;\" manually or enable the "
+ "auto recalculation with "
+ "\"ALTER TABLE %s STATS_AUTO_RECALC=1;\". "
+ "InnoDB will now use transient statistics for "
+ "%s.\n",
+ buf, TABLE_STATS_NAME, INDEX_STATS_NAME, buf,
+ buf, buf);
- dict_stats->index_stats = dict_table_open_on_name_no_stats(
- INDEX_STATS_NAME, FALSE, DICT_ERR_IGNORE_NONE);
+ goto transient;
+ default:
- /* Check if the tables have the correct structure, if yes then
- after this function we can safely DELETE from them without worrying
- that they may get DROPped or DDLed because the open will have
- increased the reference count. */
+ dict_stats_table_clone_free(t);
- if (dict_stats->table_stats == NULL
- || dict_stats->index_stats == NULL
- || !dict_stats_persistent_storage_check(FALSE)) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Error fetching persistent statistics "
+ "for table %s from %s and %s: %s. "
+ "Using transient stats method instead.\n",
+ ut_format_name(table->name, TRUE, buf,
+ sizeof(buf)),
+ TABLE_STATS_NAME,
+ INDEX_STATS_NAME,
+ ut_strerr(err));
- /* There was an error, close the tables and free the handle. */
- dict_stats_close(dict_stats);
- dict_stats = NULL;
+ goto transient;
+ }
+ /* no "default:" in order to produce a compilation warning
+ about unhandled enumeration value */
}
- return(dict_stats);
+transient:
+
+ dict_table_stats_lock(table, RW_X_LATCH);
+
+ dict_stats_update_transient(table);
+
+ dict_table_stats_unlock(table, RW_X_LATCH);
+
+ return(DB_SUCCESS);
}
-/* @} */
/*********************************************************************//**
Removes the information for a particular index's stats from the persistent
storage if it exists and if there is data stored for this index.
-The transaction is not committed, it must not be committed in this
-function because this is the user trx that is running DROP INDEX.
-The transaction will be committed at the very end when dropping an
-index.
+This function creates its own trx and commits it.
A note from Marko why we cannot edit user and sys_* tables in one trx:
marko: The problem is that ibuf merges should be disabled while we are
rolling back dict transactions.
marko: If ibuf merges are not disabled, we need to scan the *.ibd files.
But we shouldn't open *.ibd files before we have rolled back dict
transactions and opened the SYS_* records for the *.ibd files.
-dict_stats_delete_index_stats() @{
+dict_stats_drop_index() @{
@return DB_SUCCESS or error code */
UNIV_INTERN
-enum db_err
-dict_stats_delete_index_stats(
-/*==========================*/
- dict_index_t* index, /*!< in: index */
- trx_t* trx, /*!< in: transaction to use */
+dberr_t
+dict_stats_drop_index(
+/*==================*/
+ const char* db_and_table,/*!< in: db and table, e.g. 'db/table' */
+ const char* iname, /*!< in: index name */
char* errstr, /*!< out: error message if != DB_SUCCESS
is returned */
ulint errstr_sz)/*!< in: size of the errstr buffer */
{
- char database_name[MAX_DATABASE_NAME_LEN + 1];
- const char* table_name;
+ char db_utf8[MAX_DB_UTF8_LEN];
+ char table_utf8[MAX_TABLE_UTF8_LEN];
pars_info_t* pinfo;
- enum db_err ret;
- dict_stats_t* dict_stats;
- void* mysql_thd = trx->mysql_thd;
+ dberr_t ret;
+
+ ut_ad(!mutex_own(&dict_sys->mutex));
/* skip indexes whose table names do not contain a database name
e.g. if we are dropping an index from SYS_TABLES */
- if (strchr(index->table_name, '/') == NULL) {
-
- return(DB_SUCCESS);
- }
+ if (strchr(db_and_table, '/') == NULL) {
- /* Increment table reference count to prevent the tables from
- being DROPped just before que_eval_sql(). */
- dict_stats = dict_stats_open();
-
- if (dict_stats == NULL) {
- /* stats tables do not exist or have unexpected structure */
return(DB_SUCCESS);
}
- /* the stats tables cannot be DROPped now */
-
- ut_snprintf(database_name, sizeof(database_name), "%.*s",
- (int) dict_get_db_name_len(index->table_name),
- index->table_name);
-
- table_name = dict_remove_db_name(index->table_name);
+ dict_fs2utf8(db_and_table, db_utf8, sizeof(db_utf8),
+ table_utf8, sizeof(table_utf8));
pinfo = pars_info_create();
- pars_info_add_str_literal(pinfo, "database_name", database_name);
+ pars_info_add_str_literal(pinfo, "database_name", db_utf8);
- pars_info_add_str_literal(pinfo, "table_name", table_name);
+ pars_info_add_str_literal(pinfo, "table_name", table_utf8);
- pars_info_add_str_literal(pinfo, "index_name", index->name);
+ pars_info_add_str_literal(pinfo, "index_name", iname);
- /* Force lock wait timeout to be instantaneous because the incoming
- transaction was created via MySQL. */
+ rw_lock_x_lock(&dict_operation_lock);
+ mutex_enter(&dict_sys->mutex);
- mysql_thd = trx->mysql_thd;
- trx->mysql_thd = NULL;
+ ret = dict_stats_exec_sql(
+ pinfo,
+ "PROCEDURE DROP_INDEX_STATS () IS\n"
+ "BEGIN\n"
+ "DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n"
+ "database_name = :database_name AND\n"
+ "table_name = :table_name AND\n"
+ "index_name = :index_name;\n"
+ "END;\n");
- ret = que_eval_sql(pinfo,
- "PROCEDURE DROP_INDEX_STATS () IS\n"
- "BEGIN\n"
- "DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n"
- "database_name = :database_name AND\n"
- "table_name = :table_name AND\n"
- "index_name = :index_name;\n"
- "END;\n",
- TRUE,
- trx);
-
- trx->mysql_thd = mysql_thd;
-
- /* pinfo is freed by que_eval_sql() */
+ mutex_exit(&dict_sys->mutex);
+ rw_lock_x_unlock(&dict_operation_lock);
- /* do not to commit here, see the function's comment */
+ if (ret == DB_STATS_DO_NOT_EXIST) {
+ ret = DB_SUCCESS;
+ }
if (ret != DB_SUCCESS) {
-
ut_snprintf(errstr, errstr_sz,
"Unable to delete statistics for index %s "
- "from %s%s. They can be deleted later using "
+ "from %s%s: %s. They can be deleted later using "
"DELETE FROM %s WHERE "
"database_name = '%s' AND "
"table_name = '%s' AND "
"index_name = '%s';",
- index->name,
+ iname,
INDEX_STATS_NAME_PRINT,
(ret == DB_LOCK_WAIT_TIMEOUT
? " because the rows are locked"
: ""),
+ ut_strerr(ret),
INDEX_STATS_NAME_PRINT,
- database_name,
- table_name,
- index->name);
+ db_utf8,
+ table_utf8,
+ iname);
ut_print_timestamp(stderr);
fprintf(stderr, " InnoDB: %s\n", errstr);
-
- trx->error_state = DB_SUCCESS;
}
- dict_stats_close(dict_stats);
+ return(ret);
+}
+/* @} */
+
+/*********************************************************************//**
+Executes
+DELETE FROM mysql.innodb_table_stats
+WHERE database_name = '...' AND table_name = '...';
+Creates its own transaction and commits it.
+dict_stats_delete_from_table_stats() @{
+@return DB_SUCCESS or error code */
+UNIV_INLINE
+dberr_t
+dict_stats_delete_from_table_stats(
+/*===============================*/
+ const char* database_name, /*!< in: database name, e.g. 'db' */
+ const char* table_name) /*!< in: table name, e.g. 'table' */
+{
+ pars_info_t* pinfo;
+ dberr_t ret;
+
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(mutex_own(&dict_sys->mutex));
+
+ pinfo = pars_info_create();
+
+ pars_info_add_str_literal(pinfo, "database_name", database_name);
+ pars_info_add_str_literal(pinfo, "table_name", table_name);
+
+ ret = dict_stats_exec_sql(
+ pinfo,
+ "PROCEDURE DELETE_FROM_TABLE_STATS () IS\n"
+ "BEGIN\n"
+ "DELETE FROM \"" TABLE_STATS_NAME "\" WHERE\n"
+ "database_name = :database_name AND\n"
+ "table_name = :table_name;\n"
+ "END;\n");
+
+ return(ret);
+}
+/* @} */
+
+/*********************************************************************//**
+Executes
+DELETE FROM mysql.innodb_index_stats
+WHERE database_name = '...' AND table_name = '...';
+Creates its own transaction and commits it.
+dict_stats_delete_from_index_stats() @{
+@return DB_SUCCESS or error code */
+UNIV_INLINE
+dberr_t
+dict_stats_delete_from_index_stats(
+/*===============================*/
+ const char* database_name, /*!< in: database name, e.g. 'db' */
+ const char* table_name) /*!< in: table name, e.g. 'table' */
+{
+ pars_info_t* pinfo;
+ dberr_t ret;
+
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(mutex_own(&dict_sys->mutex));
+
+ pinfo = pars_info_create();
+
+ pars_info_add_str_literal(pinfo, "database_name", database_name);
+ pars_info_add_str_literal(pinfo, "table_name", table_name);
+
+ ret = dict_stats_exec_sql(
+ pinfo,
+ "PROCEDURE DELETE_FROM_INDEX_STATS () IS\n"
+ "BEGIN\n"
+ "DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n"
+ "database_name = :database_name AND\n"
+ "table_name = :table_name;\n"
+ "END;\n");
return(ret);
}
@@ -2640,130 +3329,332 @@ dict_stats_delete_index_stats(
/*********************************************************************//**
Removes the statistics for a table and all of its indexes from the
persistent statistics storage if it exists and if there is data stored for
-the table. This function creates its own transaction and commits it.
-dict_stats_delete_table_stats() @{
+the table. This function creates its own transaction and commits it.
+dict_stats_drop_table() @{
@return DB_SUCCESS or error code */
UNIV_INTERN
-enum db_err
-dict_stats_delete_table_stats(
-/*==========================*/
- const char* table_name, /*!< in: table name */
+dberr_t
+dict_stats_drop_table(
+/*==================*/
+ const char* db_and_table, /*!< in: db and table, e.g. 'db/table' */
char* errstr, /*!< out: error message
if != DB_SUCCESS is returned */
ulint errstr_sz) /*!< in: size of errstr buffer */
{
- char database_name[MAX_DATABASE_NAME_LEN + 1];
- const char* table_name_strip; /* without leading db name */
- trx_t* trx;
- pars_info_t* pinfo;
- enum db_err ret = DB_ERROR;
- dict_stats_t* dict_stats;
+ char db_utf8[MAX_DB_UTF8_LEN];
+ char table_utf8[MAX_TABLE_UTF8_LEN];
+ dberr_t ret;
+
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(mutex_own(&dict_sys->mutex));
/* skip tables that do not contain a database name
e.g. if we are dropping SYS_TABLES */
- if (strchr(table_name, '/') == NULL) {
+ if (strchr(db_and_table, '/') == NULL) {
return(DB_SUCCESS);
}
/* skip innodb_table_stats and innodb_index_stats themselves */
- if (strcmp(table_name, TABLE_STATS_NAME) == 0
- || strcmp(table_name, INDEX_STATS_NAME) == 0) {
+ if (strcmp(db_and_table, TABLE_STATS_NAME) == 0
+ || strcmp(db_and_table, INDEX_STATS_NAME) == 0) {
return(DB_SUCCESS);
}
- /* Create a new private trx */
+ dict_fs2utf8(db_and_table, db_utf8, sizeof(db_utf8),
+ table_utf8, sizeof(table_utf8));
- trx = trx_allocate_for_background();
+ ret = dict_stats_delete_from_table_stats(db_utf8, table_utf8);
- /* Use 'read-uncommitted' so that the SELECTs we execute
- do not get blocked in case some user has locked the rows we
- are SELECTing */
+ if (ret == DB_SUCCESS) {
+ ret = dict_stats_delete_from_index_stats(db_utf8, table_utf8);
+ }
- trx->isolation_level = TRX_ISO_READ_UNCOMMITTED;
+ if (ret == DB_STATS_DO_NOT_EXIST) {
+ ret = DB_SUCCESS;
+ }
- trx_start_if_not_started(trx);
+ if (ret != DB_SUCCESS) {
- /* Increment table reference count to prevent the tables from
- being DROPped just before que_eval_sql(). */
- dict_stats = dict_stats_open();
+ ut_snprintf(errstr, errstr_sz,
+ "Unable to delete statistics for table %s.%s: %s. "
+ "They can be deleted later using "
- if (dict_stats == NULL) {
- /* stats tables do not exist or have unexpected structure */
- ret = DB_SUCCESS;
- goto commit_and_return;
+ "DELETE FROM %s WHERE "
+ "database_name = '%s' AND "
+ "table_name = '%s'; "
+
+ "DELETE FROM %s WHERE "
+ "database_name = '%s' AND "
+ "table_name = '%s';",
+
+ db_utf8, table_utf8,
+ ut_strerr(ret),
+
+ INDEX_STATS_NAME_PRINT,
+ db_utf8, table_utf8,
+
+ TABLE_STATS_NAME_PRINT,
+ db_utf8, table_utf8);
}
- ut_snprintf(database_name, sizeof(database_name), "%.*s",
- (int) dict_get_db_name_len(table_name),
- table_name);
+ return(ret);
+}
+/* @} */
+
+/*********************************************************************//**
+Executes
+UPDATE mysql.innodb_table_stats SET
+database_name = '...', table_name = '...'
+WHERE database_name = '...' AND table_name = '...';
+Creates its own transaction and commits it.
+dict_stats_rename_in_table_stats() @{
+@return DB_SUCCESS or error code */
+UNIV_INLINE
+dberr_t
+dict_stats_rename_in_table_stats(
+/*=============================*/
+ const char* old_dbname_utf8,/*!< in: database name, e.g. 'olddb' */
+ const char* old_tablename_utf8,/*!< in: table name, e.g. 'oldtable' */
+ const char* new_dbname_utf8,/*!< in: database name, e.g. 'newdb' */
+ const char* new_tablename_utf8)/*!< in: table name, e.g. 'newtable' */
+{
+ pars_info_t* pinfo;
+ dberr_t ret;
- table_name_strip = dict_remove_db_name(table_name);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(mutex_own(&dict_sys->mutex));
pinfo = pars_info_create();
- pars_info_add_str_literal(pinfo, "database_name", database_name);
+ pars_info_add_str_literal(pinfo, "old_dbname_utf8", old_dbname_utf8);
+ pars_info_add_str_literal(pinfo, "old_tablename_utf8", old_tablename_utf8);
+ pars_info_add_str_literal(pinfo, "new_dbname_utf8", new_dbname_utf8);
+ pars_info_add_str_literal(pinfo, "new_tablename_utf8", new_tablename_utf8);
+
+ ret = dict_stats_exec_sql(
+ pinfo,
+ "PROCEDURE RENAME_IN_TABLE_STATS () IS\n"
+ "BEGIN\n"
+ "UPDATE \"" TABLE_STATS_NAME "\" SET\n"
+ "database_name = :new_dbname_utf8,\n"
+ "table_name = :new_tablename_utf8\n"
+ "WHERE\n"
+ "database_name = :old_dbname_utf8 AND\n"
+ "table_name = :old_tablename_utf8;\n"
+ "END;\n");
- pars_info_add_str_literal(pinfo, "table_name", table_name_strip);
+ return(ret);
+}
+/* @} */
- ret = que_eval_sql(pinfo,
- "PROCEDURE DROP_TABLE_STATS () IS\n"
- "BEGIN\n"
+/*********************************************************************//**
+Executes
+UPDATE mysql.innodb_index_stats SET
+database_name = '...', table_name = '...'
+WHERE database_name = '...' AND table_name = '...';
+Creates its own transaction and commits it.
+dict_stats_rename_in_index_stats() @{
+@return DB_SUCCESS or error code */
+UNIV_INLINE
+dberr_t
+dict_stats_rename_in_index_stats(
+/*=============================*/
+ const char* old_dbname_utf8,/*!< in: database name, e.g. 'olddb' */
+ const char* old_tablename_utf8,/*!< in: table name, e.g. 'oldtable' */
+ const char* new_dbname_utf8,/*!< in: database name, e.g. 'newdb' */
+ const char* new_tablename_utf8)/*!< in: table name, e.g. 'newtable' */
+{
+ pars_info_t* pinfo;
+ dberr_t ret;
- "DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n"
- "database_name = :database_name AND\n"
- "table_name = :table_name;\n"
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(mutex_own(&dict_sys->mutex));
- "DELETE FROM \"" TABLE_STATS_NAME "\" WHERE\n"
- "database_name = :database_name AND\n"
- "table_name = :table_name;\n"
+ pinfo = pars_info_create();
- "END;\n",
- TRUE,
- trx);
+ pars_info_add_str_literal(pinfo, "old_dbname_utf8", old_dbname_utf8);
+ pars_info_add_str_literal(pinfo, "old_tablename_utf8", old_tablename_utf8);
+ pars_info_add_str_literal(pinfo, "new_dbname_utf8", new_dbname_utf8);
+ pars_info_add_str_literal(pinfo, "new_tablename_utf8", new_tablename_utf8);
+
+ ret = dict_stats_exec_sql(
+ pinfo,
+ "PROCEDURE RENAME_IN_INDEX_STATS () IS\n"
+ "BEGIN\n"
+ "UPDATE \"" INDEX_STATS_NAME "\" SET\n"
+ "database_name = :new_dbname_utf8,\n"
+ "table_name = :new_tablename_utf8\n"
+ "WHERE\n"
+ "database_name = :old_dbname_utf8 AND\n"
+ "table_name = :old_tablename_utf8;\n"
+ "END;\n");
- /* pinfo is freed by que_eval_sql() */
+ return(ret);
+}
+/* @} */
- if (ret != DB_SUCCESS) {
+/*********************************************************************//**
+Renames a table in InnoDB persistent stats storage.
+This function creates its own transaction and commits it.
+dict_stats_rename_table() @{
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+dict_stats_rename_table(
+/*====================*/
+ const char* old_name, /*!< in: old name, e.g. 'db/table' */
+ const char* new_name, /*!< in: new name, e.g. 'db/table' */
+ char* errstr, /*!< out: error string if != DB_SUCCESS
+ is returned */
+ size_t errstr_sz) /*!< in: errstr size */
+{
+ char old_db_utf8[MAX_DB_UTF8_LEN];
+ char new_db_utf8[MAX_DB_UTF8_LEN];
+ char old_table_utf8[MAX_TABLE_UTF8_LEN];
+ char new_table_utf8[MAX_TABLE_UTF8_LEN];
+ dberr_t ret;
- ut_snprintf(errstr, errstr_sz,
- "Unable to delete statistics for table %s.%s "
- "from %s or %s%s. "
- "They can be deleted later using "
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(!rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!mutex_own(&dict_sys->mutex));
- "DELETE FROM %s WHERE "
- "database_name = '%s' AND "
- "table_name = '%s'; "
+ /* skip innodb_table_stats and innodb_index_stats themselves */
+ if (strcmp(old_name, TABLE_STATS_NAME) == 0
+ || strcmp(old_name, INDEX_STATS_NAME) == 0
+ || strcmp(new_name, TABLE_STATS_NAME) == 0
+ || strcmp(new_name, INDEX_STATS_NAME) == 0) {
- "DELETE FROM %s WHERE "
- "database_name = '%s' AND "
- "table_name = '%s';",
+ return(DB_SUCCESS);
+ }
- database_name, table_name_strip,
- TABLE_STATS_NAME_PRINT, INDEX_STATS_NAME_PRINT,
+ dict_fs2utf8(old_name, old_db_utf8, sizeof(old_db_utf8),
+ old_table_utf8, sizeof(old_table_utf8));
- (ret == DB_LOCK_WAIT_TIMEOUT
- ? " because the rows are locked"
- : ""),
+ dict_fs2utf8(new_name, new_db_utf8, sizeof(new_db_utf8),
+ new_table_utf8, sizeof(new_table_utf8));
- INDEX_STATS_NAME_PRINT,
- database_name, table_name_strip,
+ rw_lock_x_lock(&dict_operation_lock);
+ mutex_enter(&dict_sys->mutex);
+
+ ulint n_attempts = 0;
+ do {
+ n_attempts++;
+
+ ret = dict_stats_rename_in_table_stats(
+ old_db_utf8, old_table_utf8,
+ new_db_utf8, new_table_utf8);
+
+ if (ret == DB_DUPLICATE_KEY) {
+ dict_stats_delete_from_table_stats(
+ new_db_utf8, new_table_utf8);
+ }
+
+ if (ret == DB_STATS_DO_NOT_EXIST) {
+ ret = DB_SUCCESS;
+ }
+
+ if (ret != DB_SUCCESS) {
+ mutex_exit(&dict_sys->mutex);
+ rw_lock_x_unlock(&dict_operation_lock);
+ os_thread_sleep(200000 /* 0.2 sec */);
+ rw_lock_x_lock(&dict_operation_lock);
+ mutex_enter(&dict_sys->mutex);
+ }
+ } while ((ret == DB_DEADLOCK
+ || ret == DB_DUPLICATE_KEY
+ || ret == DB_LOCK_WAIT_TIMEOUT)
+ && n_attempts < 5);
+
+ if (ret != DB_SUCCESS) {
+ ut_snprintf(errstr, errstr_sz,
+ "Unable to rename statistics from "
+ "%s.%s to %s.%s in %s: %s. "
+ "They can be renamed later using "
+
+ "UPDATE %s SET "
+ "database_name = '%s', "
+ "table_name = '%s' "
+ "WHERE "
+ "database_name = '%s' AND "
+ "table_name = '%s';",
+ old_db_utf8, old_table_utf8,
+ new_db_utf8, new_table_utf8,
TABLE_STATS_NAME_PRINT,
- database_name, table_name_strip);
+ ut_strerr(ret),
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: %s\n", errstr);
+ TABLE_STATS_NAME_PRINT,
+ new_db_utf8, new_table_utf8,
+ old_db_utf8, old_table_utf8);
+ mutex_exit(&dict_sys->mutex);
+ rw_lock_x_unlock(&dict_operation_lock);
+ return(ret);
}
+ /* else */
- dict_stats_close(dict_stats);
+ n_attempts = 0;
+ do {
+ n_attempts++;
-commit_and_return:
+ ret = dict_stats_rename_in_index_stats(
+ old_db_utf8, old_table_utf8,
+ new_db_utf8, new_table_utf8);
- trx_commit_for_mysql(trx);
+ if (ret == DB_DUPLICATE_KEY) {
+ dict_stats_delete_from_index_stats(
+ new_db_utf8, new_table_utf8);
+ }
- trx_free_for_background(trx);
+ if (ret == DB_STATS_DO_NOT_EXIST) {
+ ret = DB_SUCCESS;
+ }
+
+ if (ret != DB_SUCCESS) {
+ mutex_exit(&dict_sys->mutex);
+ rw_lock_x_unlock(&dict_operation_lock);
+ os_thread_sleep(200000 /* 0.2 sec */);
+ rw_lock_x_lock(&dict_operation_lock);
+ mutex_enter(&dict_sys->mutex);
+ }
+ } while ((ret == DB_DEADLOCK
+ || ret == DB_DUPLICATE_KEY
+ || ret == DB_LOCK_WAIT_TIMEOUT)
+ && n_attempts < 5);
+
+ mutex_exit(&dict_sys->mutex);
+ rw_lock_x_unlock(&dict_operation_lock);
+
+ if (ret != DB_SUCCESS) {
+ ut_snprintf(errstr, errstr_sz,
+ "Unable to rename statistics from "
+ "%s.%s to %s.%s in %s: %s. "
+ "They can be renamed later using "
+
+ "UPDATE %s SET "
+ "database_name = '%s', "
+ "table_name = '%s' "
+ "WHERE "
+ "database_name = '%s' AND "
+ "table_name = '%s';",
+
+ old_db_utf8, old_table_utf8,
+ new_db_utf8, new_table_utf8,
+ INDEX_STATS_NAME_PRINT,
+ ut_strerr(ret),
+
+ INDEX_STATS_NAME_PRINT,
+ new_db_utf8, new_table_utf8,
+ old_db_utf8, old_table_utf8);
+ }
return(ret);
}
@@ -2933,13 +3824,13 @@ test_dict_stats_save()
dict_table_t table;
dict_index_t index1;
dict_field_t index1_fields[1];
- ib_uint64_t index1_stat_n_diff_key_vals[2];
- ib_uint64_t index1_stat_n_sample_sizes[2];
+ ib_uint64_t index1_stat_n_diff_key_vals[1];
+ ib_uint64_t index1_stat_n_sample_sizes[1];
dict_index_t index2;
dict_field_t index2_fields[4];
- ib_uint64_t index2_stat_n_diff_key_vals[5];
- ib_uint64_t index2_stat_n_sample_sizes[5];
- enum db_err ret;
+ ib_uint64_t index2_stat_n_diff_key_vals[4];
+ ib_uint64_t index2_stat_n_sample_sizes[4];
+ dberr_t ret;
/* craft a dummy dict_table_t */
table.name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME);
@@ -2949,16 +3840,11 @@ test_dict_stats_save()
UT_LIST_INIT(table.indexes);
UT_LIST_ADD_LAST(indexes, table.indexes, &index1);
UT_LIST_ADD_LAST(indexes, table.indexes, &index2);
-#ifdef UNIV_DEBUG
- table.magic_n = DICT_TABLE_MAGIC_N;
-#endif /* UNIV_DEBUG */
+ ut_d(table.magic_n = DICT_TABLE_MAGIC_N);
+ ut_d(index1.magic_n = DICT_INDEX_MAGIC_N);
index1.name = TEST_IDX1_NAME;
index1.table = &table;
-#ifdef UNIV_DEBUG
- index1.magic_n = DICT_INDEX_MAGIC_N;
-#endif /* UNIV_DEBUG */
- index1.to_be_dropped = 0;
index1.cached = 1;
index1.n_uniq = 1;
index1.fields = index1_fields;
@@ -2967,17 +3853,12 @@ test_dict_stats_save()
index1.stat_index_size = TEST_IDX1_INDEX_SIZE;
index1.stat_n_leaf_pages = TEST_IDX1_N_LEAF_PAGES;
index1_fields[0].name = TEST_IDX1_COL1_NAME;
- index1_stat_n_diff_key_vals[0] = 1; /* dummy */
- index1_stat_n_diff_key_vals[1] = TEST_IDX1_N_DIFF1;
- index1_stat_n_sample_sizes[0] = 0; /* dummy */
- index1_stat_n_sample_sizes[1] = TEST_IDX1_N_DIFF1_SAMPLE_SIZE;
+ index1_stat_n_diff_key_vals[0] = TEST_IDX1_N_DIFF1;
+ index1_stat_n_sample_sizes[0] = TEST_IDX1_N_DIFF1_SAMPLE_SIZE;
+ ut_d(index2.magic_n = DICT_INDEX_MAGIC_N);
index2.name = TEST_IDX2_NAME;
index2.table = &table;
-#ifdef UNIV_DEBUG
- index2.magic_n = DICT_INDEX_MAGIC_N;
-#endif /* UNIV_DEBUG */
- index2.to_be_dropped = 0;
index2.cached = 1;
index2.n_uniq = 4;
index2.fields = index2_fields;
@@ -2989,18 +3870,16 @@ test_dict_stats_save()
index2_fields[1].name = TEST_IDX2_COL2_NAME;
index2_fields[2].name = TEST_IDX2_COL3_NAME;
index2_fields[3].name = TEST_IDX2_COL4_NAME;
- index2_stat_n_diff_key_vals[0] = 1; /* dummy */
- index2_stat_n_diff_key_vals[1] = TEST_IDX2_N_DIFF1;
- index2_stat_n_diff_key_vals[2] = TEST_IDX2_N_DIFF2;
- index2_stat_n_diff_key_vals[3] = TEST_IDX2_N_DIFF3;
- index2_stat_n_diff_key_vals[4] = TEST_IDX2_N_DIFF4;
- index2_stat_n_sample_sizes[0] = 0; /* dummy */
- index2_stat_n_sample_sizes[1] = TEST_IDX2_N_DIFF1_SAMPLE_SIZE;
- index2_stat_n_sample_sizes[2] = TEST_IDX2_N_DIFF2_SAMPLE_SIZE;
- index2_stat_n_sample_sizes[3] = TEST_IDX2_N_DIFF3_SAMPLE_SIZE;
- index2_stat_n_sample_sizes[4] = TEST_IDX2_N_DIFF4_SAMPLE_SIZE;
-
- ret = dict_stats_save(&table, FALSE);
+ index2_stat_n_diff_key_vals[0] = TEST_IDX2_N_DIFF1;
+ index2_stat_n_diff_key_vals[1] = TEST_IDX2_N_DIFF2;
+ index2_stat_n_diff_key_vals[2] = TEST_IDX2_N_DIFF3;
+ index2_stat_n_diff_key_vals[3] = TEST_IDX2_N_DIFF4;
+ index2_stat_n_sample_sizes[0] = TEST_IDX2_N_DIFF1_SAMPLE_SIZE;
+ index2_stat_n_sample_sizes[1] = TEST_IDX2_N_DIFF2_SAMPLE_SIZE;
+ index2_stat_n_sample_sizes[2] = TEST_IDX2_N_DIFF3_SAMPLE_SIZE;
+ index2_stat_n_sample_sizes[3] = TEST_IDX2_N_DIFF4_SAMPLE_SIZE;
+
+ ret = dict_stats_save(&table);
ut_a(ret == DB_SUCCESS);
@@ -3098,41 +3977,35 @@ test_dict_stats_fetch_from_ps()
{
dict_table_t table;
dict_index_t index1;
- ib_uint64_t index1_stat_n_diff_key_vals[2];
- ib_uint64_t index1_stat_n_sample_sizes[2];
+ ib_uint64_t index1_stat_n_diff_key_vals[1];
+ ib_uint64_t index1_stat_n_sample_sizes[1];
dict_index_t index2;
- ib_uint64_t index2_stat_n_diff_key_vals[5];
- ib_uint64_t index2_stat_n_sample_sizes[5];
- enum db_err ret;
+ ib_uint64_t index2_stat_n_diff_key_vals[4];
+ ib_uint64_t index2_stat_n_sample_sizes[4];
+ dberr_t ret;
/* craft a dummy dict_table_t */
table.name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME);
UT_LIST_INIT(table.indexes);
UT_LIST_ADD_LAST(indexes, table.indexes, &index1);
UT_LIST_ADD_LAST(indexes, table.indexes, &index2);
-#ifdef UNIV_DEBUG
- table.magic_n = DICT_TABLE_MAGIC_N;
-#endif /* UNIV_DEBUG */
+ ut_d(table.magic_n = DICT_TABLE_MAGIC_N);
index1.name = TEST_IDX1_NAME;
-#ifdef UNIV_DEBUG
- index1.magic_n = DICT_INDEX_MAGIC_N;
-#endif /* UNIV_DEBUG */
+ ut_d(index1.magic_n = DICT_INDEX_MAGIC_N);
index1.cached = 1;
index1.n_uniq = 1;
index1.stat_n_diff_key_vals = index1_stat_n_diff_key_vals;
index1.stat_n_sample_sizes = index1_stat_n_sample_sizes;
index2.name = TEST_IDX2_NAME;
-#ifdef UNIV_DEBUG
- index2.magic_n = DICT_INDEX_MAGIC_N;
-#endif /* UNIV_DEBUG */
+ ut_d(index2.magic_n = DICT_INDEX_MAGIC_N);
index2.cached = 1;
index2.n_uniq = 4;
index2.stat_n_diff_key_vals = index2_stat_n_diff_key_vals;
index2.stat_n_sample_sizes = index2_stat_n_sample_sizes;
- ret = dict_stats_fetch_from_ps(&table, FALSE);
+ ret = dict_stats_fetch_from_ps(&table);
ut_a(ret == DB_SUCCESS);
@@ -3143,19 +4016,19 @@ test_dict_stats_fetch_from_ps()
ut_a(index1.stat_index_size == TEST_IDX1_INDEX_SIZE);
ut_a(index1.stat_n_leaf_pages == TEST_IDX1_N_LEAF_PAGES);
- ut_a(index1_stat_n_diff_key_vals[1] == TEST_IDX1_N_DIFF1);
- ut_a(index1_stat_n_sample_sizes[1] == TEST_IDX1_N_DIFF1_SAMPLE_SIZE);
+ ut_a(index1_stat_n_diff_key_vals[0] == TEST_IDX1_N_DIFF1);
+ ut_a(index1_stat_n_sample_sizes[0] == TEST_IDX1_N_DIFF1_SAMPLE_SIZE);
ut_a(index2.stat_index_size == TEST_IDX2_INDEX_SIZE);
ut_a(index2.stat_n_leaf_pages == TEST_IDX2_N_LEAF_PAGES);
- ut_a(index2_stat_n_diff_key_vals[1] == TEST_IDX2_N_DIFF1);
- ut_a(index2_stat_n_sample_sizes[1] == TEST_IDX2_N_DIFF1_SAMPLE_SIZE);
- ut_a(index2_stat_n_diff_key_vals[2] == TEST_IDX2_N_DIFF2);
- ut_a(index2_stat_n_sample_sizes[2] == TEST_IDX2_N_DIFF2_SAMPLE_SIZE);
- ut_a(index2_stat_n_diff_key_vals[3] == TEST_IDX2_N_DIFF3);
- ut_a(index2_stat_n_sample_sizes[3] == TEST_IDX2_N_DIFF3_SAMPLE_SIZE);
- ut_a(index2_stat_n_diff_key_vals[4] == TEST_IDX2_N_DIFF4);
- ut_a(index2_stat_n_sample_sizes[4] == TEST_IDX2_N_DIFF4_SAMPLE_SIZE);
+ ut_a(index2_stat_n_diff_key_vals[0] == TEST_IDX2_N_DIFF1);
+ ut_a(index2_stat_n_sample_sizes[0] == TEST_IDX2_N_DIFF1_SAMPLE_SIZE);
+ ut_a(index2_stat_n_diff_key_vals[1] == TEST_IDX2_N_DIFF2);
+ ut_a(index2_stat_n_sample_sizes[1] == TEST_IDX2_N_DIFF2_SAMPLE_SIZE);
+ ut_a(index2_stat_n_diff_key_vals[2] == TEST_IDX2_N_DIFF3);
+ ut_a(index2_stat_n_sample_sizes[2] == TEST_IDX2_N_DIFF3_SAMPLE_SIZE);
+ ut_a(index2_stat_n_diff_key_vals[3] == TEST_IDX2_N_DIFF4);
+ ut_a(index2_stat_n_sample_sizes[3] == TEST_IDX2_N_DIFF4_SAMPLE_SIZE);
printf("OK: fetch successful\n");
}
diff --git a/storage/innobase/dict/dict0stats_bg.cc b/storage/innobase/dict/dict0stats_bg.cc
new file mode 100644
index 00000000000..7a30b748e7f
--- /dev/null
+++ b/storage/innobase/dict/dict0stats_bg.cc
@@ -0,0 +1,392 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file dict/dict0stats_bg.cc
+Code used for background table and index stats gathering.
+
+Created Apr 25, 2012 Vasil Dimov
+*******************************************************/
+
+#include "row0mysql.h"
+#include "srv0start.h"
+#include "dict0stats.h"
+#include "dict0stats_bg.h"
+
+#include <vector>
+
+/** Minimum time interval between stats recalc for a given table */
+#define MIN_RECALC_INTERVAL 10 /* seconds */
+
+#define SHUTTING_DOWN() (srv_shutdown_state != SRV_SHUTDOWN_NONE)
+
+/** Event to wake up the stats thread */
+UNIV_INTERN os_event_t dict_stats_event = NULL;
+
+/** This mutex protects the "recalc_pool" variable. */
+static ib_mutex_t recalc_pool_mutex;
+#ifdef HAVE_PSI_INTERFACE
+static mysql_pfs_key_t recalc_pool_mutex_key;
+#endif /* HAVE_PSI_INTERFACE */
+
+/** The number of tables that can be added to "recalc_pool" before
+it is enlarged */
+static const ulint RECALC_POOL_INITIAL_SLOTS = 128;
+
+/** The multitude of tables whose stats are to be automatically
+recalculated - an STL vector */
+typedef std::vector<table_id_t> recalc_pool_t;
+static recalc_pool_t recalc_pool;
+
+typedef recalc_pool_t::iterator recalc_pool_iterator_t;
+
+/*****************************************************************//**
+Initialize the recalc pool, called once during thread initialization. */
+static
+void
+dict_stats_recalc_pool_init()
+/*=========================*/
+{
+ ut_ad(!srv_read_only_mode);
+
+ recalc_pool.reserve(RECALC_POOL_INITIAL_SLOTS);
+}
+
+/*****************************************************************//**
+Free the resources occupied by the recalc pool, called once during
+thread de-initialization. */
+static
+void
+dict_stats_recalc_pool_deinit()
+/*===========================*/
+{
+ ut_ad(!srv_read_only_mode);
+
+ recalc_pool.clear();
+}
+
+/*****************************************************************//**
+Add a table to the recalc pool, which is processed by the
+background stats gathering thread. Only the table id is added to the
+list, so the table can be closed after being enqueued and it will be
+opened when needed. If the table does not exist later (has been DROPped),
+then it will be removed from the pool and skipped.
+dict_stats_recalc_pool_add() @{ */
+UNIV_INTERN
+void
+dict_stats_recalc_pool_add(
+/*=======================*/
+ const dict_table_t* table) /*!< in: table to add */
+{
+ ut_ad(!srv_read_only_mode);
+
+ mutex_enter(&recalc_pool_mutex);
+
+ /* quit if already in the list */
+ for (recalc_pool_iterator_t iter = recalc_pool.begin();
+ iter != recalc_pool.end();
+ ++iter) {
+
+ if (*iter == table->id) {
+ mutex_exit(&recalc_pool_mutex);
+ return;
+ }
+ }
+
+ recalc_pool.push_back(table->id);
+
+ mutex_exit(&recalc_pool_mutex);
+
+ os_event_set(dict_stats_event);
+}
+/* @} */
+
+/*****************************************************************//**
+Get a table from the auto recalc pool. The returned table id is removed
+from the pool.
+dict_stats_recalc_pool_get() @{
+@return true if the pool was non-empty and "id" was set, false otherwise */
+static
+bool
+dict_stats_recalc_pool_get(
+/*=======================*/
+ table_id_t* id) /*!< out: table id, or unmodified if list is
+ empty */
+{
+ ut_ad(!srv_read_only_mode);
+
+ mutex_enter(&recalc_pool_mutex);
+
+ if (recalc_pool.empty()) {
+ mutex_exit(&recalc_pool_mutex);
+ return(false);
+ }
+
+ *id = recalc_pool[0];
+
+ recalc_pool.erase(recalc_pool.begin());
+
+ mutex_exit(&recalc_pool_mutex);
+
+ return(true);
+}
+/* @} */
+
+/*****************************************************************//**
+Delete a given table from the auto recalc pool.
+dict_stats_recalc_pool_del() */
+UNIV_INTERN
+void
+dict_stats_recalc_pool_del(
+/*=======================*/
+ const dict_table_t* table) /*!< in: table to remove */
+{
+ ut_ad(!srv_read_only_mode);
+ ut_ad(mutex_own(&dict_sys->mutex));
+
+ mutex_enter(&recalc_pool_mutex);
+
+ ut_ad(table->id > 0);
+
+ for (recalc_pool_iterator_t iter = recalc_pool.begin();
+ iter != recalc_pool.end();
+ ++iter) {
+
+ if (*iter == table->id) {
+ /* erase() invalidates the iterator */
+ recalc_pool.erase(iter);
+ break;
+ }
+ }
+
+ mutex_exit(&recalc_pool_mutex);
+}
+
+/*****************************************************************//**
+Wait until background stats thread has stopped using the specified table(s).
+The caller must have locked the data dictionary using
+row_mysql_lock_data_dictionary() and this function may unlock it temporarily
+and restore the lock before it exits.
+The background stats thread is guaranteed not to start using the specified
+tables after this function returns and before the caller unlocks the data
+dictionary because it sets the BG_STAT_IN_PROGRESS bit in table->stats_bg_flag
+under dict_sys->mutex.
+dict_stats_wait_bg_to_stop_using_tables() @{ */
+UNIV_INTERN
+void
+dict_stats_wait_bg_to_stop_using_tables(
+/*====================================*/
+ dict_table_t* table1, /*!< in/out: table1 */
+ dict_table_t* table2, /*!< in/out: table2, could be NULL */
+ trx_t* trx) /*!< in/out: transaction to use for
+ unlocking/locking the data dict */
+{
+ ut_ad(!srv_read_only_mode);
+
+ while ((table1->stats_bg_flag & BG_STAT_IN_PROGRESS)
+ || (table2 != NULL
+ && (table2->stats_bg_flag & BG_STAT_IN_PROGRESS))) {
+
+ table1->stats_bg_flag |= BG_STAT_SHOULD_QUIT;
+ if (table2 != NULL) {
+ table2->stats_bg_flag |= BG_STAT_SHOULD_QUIT;
+ }
+
+ row_mysql_unlock_data_dictionary(trx);
+ os_thread_sleep(250000);
+ row_mysql_lock_data_dictionary(trx);
+ }
+}
+/* @} */
+
+/*****************************************************************//**
+Initialize global variables needed for the operation of dict_stats_thread()
+Must be called before dict_stats_thread() is started.
+dict_stats_thread_init() @{ */
+UNIV_INTERN
+void
+dict_stats_thread_init()
+/*====================*/
+{
+ ut_a(!srv_read_only_mode);
+
+ dict_stats_event = os_event_create();
+
+ /* The recalc_pool_mutex is acquired from:
+ 1) the background stats gathering thread before any other latch
+ and released without latching anything else in between (thus
+ any level would do here)
+ 2) from row_update_statistics_if_needed()
+ and released without latching anything else in between. We know
+ that dict_sys->mutex (SYNC_DICT) is not acquired when
+ row_update_statistics_if_needed() is called and it may be acquired
+ inside that function (thus a level <=SYNC_DICT would do).
+ 3) from row_drop_table_for_mysql() after dict_sys->mutex (SYNC_DICT)
+ and dict_operation_lock (SYNC_DICT_OPERATION) have been locked
+ (thus a level <SYNC_DICT && <SYNC_DICT_OPERATION would do)
+ So we choose SYNC_STATS_AUTO_RECALC to be about below SYNC_DICT. */
+ mutex_create(recalc_pool_mutex_key, &recalc_pool_mutex,
+ SYNC_STATS_AUTO_RECALC);
+
+ dict_stats_recalc_pool_init();
+}
+/* @} */
+
+/*****************************************************************//**
+Free resources allocated by dict_stats_thread_init(), must be called
+after dict_stats_thread() has exited.
+dict_stats_thread_deinit() @{ */
+UNIV_INTERN
+void
+dict_stats_thread_deinit()
+/*======================*/
+{
+ ut_a(!srv_read_only_mode);
+ ut_ad(!srv_dict_stats_thread_active);
+
+ dict_stats_recalc_pool_deinit();
+
+ mutex_free(&recalc_pool_mutex);
+ memset(&recalc_pool_mutex, 0x0, sizeof(recalc_pool_mutex));
+
+ os_event_free(dict_stats_event);
+ dict_stats_event = NULL;
+}
+/* @} */
+
+/*****************************************************************//**
+Get the first table that has been added for auto recalc and, if enough
+time has passed since its last recalc, update its stats.
+dict_stats_process_entry_from_recalc_pool() @{ */
+static
+void
+dict_stats_process_entry_from_recalc_pool()
+/*=======================================*/
+{
+ table_id_t table_id;
+
+ ut_ad(!srv_read_only_mode);
+
+ /* pop the first table from the auto recalc pool */
+ if (!dict_stats_recalc_pool_get(&table_id)) {
+ /* no tables for auto recalc */
+ return;
+ }
+
+ dict_table_t* table;
+
+ mutex_enter(&dict_sys->mutex);
+
+ table = dict_table_open_on_id(table_id, TRUE, FALSE);
+
+ if (table == NULL) {
+ /* table does not exist, must have been DROPped
+ after its id was enqueued */
+ mutex_exit(&dict_sys->mutex);
+ return;
+ }
+
+ /* Check whether table is corrupted */
+ if (table->corrupted) {
+ dict_table_close(table, TRUE, FALSE);
+ mutex_exit(&dict_sys->mutex);
+ return;
+ }
+
+ table->stats_bg_flag = BG_STAT_IN_PROGRESS;
+
+ mutex_exit(&dict_sys->mutex);
+
+ /* ut_time() could be expensive, the current function
+ is called once every time a table has been changed more than 10% and
+ on a system with lots of small tables, this could become hot. If we
+ find out that this is a problem, then the check below could eventually
+ be replaced with something else, though a time interval is the natural
+ approach. */
+
+ if (ut_difftime(ut_time(), table->stats_last_recalc)
+ < MIN_RECALC_INTERVAL) {
+
+ /* Stats were (re)calculated not long ago. To avoid
+ too frequent stats updates we put back the table on
+ the auto recalc list and do nothing. */
+
+ dict_stats_recalc_pool_add(table);
+
+ } else {
+
+ dict_stats_update(table, DICT_STATS_RECALC_PERSISTENT);
+ }
+
+ mutex_enter(&dict_sys->mutex);
+
+ table->stats_bg_flag = BG_STAT_NONE;
+
+ dict_table_close(table, TRUE, FALSE);
+
+ mutex_exit(&dict_sys->mutex);
+}
+/* @} */
+
+/*****************************************************************//**
+This is the thread for background stats gathering. It pops tables from
+the auto recalc list and processes them, recalculating their
+statistics when needed.
+dict_stats_thread() @{
+@return this function does not return, it calls os_thread_exit() */
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(dict_stats_thread)(
+/*==============================*/
+ void* arg __attribute__((unused))) /*!< in: a dummy parameter
+ required by os_thread_create */
+{
+ ut_a(!srv_read_only_mode);
+
+ srv_dict_stats_thread_active = TRUE;
+
+ while (!SHUTTING_DOWN()) {
+
+ /* Wake up periodically even if not signaled. This is
+ because we may lose an event - if the below call to
+ dict_stats_process_entry_from_recalc_pool() puts the entry back
+ in the list, the os_event_set() will be lost by the subsequent
+ os_event_reset(). */
+ os_event_wait_time(
+ dict_stats_event, MIN_RECALC_INTERVAL * 1000000);
+
+ if (SHUTTING_DOWN()) {
+ break;
+ }
+
+ dict_stats_process_entry_from_recalc_pool();
+
+ os_event_reset(dict_stats_event);
+ }
+
+ srv_dict_stats_thread_active = FALSE;
+
+ /* We count the number of threads in os_thread_exit(). A created
+ thread should always use that to exit instead of return(). */
+ os_thread_exit(NULL);
+
+ OS_THREAD_DUMMY_RETURN;
+}
+/* @} */
+
+/* vim: set foldmethod=marker foldmarker=@{,@}: */
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc
index 2e6835fe0c0..a89875352c6 100644
--- a/storage/innobase/fil/fil0fil.cc
+++ b/storage/innobase/fil/fil0fil.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -25,6 +25,9 @@ Created 10/25/1995 Heikki Tuuri
#include "fil0fil.h"
+#include <debug_sync.h>
+#include <my_dbug.h>
+
#include "mem0mem.h"
#include "hash0hash.h"
#include "os0file.h"
@@ -41,7 +44,7 @@ Created 10/25/1995 Heikki Tuuri
#include "page0page.h"
#include "page0zip.h"
#include "trx0sys.h"
-#include "buf0rea.h"
+#include "row0mysql.h"
#ifndef UNIV_HOTBACKUP
# include "buf0lru.h"
# include "ibuf0ibuf.h"
@@ -138,7 +141,7 @@ UNIV_INTERN mysql_pfs_key_t fil_space_latch_key;
#endif /* UNIV_PFS_RWLOCK */
/** File node of a tablespace or the log data space */
-struct fil_node_struct {
+struct fil_node_t {
fil_space_t* space; /*!< backpointer to the space where this node
belongs */
char* name; /*!< path to the file */
@@ -172,11 +175,11 @@ struct fil_node_struct {
ulint magic_n;/*!< FIL_NODE_MAGIC_N */
};
-/** Value of fil_node_struct::magic_n */
+/** Value of fil_node_t::magic_n */
#define FIL_NODE_MAGIC_N 89389
/** Tablespace or log data space: let us call them by a common name space */
-struct fil_space_struct {
+struct fil_space_t {
char* name; /*!< space name = the path to the first file in
it */
ulint id; /*!< space id */
@@ -215,7 +218,8 @@ struct fil_space_struct {
last incomplete megabytes in data files may be
ignored if space == 0 */
ulint flags; /*!< tablespace flags; see
- fsp_flags_validate(), fsp_flags_get_zip_size() */
+ fsp_flags_is_valid(),
+ fsp_flags_get_zip_size() */
ulint n_reserved_extents;
/*!< number of reserved free extents for
ongoing operations like B-tree page split */
@@ -238,26 +242,23 @@ struct fil_space_struct {
UT_LIST_NODE_T(fil_space_t) unflushed_spaces;
/*!< list of spaces with at least one unflushed
file we have written to */
- ibool is_in_unflushed_spaces; /*!< TRUE if this space is
- currently in unflushed_spaces */
+ bool is_in_unflushed_spaces;
+ /*!< true if this space is currently in
+ unflushed_spaces */
UT_LIST_NODE_T(fil_space_t) space_list;
/*!< list of all spaces */
ulint magic_n;/*!< FIL_SPACE_MAGIC_N */
};
-/** Value of fil_space_struct::magic_n */
+/** Value of fil_space_t::magic_n */
#define FIL_SPACE_MAGIC_N 89472
-/** The tablespace memory cache */
-typedef struct fil_system_struct fil_system_t;
-
/** The tablespace memory cache; also the totality of logs (the log
data space) is stored here; below we talk about tablespaces, but also
the ib_logfiles form a 'space' and it is handled here */
-
-struct fil_system_struct {
+struct fil_system_t {
#ifndef UNIV_HOTBACKUP
- mutex_t mutex; /*!< The mutex protecting the cache */
+ ib_mutex_t mutex; /*!< The mutex protecting the cache */
#endif /* !UNIV_HOTBACKUP */
hash_table_t* spaces; /*!< The hash table of spaces in the
system; they are hashed on the space
@@ -313,7 +314,17 @@ initialized. */
static fil_system_t* fil_system = NULL;
/** Determine if (i) is a user tablespace id or not. */
-# define fil_is_user_tablespace_id(i) ((i) > srv_undo_tablespaces)
+# define fil_is_user_tablespace_id(i) ((i) > srv_undo_tablespaces_open)
+
+/** Determine if user has explicitly disabled fsync(). */
+#ifndef __WIN__
+# define fil_buffering_disabled(s) \
+ ((s)->purpose == FIL_TABLESPACE \
+ && srv_unix_file_flush_method \
+ == SRV_UNIX_O_DIRECT_NO_FSYNC)
+#else /* __WIN__ */
+# define fil_buffering_disabled(s) (0)
+#endif /* __WIN__ */
#ifdef UNIV_DEBUG
/** Try fil_validate() every this many times */
@@ -384,16 +395,6 @@ fil_node_complete_io(
the node as modified if
type == OS_FILE_WRITE */
/*******************************************************************//**
-Checks if a single-table tablespace for a given table name exists in the
-tablespace memory cache.
-@return space id, ULINT_UNDEFINED if not found */
-static
-ulint
-fil_get_space_id_for_table(
-/*=======================*/
- const char* name); /*!< in: table name in the standard
- 'databasename/tablename' format */
-/*******************************************************************//**
Frees a space object from the tablespace memory cache. Closes the files in
the chain but does not delete them. There must not be any pending i/o's or
flushes on the files.
@@ -412,7 +413,7 @@ calculating the byte offset within a space.
@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
i/o on a tablespace which does not exist */
UNIV_INLINE
-ulint
+dberr_t
fil_read(
/*=====*/
ibool sync, /*!< in: TRUE if synchronous aio is desired */
@@ -441,7 +442,7 @@ calculating the byte offset within a space.
@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
i/o on a tablespace which does not exist */
UNIV_INLINE
-ulint
+dberr_t
fil_write(
/*======*/
ibool sync, /*!< in: TRUE if synchronous aio is desired */
@@ -459,6 +460,8 @@ fil_write(
void* message) /*!< in: message for aio handler if non-sync
aio used, else ignored */
{
+ ut_ad(!srv_read_only_mode);
+
return(fil_io(OS_FILE_WRITE, sync, space_id, zip_size, block_offset,
byte_offset, len, buf, message));
}
@@ -592,9 +595,9 @@ fil_space_get_type(
/**********************************************************************//**
Checks if all the file nodes in a space are flushed. The caller must hold
the fil_system mutex.
-@return TRUE if all are flushed */
+@return true if all are flushed */
static
-ibool
+bool
fil_space_is_flushed(
/*=================*/
fil_space_t* space) /*!< in: space */
@@ -608,19 +611,21 @@ fil_space_is_flushed(
while (node) {
if (node->modification_counter > node->flush_counter) {
- return(FALSE);
+ ut_ad(!fil_buffering_disabled(space));
+ return(false);
}
node = UT_LIST_GET_NEXT(chain, node);
}
- return(TRUE);
+ return(true);
}
/*******************************************************************//**
-Appends a new file to the chain of files of a space. File must be closed. */
+Appends a new file to the chain of files of a space. File must be closed.
+@return pointer to the file name, or NULL on error */
UNIV_INTERN
-void
+char*
fil_node_create(
/*============*/
const char* name, /*!< in: file name (file must be closed) */
@@ -663,7 +668,7 @@ fil_node_create(
mutex_exit(&fil_system->mutex);
- return;
+ return(NULL);
}
space->size += size;
@@ -678,6 +683,8 @@ fil_node_create(
}
mutex_exit(&fil_system->mutex);
+
+ return(node->name);
}
/********************************************************************//**
@@ -718,7 +725,7 @@ fil_node_open_file(
OS_FILE_READ_ONLY, &success);
if (!success) {
/* The following call prints an error message */
- os_file_get_last_error(TRUE);
+ os_file_get_last_error(true);
ut_print_timestamp(stderr);
@@ -798,9 +805,9 @@ fil_node_open_file(
!= page_size)) {
fprintf(stderr,
"InnoDB: Error: tablespace file %s"
- " has page size %lx\n"
+ " has page size 0x%lx\n"
"InnoDB: but the data dictionary"
- " expects page size %lx!\n",
+ " expects page size 0x%lx!\n",
node->name, flags,
fsp_flags_get_page_size(space->flags));
@@ -809,9 +816,9 @@ fil_node_open_file(
if (UNIV_UNLIKELY(space->flags != flags)) {
fprintf(stderr,
- "InnoDB: Error: table flags are %lx"
+ "InnoDB: Error: table flags are 0x%lx"
" in the data dictionary\n"
- "InnoDB: but the flags in file %s are %lx!\n",
+ "InnoDB: but the flags in file %s are 0x%lx!\n",
space->flags, node->name, flags);
ut_error;
@@ -971,6 +978,7 @@ fil_try_to_close_file_in_LRU(
", because mod_count %ld != fl_count %ld\n",
(long) node->modification_counter,
(long) node->flush_counter);
+
}
if (node->being_extended) {
@@ -1143,10 +1151,15 @@ fil_node_free(
node->modification_counter = node->flush_counter;
- if (space->is_in_unflushed_spaces
- && fil_space_is_flushed(space)) {
+ if (fil_buffering_disabled(space)) {
+
+ ut_ad(!space->is_in_unflushed_spaces);
+ ut_ad(fil_space_is_flushed(space));
- space->is_in_unflushed_spaces = FALSE;
+ } else if (space->is_in_unflushed_spaces
+ && fil_space_is_flushed(space)) {
+
+ space->is_in_unflushed_spaces = false;
UT_LIST_REMOVE(unflushed_spaces,
system->unflushed_spaces,
@@ -1215,82 +1228,50 @@ fil_space_create(
{
fil_space_t* space;
- fsp_flags_validate(flags);
-
-try_again:
- /*printf(
- "InnoDB: Adding tablespace %lu of name %s, purpose %lu\n", id, name,
- purpose);*/
+ DBUG_EXECUTE_IF("fil_space_create_failure", return(false););
ut_a(fil_system);
- ut_a(name);
+ ut_a(fsp_flags_is_valid(flags));
- mutex_enter(&fil_system->mutex);
+ /* Look for a matching tablespace and if found free it. */
+ do {
+ mutex_enter(&fil_system->mutex);
- space = fil_space_get_by_name(name);
+ space = fil_space_get_by_name(name);
- if (UNIV_LIKELY_NULL(space)) {
- ibool success;
- ulint namesake_id;
+ if (space != 0) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Tablespace '%s' exists in the cache "
+ "with id %lu", name, (ulong) id);
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: trying to init to the"
- " tablespace memory cache\n"
- "InnoDB: a tablespace %lu of name ", (ulong) id);
- ut_print_filename(stderr, name);
- fprintf(stderr, ",\n"
- "InnoDB: but a tablespace %lu of the same name\n"
- "InnoDB: already exists in the"
- " tablespace memory cache!\n",
- (ulong) space->id);
+ if (id == 0 || purpose != FIL_TABLESPACE) {
- if (id == 0 || purpose != FIL_TABLESPACE) {
+ mutex_exit(&fil_system->mutex);
- mutex_exit(&fil_system->mutex);
+ return(FALSE);
+ }
- return(FALSE);
- }
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Freeing existing tablespace '%s' entry "
+ "from the cache with id %lu",
+ name, (ulong) id);
- fprintf(stderr,
- "InnoDB: We assume that InnoDB did a crash recovery,"
- " and you had\n"
- "InnoDB: an .ibd file for which the table"
- " did not exist in the\n"
- "InnoDB: InnoDB internal data dictionary in the"
- " ibdata files.\n"
- "InnoDB: We assume that you later removed the"
- " .ibd and .frm files,\n"
- "InnoDB: and are now trying to recreate the table."
- " We now remove the\n"
- "InnoDB: conflicting tablespace object"
- " from the memory cache and try\n"
- "InnoDB: the init again.\n");
-
- namesake_id = space->id;
-
- success = fil_space_free(namesake_id, FALSE);
- ut_a(success);
+ ibool success = fil_space_free(space->id, FALSE);
+ ut_a(success);
- mutex_exit(&fil_system->mutex);
+ mutex_exit(&fil_system->mutex);
+ }
- goto try_again;
- }
+ } while (space != 0);
space = fil_space_get_by_id(id);
- if (UNIV_LIKELY_NULL(space)) {
- fprintf(stderr,
- "InnoDB: Error: trying to add tablespace %lu"
- " of name ", (ulong) id);
- ut_print_filename(stderr, name);
- fprintf(stderr, "\n"
- "InnoDB: to the tablespace memory cache,"
- " but tablespace\n"
- "InnoDB: %lu of name ", (ulong) space->id);
- ut_print_filename(stderr, space->name);
- fputs(" already exists in the tablespace\n"
- "InnoDB: memory cache!\n", stderr);
+ if (space != 0) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Trying to add tablespace '%s' with id %lu "
+ "to the tablespace memory cache, but tablespace '%s' "
+ "with id %lu already exists in the cache!",
+ name, (ulong) id, space->name, (ulong) space->id);
mutex_exit(&fil_system->mutex);
@@ -1306,15 +1287,15 @@ try_again:
space->tablespace_version = fil_system->tablespace_version;
space->mark = FALSE;
- if (UNIV_LIKELY(purpose == FIL_TABLESPACE && !recv_recovery_on)
- && UNIV_UNLIKELY(id > fil_system->max_assigned_id)) {
+ if (purpose == FIL_TABLESPACE && !recv_recovery_on
+ && id > fil_system->max_assigned_id) {
+
if (!fil_system->space_id_reuse_warned) {
fil_system->space_id_reuse_warned = TRUE;
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: allocated tablespace %lu,"
- " old maximum was %lu\n",
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Allocated tablespace %lu, old maximum "
+ "was %lu",
(ulong) id,
(ulong) fil_system->max_assigned_id);
}
@@ -1333,7 +1314,7 @@ try_again:
HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash,
ut_fold_string(name), space);
- space->is_in_unflushed_spaces = FALSE;
+ space->is_in_unflushed_spaces = false;
UT_LIST_ADD_LAST(space_list, fil_system->space_list, space);
@@ -1418,7 +1399,6 @@ fil_space_free(
{
fil_space_t* space;
fil_space_t* fnamespace;
- fil_node_t* fil_node;
ut_ad(mutex_own(&fil_system->mutex));
@@ -1444,7 +1424,9 @@ fil_space_free(
ut_fold_string(space->name), space);
if (space->is_in_unflushed_spaces) {
- space->is_in_unflushed_spaces = FALSE;
+
+ ut_ad(!fil_buffering_disabled(space));
+ space->is_in_unflushed_spaces = false;
UT_LIST_REMOVE(unflushed_spaces, fil_system->unflushed_spaces,
space);
@@ -1455,12 +1437,11 @@ fil_space_free(
ut_a(space->magic_n == FIL_SPACE_MAGIC_N);
ut_a(0 == space->n_pending_flushes);
- fil_node = UT_LIST_GET_FIRST(space->chain);
+ for (fil_node_t* fil_node = UT_LIST_GET_FIRST(space->chain);
+ fil_node != NULL;
+ fil_node = UT_LIST_GET_FIRST(space->chain)) {
- while (fil_node != NULL) {
fil_node_free(fil_node, fil_system, space);
-
- fil_node = UT_LIST_GET_FIRST(space->chain);
}
ut_a(0 == UT_LIST_GET_LEN(space->chain));
@@ -1478,34 +1459,30 @@ fil_space_free(
}
/*******************************************************************//**
-Returns the size of the space in pages. The tablespace must be cached in the
-memory cache.
-@return space size, 0 if space not found */
-UNIV_INTERN
-ulint
-fil_space_get_size(
-/*===============*/
+Returns a pointer to the file_space_t that is in the memory cache
+associated with a space id. The caller must lock fil_system->mutex.
+@return file_space_t pointer, NULL if space not found */
+UNIV_INLINE
+fil_space_t*
+fil_space_get_space(
+/*================*/
ulint id) /*!< in: space id */
{
- fil_node_t* node;
fil_space_t* space;
- ulint size;
+ fil_node_t* node;
ut_ad(fil_system);
- fil_mutex_enter_and_prepare_for_io(id);
-
space = fil_space_get_by_id(id);
-
if (space == NULL) {
- mutex_exit(&fil_system->mutex);
-
- return(0);
+ return(NULL);
}
if (space->size == 0 && space->purpose == FIL_TABLESPACE) {
ut_a(id != 0);
+ /* The following code must change when InnoDB supports
+ multiple datafiles per tablespace. */
ut_a(1 == UT_LIST_GET_LEN(space->chain));
node = UT_LIST_GET_FIRST(space->chain);
@@ -1518,7 +1495,69 @@ fil_space_get_size(
fil_node_complete_io(node, fil_system, OS_FILE_READ);
}
- size = space->size;
+ return(space);
+}
+
+/*******************************************************************//**
+Returns the path from the first fil_node_t found for the space ID sent.
+The caller is responsible for freeing the memory allocated here for the
+value returned.
+@return own: A copy of fil_node_t::path, NULL if space ID is zero
+or not found. */
+UNIV_INTERN
+char*
+fil_space_get_first_path(
+/*=====================*/
+ ulint id) /*!< in: space id */
+{
+ fil_space_t* space;
+ fil_node_t* node;
+ char* path;
+
+ ut_ad(fil_system);
+ ut_a(id);
+
+ fil_mutex_enter_and_prepare_for_io(id);
+
+ space = fil_space_get_space(id);
+
+ if (space == NULL) {
+ mutex_exit(&fil_system->mutex);
+
+ return(NULL);
+ }
+
+ ut_ad(mutex_own(&fil_system->mutex));
+
+ node = UT_LIST_GET_FIRST(space->chain);
+
+ path = mem_strdup(node->name);
+
+ mutex_exit(&fil_system->mutex);
+
+ return(path);
+}
+
+/*******************************************************************//**
+Returns the size of the space in pages. The tablespace must be cached in the
+memory cache.
+@return space size, 0 if space not found */
+UNIV_INTERN
+ulint
+fil_space_get_size(
+/*===============*/
+ ulint id) /*!< in: space id */
+{
+ fil_space_t* space;
+ ulint size;
+
+ ut_ad(fil_system);
+
+ fil_mutex_enter_and_prepare_for_io(id);
+
+ space = fil_space_get_space(id);
+
+ size = space ? space->size : 0;
mutex_exit(&fil_system->mutex);
@@ -1535,19 +1574,18 @@ fil_space_get_flags(
/*================*/
ulint id) /*!< in: space id */
{
- fil_node_t* node;
fil_space_t* space;
ulint flags;
ut_ad(fil_system);
- if (UNIV_UNLIKELY(!id)) {
+ if (!id) {
return(0);
}
fil_mutex_enter_and_prepare_for_io(id);
- space = fil_space_get_by_id(id);
+ space = fil_space_get_space(id);
if (space == NULL) {
mutex_exit(&fil_system->mutex);
@@ -1555,21 +1593,6 @@ fil_space_get_flags(
return(ULINT_UNDEFINED);
}
- if (space->size == 0 && space->purpose == FIL_TABLESPACE) {
- ut_a(id != 0);
-
- ut_a(1 == UT_LIST_GET_LEN(space->chain));
-
- node = UT_LIST_GET_FIRST(space->chain);
-
- /* It must be a single-table tablespace and we have not opened
- the file yet; the following calls will open it and update the
- size fields */
-
- fil_node_prepare_for_io(node, fil_system, space);
- fil_node_complete_io(node, fil_system, OS_FILE_READ);
- }
-
flags = space->flags;
mutex_exit(&fil_system->mutex);
@@ -1744,6 +1767,49 @@ fil_close_all_files(void)
}
/*******************************************************************//**
+Closes the redo log files. There must not be any pending i/o's or not
+flushed modifications in the files. */
+UNIV_INTERN
+void
+fil_close_log_files(
+/*================*/
+ bool free) /*!< in: whether to free the memory object */
+{
+ fil_space_t* space;
+
+ mutex_enter(&fil_system->mutex);
+
+ space = UT_LIST_GET_FIRST(fil_system->space_list);
+
+ while (space != NULL) {
+ fil_node_t* node;
+ fil_space_t* prev_space = space;
+
+ if (space->purpose != FIL_LOG) {
+ space = UT_LIST_GET_NEXT(space_list, space);
+ continue;
+ }
+
+ for (node = UT_LIST_GET_FIRST(space->chain);
+ node != NULL;
+ node = UT_LIST_GET_NEXT(chain, node)) {
+
+ if (node->open) {
+ fil_node_close_file(node, fil_system);
+ }
+ }
+
+ space = UT_LIST_GET_NEXT(space_list, space);
+
+ if (free) {
+ fil_space_free(prev_space->id, FALSE);
+ }
+ }
+
+ mutex_exit(&fil_system->mutex);
+}
+
+/*******************************************************************//**
Sets the max tablespace id counter if the given number is bigger than the
previous value. */
UNIV_INTERN
@@ -1773,8 +1839,8 @@ fil_set_max_space_id_if_bigger(
Writes the flushed lsn and the latest archived log number to the page header
of the first page of a data file of the system tablespace (space 0),
which is uncompressed. */
-static
-ulint
+static __attribute__((warn_unused_result))
+dberr_t
fil_write_lsn_and_arch_no_to_file(
/*==============================*/
ulint space, /*!< in: space to write to */
@@ -1786,19 +1852,23 @@ fil_write_lsn_and_arch_no_to_file(
{
byte* buf1;
byte* buf;
+ dberr_t err;
buf1 = static_cast<byte*>(mem_alloc(2 * UNIV_PAGE_SIZE));
buf = static_cast<byte*>(ut_align(buf1, UNIV_PAGE_SIZE));
- fil_read(TRUE, space, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
-
- mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn);
+ err = fil_read(TRUE, space, 0, sum_of_sizes, 0,
+ UNIV_PAGE_SIZE, buf, NULL);
+ if (err == DB_SUCCESS) {
+ mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn);
- fil_write(TRUE, space, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
+ err = fil_write(TRUE, space, 0, sum_of_sizes, 0,
+ UNIV_PAGE_SIZE, buf, NULL);
+ }
mem_free(buf1);
- return(DB_SUCCESS);
+ return(err);
}
/****************************************************************//**
@@ -1806,7 +1876,7 @@ Writes the flushed lsn and the latest archived log number to the page
header of the first page of each data file in the system tablespace.
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
fil_write_flushed_lsn_to_data_files(
/*================================*/
lsn_t lsn, /*!< in: lsn to write */
@@ -1814,7 +1884,7 @@ fil_write_flushed_lsn_to_data_files(
{
fil_space_t* space;
fil_node_t* node;
- ulint err;
+ dberr_t err;
mutex_enter(&fil_system->mutex);
@@ -1830,7 +1900,6 @@ fil_write_flushed_lsn_to_data_files(
if (space->purpose == FIL_TABLESPACE
&& !fil_is_user_tablespace_id(space->id)) {
-
ulint sum_of_sizes = 0;
for (node = UT_LIST_GET_FIRST(space->chain);
@@ -1872,6 +1941,7 @@ fil_read_first_page(
parameters below already
contain sensible data */
ulint* flags, /*!< out: tablespace flags */
+ ulint* space_id, /*!< out: tablespace ID */
#ifdef UNIV_LOG_ARCHIVE
ulint* min_arch_log_no, /*!< out: min of archived
log numbers in data files */
@@ -1897,7 +1967,9 @@ fil_read_first_page(
*flags = fsp_header_get_flags(page);
- flushed_lsn = mach_read_from_8(page+ FIL_PAGE_FILE_FLUSH_LSN);
+ *space_id = fsp_header_get_space_id(page);
+
+ flushed_lsn = mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN);
ut_free(buf);
@@ -2102,6 +2174,12 @@ created does not exist, then we create the directory, too.
Note that ibbackup --apply-log sets fil_path_to_mysql_datadir to point to the
datadir that we should use in replaying the file operations.
+
+InnoDB recovery does not replay these fully since it always sets the space id
+to zero. But ibbackup does replay them. TODO: If remote tablespaces are used,
+ibbackup will only create tables in the default directory since MLOG_FILE_CREATE
+and MLOG_FILE_CREATE2 only know the tablename, not the path.
+
@return end of log record, or NULL if the record was not completely
contained between ptr and end_ptr */
UNIV_INTERN
@@ -2197,7 +2275,9 @@ fil_op_log_parse_or_replay(
switch (type) {
case MLOG_FILE_DELETE:
if (fil_tablespace_exists_in_mem(space_id)) {
- ut_a(fil_delete_tablespace(space_id));
+ dberr_t err = fil_delete_tablespace(
+ space_id, BUF_REMOVE_FLUSH_NO_WRITE);
+ ut_a(err == DB_SUCCESS);
}
break;
@@ -2218,10 +2298,10 @@ fil_op_log_parse_or_replay(
if (fil_get_space_id_for_table(new_name)
== ULINT_UNDEFINED) {
- /* We do not care of the old name, that is
- why we pass NULL as the first argument */
+ /* We do not care about the old name, that
+ is why we pass NULL as the first argument. */
if (!fil_rename_tablespace(NULL, space_id,
- new_name)) {
+ new_name, NULL)) {
ut_error;
}
}
@@ -2239,12 +2319,14 @@ fil_op_log_parse_or_replay(
} else if (log_flags & MLOG_FILE_FLAG_TEMP) {
/* Temporary table, do nothing */
} else {
+ const char* path = NULL;
+
/* Create the database directory for name, if it does
not exist yet */
fil_create_directory_for_tablename(name);
if (fil_create_new_single_table_tablespace(
- space_id, name, FALSE, flags,
+ space_id, name, path, flags,
DICT_TF2_USE_TABLESPACE,
FIL_IBD_FILE_INITIAL_SIZE) != DB_SUCCESS) {
ut_error;
@@ -2261,118 +2343,271 @@ fil_op_log_parse_or_replay(
}
/*******************************************************************//**
-Deletes a single-table tablespace. The tablespace must be cached in the
-memory cache.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-fil_delete_tablespace(
-/*==================*/
- ulint id) /*!< in: space id */
+Allocates a file name for the EXPORT/IMPORT config file name. The
+string must be freed by caller with mem_free().
+@return own: file name */
+static
+char*
+fil_make_cfg_name(
+/*==============*/
+ const char* filepath) /*!< in: .ibd file name */
{
- ibool success;
- fil_space_t* space;
- fil_node_t* node;
- ulint count = 0;
- char* path;
+ char* cfg_name;
- ut_a(id != 0);
-stop_new_ops:
- mutex_enter(&fil_system->mutex);
+ /* Create a temporary file path by replacing the .ibd suffix
+ with .cfg. */
- space = fil_space_get_by_id(id);
+ ut_ad(strlen(filepath) > 4);
- if (space != NULL) {
- space->stop_new_ops = TRUE;
+ cfg_name = mem_strdup(filepath);
+ ut_snprintf(cfg_name + strlen(cfg_name) - 3, 4, "cfg");
+ return(cfg_name);
+}
- if (space->n_pending_ops == 0) {
- mutex_exit(&fil_system->mutex);
+/*******************************************************************//**
+Check for change buffer merges.
+@return 0 if no merges else count + 1. */
+static
+ulint
+fil_ibuf_check_pending_ops(
+/*=======================*/
+ fil_space_t* space, /*!< in/out: Tablespace to check */
+ ulint count) /*!< in: number of attempts so far */
+{
+ ut_ad(mutex_own(&fil_system->mutex));
- count = 0;
+ if (space != 0 && space->n_pending_ops != 0) {
- goto try_again;
- } else {
- if (count > 5000) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Warning: trying to"
- " delete tablespace ", stderr);
- ut_print_filename(stderr, space->name);
- fprintf(stderr, ",\n"
- "InnoDB: but there are %lu pending"
- " operations (most likely ibuf merges)"
- " on it.\n"
- "InnoDB: Loop %lu.\n",
- (ulong) space->n_pending_ops,
- (ulong) count);
- }
+ if (count > 5000) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Trying to close/delete tablespace "
+ "'%s' but there are %lu pending change "
+ "buffer merges on it.",
+ space->name,
+ (ulong) space->n_pending_ops);
+ }
- mutex_exit(&fil_system->mutex);
+ return(count + 1);
+ }
- os_thread_sleep(20000);
- count++;
+ return(0);
+}
+
+/*******************************************************************//**
+Check for pending IO.
+@return 0 if no pending else count + 1. */
+static
+ulint
+fil_check_pending_io(
+/*=================*/
+ fil_space_t* space, /*!< in/out: Tablespace to check */
+ fil_node_t** node, /*!< out: Node in space list */
+ ulint count) /*!< in: number of attempts so far */
+{
+ ut_ad(mutex_own(&fil_system->mutex));
+ ut_a(space->n_pending_ops == 0);
+
+ /* The following code must change when InnoDB supports
+ multiple datafiles per tablespace. */
+ ut_a(UT_LIST_GET_LEN(space->chain) == 1);
+
+ *node = UT_LIST_GET_FIRST(space->chain);
+
+ if (space->n_pending_flushes > 0 || (*node)->n_pending > 0) {
+
+ ut_a(!(*node)->being_extended);
- goto stop_new_ops;
+ if (count > 1000) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Trying to close/delete tablespace '%s' "
+ "but there are %lu flushes "
+ " and %lu pending i/o's on it.",
+ space->name,
+ (ulong) space->n_pending_flushes,
+ (ulong) (*node)->n_pending);
}
+
+ return(count + 1);
}
- mutex_exit(&fil_system->mutex);
- count = 0;
+ return(0);
+}
+
+/*******************************************************************//**
+Check pending operations on a tablespace.
+@return DB_SUCCESS or error failure. */
+static
+dberr_t
+fil_check_pending_operations(
+/*=========================*/
+ ulint id, /*!< in: space id */
+ fil_space_t** space, /*!< out: tablespace instance in memory */
+ char** path) /*!< out/own: tablespace path */
+{
+ ulint count = 0;
+
+ ut_a(id != TRX_SYS_SPACE);
+ ut_ad(space);
+
+ *space = 0;
-try_again:
mutex_enter(&fil_system->mutex);
+ fil_space_t* sp = fil_space_get_by_id(id);
+ if (sp) {
+ sp->stop_new_ops = TRUE;
+ }
+ mutex_exit(&fil_system->mutex);
- space = fil_space_get_by_id(id);
+ /* Check for pending change buffer merges. */
- if (space == NULL) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: cannot delete tablespace %lu\n"
- "InnoDB: because it is not found in the"
- " tablespace memory cache.\n",
- (ulong) id);
+ do {
+ mutex_enter(&fil_system->mutex);
+
+ sp = fil_space_get_by_id(id);
+
+ count = fil_ibuf_check_pending_ops(sp, count);
mutex_exit(&fil_system->mutex);
- return(FALSE);
- }
+ if (count > 0) {
+ os_thread_sleep(20000);
+ }
- ut_a(space->stop_new_ops);
- ut_a(space->n_pending_ops == 0);
+ } while (count > 0);
- /* TODO: The following code must change when InnoDB supports
- multiple datafiles per tablespace. */
- ut_a(UT_LIST_GET_LEN(space->chain) == 1);
+ /* Check for pending IO. */
- node = UT_LIST_GET_FIRST(space->chain);
+ *path = 0;
- if (space->n_pending_flushes > 0 || node->n_pending > 0
- || node->being_extended) {
- if (count > 1000) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Warning: trying to"
- " delete tablespace ", stderr);
- ut_print_filename(stderr, space->name);
- fprintf(stderr, ",\n"
- "InnoDB: but there are %lu flushes"
- " and %lu pending i/o's on it\n"
- "InnoDB: Or it is being extended\n"
- "InnoDB: Loop %lu.\n",
- (ulong) space->n_pending_flushes,
- (ulong) node->n_pending,
- (ulong) count);
+ do {
+ mutex_enter(&fil_system->mutex);
+
+ sp = fil_space_get_by_id(id);
+
+ if (sp == NULL) {
+ mutex_exit(&fil_system->mutex);
+ return(DB_TABLESPACE_NOT_FOUND);
+ }
+
+ fil_node_t* node;
+
+ count = fil_check_pending_io(sp, &node, count);
+
+ if (count == 0) {
+ *path = mem_strdup(node->name);
}
+
mutex_exit(&fil_system->mutex);
- os_thread_sleep(20000);
- count++;
+ if (count > 0) {
+ os_thread_sleep(20000);
+ }
+
+ } while (count > 0);
+
+ ut_ad(sp);
+
+ *space = sp;
+ return(DB_SUCCESS);
+}
+
+/*******************************************************************//**
+Closes a single-table tablespace. The tablespace must be cached in the
+memory cache. Free all pages used by the tablespace.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+dberr_t
+fil_close_tablespace(
+/*=================*/
+ trx_t* trx, /*!< in/out: Transaction covering the close */
+ ulint id) /*!< in: space id */
+{
+ char* path = 0;
+ fil_space_t* space = 0;
+
+ ut_a(id != TRX_SYS_SPACE);
+
+ dberr_t err = fil_check_pending_operations(id, &space, &path);
- goto try_again;
+ if (err != DB_SUCCESS) {
+ return(err);
}
- path = mem_strdup(node->name);
+ ut_a(space);
+ ut_a(path != 0);
+
+ rw_lock_x_lock(&space->latch);
+
+#ifndef UNIV_HOTBACKUP
+ /* Invalidate in the buffer pool all pages belonging to the
+ tablespace. Since we have set space->stop_new_ops = TRUE, readahead
+ or ibuf merge can no longer read more pages of this tablespace to the
+ buffer pool. Thus we can clean the tablespace out of the buffer pool
+ completely and permanently. The flag stop_new_ops also prevents
+ fil_flush() from being applied to this tablespace. */
+
+ buf_LRU_flush_or_remove_pages(id, BUF_REMOVE_FLUSH_WRITE, trx);
+#endif
+ mutex_enter(&fil_system->mutex);
+
+ /* If the free is successful, the X lock will be released before
+ the space memory data structure is freed. */
+
+ if (!fil_space_free(id, TRUE)) {
+ rw_lock_x_unlock(&space->latch);
+ err = DB_TABLESPACE_NOT_FOUND;
+ } else {
+ err = DB_SUCCESS;
+ }
mutex_exit(&fil_system->mutex);
+ /* If it is a delete then also delete any generated files, otherwise
+ when we drop the database the remove directory will fail. */
+
+ char* cfg_name = fil_make_cfg_name(path);
+
+ os_file_delete_if_exists(cfg_name);
+
+ mem_free(path);
+ mem_free(cfg_name);
+
+ return(err);
+}
+
+/*******************************************************************//**
+Deletes a single-table tablespace. The tablespace must be cached in the
+memory cache.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+dberr_t
+fil_delete_tablespace(
+/*==================*/
+ ulint id, /*!< in: space id */
+ buf_remove_t buf_remove) /*!< in: specify the action to take
+ on the tables pages in the buffer
+ pool */
+{
+ char* path = 0;
+ fil_space_t* space = 0;
+
+ ut_a(id != TRX_SYS_SPACE);
+
+ dberr_t err = fil_check_pending_operations(id, &space, &path);
+
+ if (err != DB_SUCCESS) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot delete tablespace %lu because it is not "
+ "found in the tablespace memory cache.",
+ (ulong) id);
+
+ return(err);
+ }
+
+ ut_a(space);
+ ut_a(path != 0);
+
/* Important: We rely on the data dictionary mutex to ensure
that a race is not possible here. It should serialize the tablespace
drop/free. We acquire an X latch only to avoid a race condition
@@ -2407,9 +2642,22 @@ try_again:
To deal with potential read requests by checking the
::stop_new_ops flag in fil_io() */
- buf_LRU_invalidate_tablespace(id);
-#endif
- /* printf("Deleting tablespace %s id %lu\n", space->name, id); */
+ buf_LRU_flush_or_remove_pages(id, buf_remove, 0);
+
+#endif /* !UNIV_HOTBACKUP */
+
+ /* If it is a delete then also delete any generated files, otherwise
+ when we drop the database the remove directory will fail. */
+ {
+ char* cfg_name = fil_make_cfg_name(path);
+ os_file_delete_if_exists(cfg_name);
+ mem_free(cfg_name);
+ }
+
+ /* Delete the link file pointing to the ibd file we are deleting. */
+ if (FSP_FLAGS_HAS_DATA_DIR(space->flags)) {
+ fil_delete_link_file(space->name);
+ }
mutex_enter(&fil_system->mutex);
@@ -2418,25 +2666,27 @@ try_again:
if (fil_space_get_by_id(id)) {
ut_a(space->n_pending_ops == 0);
ut_a(UT_LIST_GET_LEN(space->chain) == 1);
- node = UT_LIST_GET_FIRST(space->chain);
+ fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
ut_a(node->n_pending == 0);
}
- success = fil_space_free(id, TRUE);
+ if (!fil_space_free(id, TRUE)) {
+ err = DB_TABLESPACE_NOT_FOUND;
+ }
mutex_exit(&fil_system->mutex);
- if (success) {
- success = os_file_delete(path);
-
- if (!success) {
- success = os_file_delete_if_exists(path);
- }
- } else {
+ if (err != DB_SUCCESS) {
rw_lock_x_unlock(&space->latch);
+ } else if (!os_file_delete(path) && !os_file_delete_if_exists(path)) {
+
+ /* Note: This is because we have removed the
+ tablespace instance from the cache. */
+
+ err = DB_IO_ERROR;
}
- if (success) {
+ if (err == DB_SUCCESS) {
#ifndef UNIV_HOTBACKUP
/* Write a log record about the deletion of the .ibd
file, so that ibbackup can replay it in the
@@ -2451,14 +2701,12 @@ try_again:
fil_op_write_log(MLOG_FILE_DELETE, id, 0, 0, path, NULL, &mtr);
mtr_commit(&mtr);
#endif
- mem_free(path);
-
- return(TRUE);
+ err = DB_SUCCESS;
}
mem_free(path);
- return(FALSE);
+ return(err);
}
/*******************************************************************//**
@@ -2490,36 +2738,49 @@ fil_tablespace_is_being_deleted(
/*******************************************************************//**
Discards a single-table tablespace. The tablespace must be cached in the
memory cache. Discarding is like deleting a tablespace, but
-1) we do not drop the table from the data dictionary;
-2) we remove all insert buffer entries for the tablespace immediately; in DROP
-TABLE they are only removed gradually in the background;
-3) when the user does IMPORT TABLESPACE, the tablespace will have the same id
-as it originally had.
-@return TRUE if success */
+
+ 1. We do not drop the table from the data dictionary;
+
+ 2. We remove all insert buffer entries for the tablespace immediately;
+ in DROP TABLE they are only removed gradually in the background;
+
+ 3. Free all the pages in use by the tablespace.
+@return DB_SUCCESS or error */
UNIV_INTERN
-ibool
+dberr_t
fil_discard_tablespace(
/*===================*/
ulint id) /*!< in: space id */
{
- ibool success;
+ dberr_t err;
- success = fil_delete_tablespace(id);
+ switch (err = fil_delete_tablespace(id, BUF_REMOVE_ALL_NO_WRITE)) {
+ case DB_SUCCESS:
+ break;
- if (!success) {
- fprintf(stderr,
- "InnoDB: Warning: cannot delete tablespace %lu"
- " in DISCARD TABLESPACE.\n"
- "InnoDB: But let us remove the"
- " insert buffer entries for this tablespace.\n",
- (ulong) id);
+ case DB_IO_ERROR:
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "While deleting tablespace %lu in DISCARD TABLESPACE."
+ " File rename/delete failed: %s",
+ (ulong) id, ut_strerr(err));
+ break;
+
+ case DB_TABLESPACE_NOT_FOUND:
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Cannot delete tablespace %lu in DISCARD "
+ "TABLESPACE. %s",
+ (ulong) id, ut_strerr(err));
+ break;
+
+ default:
+ ut_error;
}
/* Remove all insert buffer entries for the tablespace */
ibuf_delete_for_discarded_space(id);
- return(success);
+ return(err);
}
#endif /* !UNIV_HOTBACKUP */
@@ -2575,30 +2836,27 @@ fil_rename_tablespace_in_mem(
Allocates a file name for a single-table tablespace. The string must be freed
by caller with mem_free().
@return own: file name */
-static
+UNIV_INTERN
char*
fil_make_ibd_name(
/*==============*/
- const char* name, /*!< in: table name or a dir path of a
- TEMPORARY table */
- ibool is_temp) /*!< in: TRUE if it is a dir path */
+ const char* name, /*!< in: table name or a dir path */
+ bool is_full_path) /*!< in: TRUE if it is a dir path */
{
char* filename;
ulint namelen = strlen(name);
ulint dirlen = strlen(fil_path_to_mysql_datadir);
+ ulint pathlen = dirlen + namelen + sizeof "/.ibd";
- filename = static_cast<char*>(
- mem_alloc(namelen + dirlen + sizeof "/.ibd"));
+ filename = static_cast<char*>(mem_alloc(pathlen));
- if (is_temp) {
+ if (is_full_path) {
memcpy(filename, name, namelen);
memcpy(filename + namelen, ".ibd", sizeof ".ibd");
} else {
- memcpy(filename, fil_path_to_mysql_datadir, dirlen);
- filename[dirlen] = '/';
+ ut_snprintf(filename, pathlen, "%s/%s.ibd",
+ fil_path_to_mysql_datadir, name);
- memcpy(filename + dirlen + 1, name, namelen);
- memcpy(filename + dirlen + namelen + 1, ".ibd", sizeof ".ibd");
}
srv_normalize_path_for_win(filename);
@@ -2607,6 +2865,31 @@ fil_make_ibd_name(
}
/*******************************************************************//**
+Allocates a file name for a tablespace ISL file (InnoDB Symbolic Link).
+The string must be freed by caller with mem_free().
+@return own: file name */
+UNIV_INTERN
+char*
+fil_make_isl_name(
+/*==============*/
+ const char* name) /*!< in: table name */
+{
+ char* filename;
+ ulint namelen = strlen(name);
+ ulint dirlen = strlen(fil_path_to_mysql_datadir);
+ ulint pathlen = dirlen + namelen + sizeof "/.isl";
+
+ filename = static_cast<char*>(mem_alloc(pathlen));
+
+ ut_snprintf(filename, pathlen, "%s/%s.isl",
+ fil_path_to_mysql_datadir, name);
+
+ srv_normalize_path_for_win(filename);
+
+ return(filename);
+}
+
+/*******************************************************************//**
Renames a single-table tablespace. The tablespace must be cached in the
tablespace memory cache.
@return TRUE if success */
@@ -2614,14 +2897,19 @@ UNIV_INTERN
ibool
fil_rename_tablespace(
/*==================*/
- const char* old_name_in, /*!< in: old table name in the standard
- databasename/tablename format of
- InnoDB, or NULL if we do the rename
- based on the space id only */
+ const char* old_name_in, /*!< in: old table name in the
+ standard databasename/tablename
+ format of InnoDB, or NULL if we
+ do the rename based on the space
+ id only */
ulint id, /*!< in: space id */
- const char* new_name) /*!< in: new table name in the standard
- databasename/tablename format
- of InnoDB */
+ const char* new_name, /*!< in: new table name in the
+ standard databasename/tablename
+ format of InnoDB */
+ const char* new_path_in) /*!< in: new full datafile path
+ if the tablespace is remotely
+ located, or NULL if it is located
+ in the normal data directory. */
{
ibool success;
fil_space_t* space;
@@ -2651,14 +2939,14 @@ retry:
space = fil_space_get_by_id(id);
+ DBUG_EXECUTE_IF("fil_rename_tablespace_failure_1", space = NULL; );
+
if (space == NULL) {
- fprintf(stderr,
- "InnoDB: Error: cannot find space id %lu"
- " in the tablespace memory cache\n"
- "InnoDB: though the table ", (ulong) id);
- ut_print_filename(stderr,
- old_name_in ? old_name_in : not_given);
- fputs(" in a rename operation should have that id\n", stderr);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot find space id %lu in the tablespace "
+ "memory cache, though the table '%s' in a "
+ "rename operation should have that id.",
+ (ulong) id, old_name_in ? old_name_in : not_given);
mutex_exit(&fil_system->mutex);
return(FALSE);
@@ -2677,10 +2965,13 @@ retry:
space->stop_ios = TRUE;
+ /* The following code must change when InnoDB supports
+ multiple datafiles per tablespace. */
ut_a(UT_LIST_GET_LEN(space->chain) == 1);
node = UT_LIST_GET_FIRST(space->chain);
- if (node->n_pending > 0 || node->n_pending_flushes > 0
+ if (node->n_pending > 0
+ || node->n_pending_flushes > 0
|| node->being_extended) {
/* There are pending i/o's or flushes or the file is
currently being extended, sleep for a while and
@@ -2713,24 +3004,31 @@ retry:
if (old_name_in) {
old_name = mem_strdup(old_name_in);
- old_path = fil_make_ibd_name(old_name, FALSE);
-
ut_a(strcmp(space->name, old_name) == 0);
- ut_a(strcmp(node->name, old_path) == 0);
} else {
old_name = mem_strdup(space->name);
- old_path = mem_strdup(node->name);
}
+ old_path = mem_strdup(node->name);
/* Rename the tablespace and the node in the memory cache */
- new_path = fil_make_ibd_name(new_name, FALSE);
+ new_path = new_path_in ? mem_strdup(new_path_in)
+ : fil_make_ibd_name(new_name, false);
+
success = fil_rename_tablespace_in_mem(
space, node, new_name, new_path);
if (success) {
+
+ DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2",
+ goto skip_second_rename; );
+
success = os_file_rename(
innodb_file_data_key, old_path, new_path);
+ DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2",
+skip_second_rename:
+ success = FALSE; );
+
if (!success) {
/* We have to revert the changes we made
to the tablespace memory cache */
@@ -2745,7 +3043,7 @@ retry:
mutex_exit(&fil_system->mutex);
#ifndef UNIV_HOTBACKUP
- if (success) {
+ if (success && !recv_recovery_on) {
mtr_t mtr;
mtr_start(&mtr);
@@ -2754,7 +3052,7 @@ retry:
&mtr);
mtr_commit(&mtr);
}
-#endif
+#endif /* !UNIV_HOTBACKUP */
mem_free(new_path);
mem_free(old_path);
@@ -2764,23 +3062,202 @@ retry:
}
/*******************************************************************//**
+Creates a new InnoDB Symbolic Link (ISL) file. It is always created
+under the 'datadir' of MySQL. The datadir is the directory of a
+running mysqld program. We can refer to it by simply using the path '.'.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fil_create_link_file(
+/*=================*/
+ const char* tablename, /*!< in: tablename */
+ const char* filepath) /*!< in: pathname of tablespace */
+{
+ os_file_t file;
+ ibool success;
+ dberr_t err = DB_SUCCESS;
+ char* link_filepath;
+ char* prev_filepath = fil_read_link_file(tablename);
+
+ ut_ad(!srv_read_only_mode);
+
+ if (prev_filepath) {
+ /* Truncate will call this with an existing
+ link file which contains the same filepath. */
+ if (0 == strcmp(prev_filepath, filepath)) {
+ mem_free(prev_filepath);
+ return(DB_SUCCESS);
+ }
+ mem_free(prev_filepath);
+ }
+
+ link_filepath = fil_make_isl_name(tablename);
+
+ file = os_file_create_simple_no_error_handling(
+ innodb_file_data_key, link_filepath,
+ OS_FILE_CREATE, OS_FILE_READ_WRITE, &success);
+
+ if (!success) {
+ /* The following call will print an error message */
+ ulint error = os_file_get_last_error(true);
+
+ ut_print_timestamp(stderr);
+ fputs(" InnoDB: Cannot create file ", stderr);
+ ut_print_filename(stderr, link_filepath);
+ fputs(".\n", stderr);
+
+ if (error == OS_FILE_ALREADY_EXISTS) {
+ fputs("InnoDB: The link file: ", stderr);
+ ut_print_filename(stderr, filepath);
+ fputs(" already exists.\n", stderr);
+ err = DB_TABLESPACE_EXISTS;
+
+ } else if (error == OS_FILE_DISK_FULL) {
+ err = DB_OUT_OF_FILE_SPACE;
+
+ } else {
+ err = DB_ERROR;
+ }
+
+ /* file is not open, no need to close it. */
+ mem_free(link_filepath);
+ return(err);
+ }
+
+ if (!os_file_write(link_filepath, file, filepath, 0,
+ strlen(filepath))) {
+ err = DB_ERROR;
+ }
+
+ /* Close the file, we only need it at startup */
+ os_file_close(file);
+
+ mem_free(link_filepath);
+
+ return(err);
+}
+
+/*******************************************************************//**
+Deletes an InnoDB Symbolic Link (ISL) file. */
+UNIV_INTERN
+void
+fil_delete_link_file(
+/*=================*/
+ const char* tablename) /*!< in: name of table */
+{
+ char* link_filepath = fil_make_isl_name(tablename);
+
+ os_file_delete_if_exists(link_filepath);
+
+ mem_free(link_filepath);
+}
+
+/*******************************************************************//**
+Reads an InnoDB Symbolic Link (ISL) file.
+It is always created under the 'datadir' of MySQL. The name is of the
+form {databasename}/{tablename}. and the isl file is expected to be in a
+'{databasename}' directory called '{tablename}.isl'. The caller must free
+the memory of the null-terminated path returned if it is not null.
+@return own: filepath found in link file, NULL if not found. */
+UNIV_INTERN
+char*
+fil_read_link_file(
+/*===============*/
+ const char* name) /*!< in: tablespace name */
+{
+ char* filepath = NULL;
+ char* link_filepath;
+ FILE* file = NULL;
+
+ /* The .isl file is in the 'normal' tablespace location. */
+ link_filepath = fil_make_isl_name(name);
+
+ file = fopen(link_filepath, "r+b");
+
+ mem_free(link_filepath);
+
+ if (file) {
+ filepath = static_cast<char*>(mem_alloc(OS_FILE_MAX_PATH));
+
+ os_file_read_string(file, filepath, OS_FILE_MAX_PATH);
+ fclose(file);
+
+ if (strlen(filepath)) {
+ /* Trim whitespace from end of filepath */
+ ulint lastch = strlen(filepath) - 1;
+ while (lastch > 4 && filepath[lastch] <= 0x20) {
+ filepath[lastch--] = 0x00;
+ }
+ srv_normalize_path_for_win(filepath);
+ }
+ }
+
+ return(filepath);
+}
+
+/*******************************************************************//**
+Opens a handle to the file linked to in an InnoDB Symbolic Link file.
+@return TRUE if remote linked tablespace file is found and opened. */
+UNIV_INTERN
+ibool
+fil_open_linked_file(
+/*===============*/
+ const char* tablename, /*!< in: database/tablename */
+ char** remote_filepath,/*!< out: remote filepath */
+ os_file_t* remote_file) /*!< out: remote file handle */
+
+{
+ ibool success;
+
+ *remote_filepath = fil_read_link_file(tablename);
+ if (*remote_filepath == NULL) {
+ return(FALSE);
+ }
+
+ /* The filepath provided is different from what was
+ found in the link file. */
+ *remote_file = os_file_create_simple_no_error_handling(
+ innodb_file_data_key, *remote_filepath,
+ OS_FILE_OPEN, OS_FILE_READ_ONLY,
+ &success);
+
+ if (!success) {
+ char* link_filepath = fil_make_isl_name(tablename);
+
+ /* The following call prints an error message */
+ os_file_get_last_error(true);
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "A link file was found named '%s' "
+ "but the linked tablespace '%s' "
+ "could not be opened.",
+ link_filepath, *remote_filepath);
+
+ mem_free(link_filepath);
+ mem_free(*remote_filepath);
+ *remote_filepath = NULL;
+ }
+
+ return(success);
+}
+
+/*******************************************************************//**
Creates a new single-table tablespace to a database directory of MySQL.
Database directories are under the 'datadir' of MySQL. The datadir is the
directory of a running mysqld program. We can refer to it by simply the
path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp
dir of the mysqld server.
+
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fil_create_new_single_table_tablespace(
/*===================================*/
ulint space_id, /*!< in: space id */
const char* tablename, /*!< in: the table name in the usual
databasename/tablename format
- of InnoDB, or a dir path to a temp
- table */
- ibool is_temp, /*!< in: TRUE if a table created with
- CREATE TEMPORARY TABLE */
+ of InnoDB */
+ const char* dir_path, /*!< in: NULL or a dir path */
ulint flags, /*!< in: tablespace flags */
ulint flags2, /*!< in: table flags2 */
ulint size) /*!< in: the initial size of the
@@ -2789,18 +3266,40 @@ fil_create_new_single_table_tablespace(
{
os_file_t file;
ibool ret;
- ulint err;
+ dberr_t err;
byte* buf2;
byte* page;
char* path;
ibool success;
+ /* TRUE if a table is created with CREATE TEMPORARY TABLE */
+ bool is_temp = !!(flags2 & DICT_TF2_TEMPORARY);
+ bool has_data_dir = FSP_FLAGS_HAS_DATA_DIR(flags);
ut_a(space_id > 0);
+ ut_ad(!srv_read_only_mode);
ut_a(space_id < SRV_LOG_SPACE_FIRST_ID);
ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE);
- fsp_flags_validate(flags);
+ ut_a(fsp_flags_is_valid(flags));
- path = fil_make_ibd_name(tablename, is_temp);
+ if (is_temp) {
+ /* Temporary table filepath */
+ ut_ad(dir_path);
+ path = fil_make_ibd_name(dir_path, true);
+ } else if (has_data_dir) {
+ ut_ad(dir_path);
+ path = os_file_make_remote_pathname(dir_path, tablename, "ibd");
+
+ /* Since this tablespace file will be created in a
+ remote directory, let's create the subdirectories
+ in the path, if they are not there already. */
+ success = os_file_create_subdirs_if_needed(path);
+ if (!success) {
+ err = DB_ERROR;
+ goto error_exit_3;
+ }
+ } else {
+ path = fil_make_ibd_name(tablename, false);
+ }
file = os_file_create(
innodb_file_data_key, path,
@@ -2810,58 +3309,44 @@ fil_create_new_single_table_tablespace(
&ret);
if (ret == FALSE) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error creating file ", stderr);
- ut_print_filename(stderr, path);
- fputs(".\n", stderr);
-
/* The following call will print an error message */
-
- err = os_file_get_last_error(TRUE);
-
- if (err == OS_FILE_ALREADY_EXISTS) {
- fputs("InnoDB: The file already exists though"
- " the corresponding table did not\n"
- "InnoDB: exist in the InnoDB data dictionary."
- " Have you moved InnoDB\n"
- "InnoDB: .ibd files around without using the"
- " SQL commands\n"
- "InnoDB: DISCARD TABLESPACE and"
- " IMPORT TABLESPACE, or did\n"
- "InnoDB: mysqld crash in the middle of"
- " CREATE TABLE? You can\n"
- "InnoDB: resolve the problem by"
- " removing the file ", stderr);
- ut_print_filename(stderr, path);
- fputs("\n"
- "InnoDB: under the 'datadir' of MySQL.\n",
- stderr);
-
- mem_free(path);
- return(DB_TABLESPACE_ALREADY_EXISTS);
+ ulint error = os_file_get_last_error(true);
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot create file '%s'\n", path);
+
+ if (error == OS_FILE_ALREADY_EXISTS) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "The file '%s' already exists though the "
+ "corresponding table did not exist "
+ "in the InnoDB data dictionary. "
+ "Have you moved InnoDB .ibd files "
+ "around without using the SQL commands "
+ "DISCARD TABLESPACE and IMPORT TABLESPACE, "
+ "or did mysqld crash in the middle of "
+ "CREATE TABLE? "
+ "You can resolve the problem by removing "
+ "the file '%s' under the 'datadir' of MySQL.",
+ path, path);
+
+ err = DB_TABLESPACE_EXISTS;
+ goto error_exit_3;
}
- if (err == OS_FILE_DISK_FULL) {
-
- mem_free(path);
- return(DB_OUT_OF_FILE_SPACE);
+ if (error == OS_FILE_DISK_FULL) {
+ err = DB_OUT_OF_FILE_SPACE;
+ goto error_exit_3;
}
- mem_free(path);
- return(DB_ERROR);
+ err = DB_ERROR;
+ goto error_exit_3;
}
ret = os_file_set_size(path, file, size * UNIV_PAGE_SIZE);
if (!ret) {
err = DB_OUT_OF_FILE_SPACE;
-error_exit:
- os_file_close(file);
-error_exit2:
- os_file_delete(path);
-
- mem_free(path);
- return(err);
+ goto error_exit_2;
}
/* printf("Creating tablespace %s id %lu\n", path, space_id); */
@@ -2910,356 +3395,486 @@ error_exit2:
ut_free(buf2);
if (!ret) {
- fputs("InnoDB: Error: could not write the first page"
- " to tablespace ", stderr);
- ut_print_filename(stderr, path);
- putc('\n', stderr);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Could not write the first page to tablespace "
+ "'%s'", path);
+
err = DB_ERROR;
- goto error_exit;
+ goto error_exit_2;
}
ret = os_file_flush(file);
if (!ret) {
- fputs("InnoDB: Error: file flush of tablespace ", stderr);
- ut_print_filename(stderr, path);
- fputs(" failed\n", stderr);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "File flush of tablespace '%s' failed", path);
err = DB_ERROR;
- goto error_exit;
+ goto error_exit_2;
}
- os_file_close(file);
+ if (has_data_dir) {
+ /* Now that the IBD file is created, make the ISL file. */
+ err = fil_create_link_file(tablename, path);
+ if (err != DB_SUCCESS) {
+ goto error_exit_2;
+ }
+ }
success = fil_space_create(tablename, space_id, flags, FIL_TABLESPACE);
-
- if (!success) {
+ if (!success || !fil_node_create(path, size, space_id, FALSE)) {
err = DB_ERROR;
- goto error_exit2;
+ goto error_exit_1;
}
- fil_node_create(path, size, space_id, FALSE);
-
#ifndef UNIV_HOTBACKUP
{
mtr_t mtr;
+ ulint mlog_file_flag = 0;
+
+ if (is_temp) {
+ mlog_file_flag |= MLOG_FILE_FLAG_TEMP;
+ }
mtr_start(&mtr);
fil_op_write_log(flags
? MLOG_FILE_CREATE2
: MLOG_FILE_CREATE,
- space_id,
- is_temp ? MLOG_FILE_FLAG_TEMP : 0,
- flags,
+ space_id, mlog_file_flag, flags,
tablename, NULL, &mtr);
mtr_commit(&mtr);
}
#endif
+ err = DB_SUCCESS;
+
+ /* Error code is set. Cleanup the various variables used.
+ These labels reflect the order in which variables are assigned or
+ actions are done. */
+error_exit_1:
+ if (has_data_dir && err != DB_SUCCESS) {
+ fil_delete_link_file(tablename);
+ }
+error_exit_2:
+ os_file_close(file);
+ if (err != DB_SUCCESS) {
+ os_file_delete(path);
+ }
+error_exit_3:
mem_free(path);
- return(DB_SUCCESS);
+
+ return(err);
}
#ifndef UNIV_HOTBACKUP
/********************************************************************//**
-It is possible, though very improbable, that the lsn's in the tablespace to be
-imported have risen above the current system lsn, if a lengthy purge, ibuf
-merge, or rollback was performed on a backup taken with ibbackup. If that is
-the case, reset page lsn's in the file. We assume that mysqld was shut down
-after it performed these cleanup operations on the .ibd file, so that it at
-the shutdown stamped the latest lsn to the FIL_PAGE_FILE_FLUSH_LSN in the
-first page of the .ibd file, and we can determine whether we need to reset the
-lsn's just by looking at that flush lsn.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-fil_reset_too_high_lsns(
-/*====================*/
- const char* name, /*!< in: table name in the
- databasename/tablename format */
- lsn_t current_lsn) /*!< in: reset lsn's if the lsn stamped
- to FIL_PAGE_FILE_FLUSH_LSN in the
- first page is too high */
+Report information about a bad tablespace. */
+static
+void
+fil_report_bad_tablespace(
+/*======================*/
+ char* filepath, /*!< in: filepath */
+ ulint found_id, /*!< in: found space ID */
+ ulint found_flags, /*!< in: found flags */
+ ulint expected_id, /*!< in: expected space id */
+ ulint expected_flags) /*!< in: expected flags */
{
- os_file_t file;
- char* filepath;
- byte* page;
- byte* buf2;
- lsn_t flush_lsn;
- ulint space_id;
- os_offset_t file_size;
- os_offset_t offset;
- ulint zip_size;
- ibool success;
- page_zip_des_t page_zip;
-
- filepath = fil_make_ibd_name(name, FALSE);
-
- file = os_file_create_simple_no_error_handling(
- innodb_file_data_key, filepath, OS_FILE_OPEN,
- OS_FILE_READ_WRITE, &success);
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
-
- ut_print_timestamp(stderr);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "In file '%s', tablespace id and flags are %lu and %lu, "
+ "but in the InnoDB data dictionary they are %lu and %lu. "
+ "Have you moved InnoDB .ibd files around without using the "
+ "commands DISCARD TABLESPACE and IMPORT TABLESPACE? "
+ "Please refer to "
+ REFMAN "innodb-troubleshooting-datadict.html "
+ "for how to resolve the issue.",
+ filepath, (ulong) found_id, (ulong) found_flags,
+ (ulong) expected_id, (ulong) expected_flags);
+}
- fputs(" InnoDB: Error: trying to open a table,"
- " but could not\n"
- "InnoDB: open the tablespace file ", stderr);
- ut_print_filename(stderr, filepath);
- fputs("!\n", stderr);
- mem_free(filepath);
+struct fsp_open_info {
+ ibool success; /*!< Has the tablespace been opened? */
+ ibool valid; /*!< Is the tablespace valid? */
+ os_file_t file; /*!< File handle */
+ char* filepath; /*!< File path to open */
+ lsn_t lsn; /*!< Flushed LSN from header page */
+ ulint id; /*!< Space ID */
+ ulint flags; /*!< Tablespace flags */
+#ifdef UNIV_LOG_ARCHIVE
+ ulint arch_log_no; /*!< latest archived log file number */
+#endif /* UNIV_LOG_ARCHIVE */
+};
- return(FALSE);
- }
+/********************************************************************//**
+Tries to open a single-table tablespace and optionally checks that the
+space id in it is correct. If this does not succeed, print an error message
+to the .err log. This function is used to open a tablespace when we start
+mysqld after the dictionary has been booted, and also in IMPORT TABLESPACE.
- /* Read the first page of the tablespace */
+NOTE that we assume this operation is used either at the database startup
+or under the protection of the dictionary mutex, so that two users cannot
+race here. This operation does not leave the file associated with the
+tablespace open, but closes it after we have looked at the space id in it.
- buf2 = static_cast<byte*>(ut_malloc(3 * UNIV_PAGE_SIZE));
- /* Align the memory for file i/o if we might have O_DIRECT set */
- page = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
+If the validate boolean is set, we read the first page of the file and
+check that the space id in the file is what we expect. We assume that
+this function runs much faster if no check is made, since accessing the
+file inode probably is much faster (the OS caches them) than accessing
+the first page of the file. This boolean may be initially FALSE, but if
+a remote tablespace is found it will be changed to true.
- success = os_file_read(file, page, 0, UNIV_PAGE_SIZE);
- if (!success) {
+If the fix_dict boolean is set, then it is safe to use an internal SQL
+statement to update the dictionary tables if they are incorrect.
- goto func_exit;
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fil_open_single_table_tablespace(
+/*=============================*/
+ bool validate, /*!< in: Do we validate tablespace? */
+ bool fix_dict, /*!< in: Can we fix the dictionary? */
+ ulint id, /*!< in: space id */
+ ulint flags, /*!< in: tablespace flags */
+ const char* tablename, /*!< in: table name in the
+ databasename/tablename format */
+ const char* path_in) /*!< in: tablespace filepath */
+{
+ dberr_t err = DB_SUCCESS;
+ bool dict_filepath_same_as_default = false;
+ bool link_file_found = false;
+ bool link_file_is_bad = false;
+ fsp_open_info def;
+ fsp_open_info dict;
+ fsp_open_info remote;
+ ulint tablespaces_found = 0;
+ ulint valid_tablespaces_found = 0;
+
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(!fix_dict || rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!fix_dict || mutex_own(&(dict_sys->mutex)));
+
+ if (!fsp_flags_is_valid(flags)) {
+ return(DB_CORRUPTION);
+ }
+
+ /* If the tablespace was relocated, we do not
+ compare the DATA_DIR flag */
+ ulint mod_flags = flags & ~FSP_FLAGS_MASK_DATA_DIR;
+
+ memset(&def, 0, sizeof(def));
+ memset(&dict, 0, sizeof(dict));
+ memset(&remote, 0, sizeof(remote));
+
+ /* Discover the correct filepath. We will always look for an ibd
+ in the default location. If it is remote, it should not be here. */
+ def.filepath = fil_make_ibd_name(tablename, false);
+
+ /* The path_in was read from SYS_DATAFILES. */
+ if (path_in) {
+ if (strcmp(def.filepath, path_in)) {
+ dict.filepath = mem_strdup(path_in);
+ /* possibility of multiple files. */
+ validate = true;
+ } else {
+ dict_filepath_same_as_default = true;
+ }
}
- /* We have to read the file flush lsn from the header of the file */
-
- flush_lsn = mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN);
+ link_file_found = fil_open_linked_file(
+ tablename, &remote.filepath, &remote.file);
+ remote.success = link_file_found;
+ if (remote.success) {
+ /* possibility of multiple files. */
+ validate = true;
+ tablespaces_found++;
+
+ /* A link file was found. MySQL does not allow a DATA
+		DIRECTORY to be the same as the default filepath. */
+ ut_a(strcmp(def.filepath, remote.filepath));
+
+ /* If there was a filepath found in SYS_DATAFILES,
+ we hope it was the same as this remote.filepath found
+ in the ISL file. */
+ if (dict.filepath
+ && (0 == strcmp(dict.filepath, remote.filepath))) {
+ remote.success = FALSE;
+ os_file_close(remote.file);
+ mem_free(remote.filepath);
+ remote.filepath = NULL;
+ tablespaces_found--;
+ }
+ }
- if (current_lsn >= flush_lsn) {
- /* Ok */
- success = TRUE;
+ /* Attempt to open the tablespace at other possible filepaths. */
+ if (dict.filepath) {
+ dict.file = os_file_create_simple_no_error_handling(
+ innodb_file_data_key, dict.filepath, OS_FILE_OPEN,
+ OS_FILE_READ_ONLY, &dict.success);
+ if (dict.success) {
+ /* possibility of multiple files. */
+ validate = true;
+ tablespaces_found++;
+ }
+ }
- goto func_exit;
+ /* Always look for a file at the default location. */
+ ut_a(def.filepath);
+ def.file = os_file_create_simple_no_error_handling(
+ innodb_file_data_key, def.filepath, OS_FILE_OPEN,
+ OS_FILE_READ_ONLY, &def.success);
+ if (def.success) {
+ tablespaces_found++;
}
- space_id = fsp_header_get_space_id(page);
- zip_size = fsp_header_get_zip_size(page);
+ /* We have now checked all possible tablespace locations and
+ have a count of how many we found. If things are normal, we
+ only found 1. */
+ if (!validate && tablespaces_found == 1) {
+ goto skip_validate;
+ }
- page_zip_des_init(&page_zip);
- page_zip_set_size(&page_zip, zip_size);
- if (zip_size) {
- page_zip.data = page + UNIV_PAGE_SIZE;
+ /* Read the first page of the datadir tablespace, if found. */
+ if (def.success) {
+ fil_read_first_page(
+ def.file, FALSE, &def.flags, &def.id,
+#ifdef UNIV_LOG_ARCHIVE
+ &space_arch_log_no, &space_arch_log_no,
+#endif /* UNIV_LOG_ARCHIVE */
+ &def.lsn, &def.lsn);
+
+ /* Validate this single-table-tablespace with SYS_TABLES,
+ but do not compare the DATA_DIR flag, in case the
+ tablespace was relocated. */
+ ulint mod_def_flags = def.flags & ~FSP_FLAGS_MASK_DATA_DIR;
+ if (def.id == id && mod_def_flags == mod_flags) {
+ valid_tablespaces_found++;
+ def.valid = TRUE;
+ } else {
+ /* Do not use this tablespace. */
+ fil_report_bad_tablespace(
+ def.filepath, def.id,
+ def.flags, id, flags);
+ }
}
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Flush lsn in the tablespace file %lu"
- " to be imported\n"
- "InnoDB: is " LSN_PF ", which exceeds current"
- " system lsn " LSN_PF ".\n"
- "InnoDB: We reset the lsn's in the file ",
- (ulong) space_id,
- flush_lsn, current_lsn);
- ut_print_filename(stderr, filepath);
- fputs(".\n", stderr);
-
- ut_a(ut_is_2pow(zip_size));
- ut_a(zip_size <= UNIV_ZIP_SIZE_MAX);
-
- /* Loop through all the pages in the tablespace and reset the lsn and
- the page checksum if necessary */
-
- file_size = os_file_get_size(file);
- ut_a(file_size != (os_offset_t) -1);
+ /* Read the first page of the remote tablespace */
+ if (remote.success) {
+ fil_read_first_page(
+ remote.file, FALSE, &remote.flags, &remote.id,
+#ifdef UNIV_LOG_ARCHIVE
+ &remote.arch_log_no, &remote.arch_log_no,
+#endif /* UNIV_LOG_ARCHIVE */
+ &remote.lsn, &remote.lsn);
+
+ /* Validate this single-table-tablespace with SYS_TABLES,
+ but do not compare the DATA_DIR flag, in case the
+ tablespace was relocated. */
+ ulint mod_remote_flags = remote.flags & ~FSP_FLAGS_MASK_DATA_DIR;
+ if (remote.id == id && mod_remote_flags == mod_flags) {
+ valid_tablespaces_found++;
+ remote.valid = TRUE;
+ } else {
+ /* Do not use this linked tablespace. */
+ fil_report_bad_tablespace(
+ remote.filepath, remote.id,
+ remote.flags, id, flags);
+ link_file_is_bad = true;
+ }
+ }
- for (offset = 0; offset < file_size;
- offset += zip_size ? zip_size : UNIV_PAGE_SIZE) {
- success = os_file_read(file, page, offset,
- zip_size ? zip_size : UNIV_PAGE_SIZE);
- if (!success) {
+	/* Read the first page of the dictionary tablespace, if found. */
+ if (dict.success) {
+ fil_read_first_page(
+ dict.file, FALSE, &dict.flags, &dict.id,
+#ifdef UNIV_LOG_ARCHIVE
+ &dict.arch_log_no, &dict.arch_log_no,
+#endif /* UNIV_LOG_ARCHIVE */
+ &dict.lsn, &dict.lsn);
+
+ /* Validate this single-table-tablespace with SYS_TABLES,
+ but do not compare the DATA_DIR flag, in case the
+ tablespace was relocated. */
+ ulint mod_dict_flags = dict.flags & ~FSP_FLAGS_MASK_DATA_DIR;
+ if (dict.id == id && mod_dict_flags == mod_flags) {
+ valid_tablespaces_found++;
+ dict.valid = TRUE;
+ } else {
+ /* Do not use this tablespace. */
+ fil_report_bad_tablespace(
+ dict.filepath, dict.id,
+ dict.flags, id, flags);
+ }
+ }
- goto func_exit;
+ /* Make sense of these three possible locations.
+ First, bail out if no tablespace files were found. */
+ if (valid_tablespaces_found == 0) {
+ /* The following call prints an error message */
+ os_file_get_last_error(true);
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Could not find a valid tablespace file for '%s'. "
+ "See " REFMAN "innodb-troubleshooting-datadict.html "
+ "for how to resolve the issue.",
+ tablename);
+
+ err = DB_CORRUPTION;
+
+ goto cleanup_and_exit;
+ }
+
+ /* Do not open any tablespaces if more than one tablespace with
+ the correct space ID and flags were found. */
+ if (tablespaces_found > 1) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "A tablespace for %s has been found in "
+ "multiple places;", tablename);
+ if (def.success) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Default location; %s, LSN=" LSN_PF
+ ", Space ID=%lu, Flags=%lu",
+ def.filepath, def.lsn,
+ (ulong) def.id, (ulong) def.flags);
+ }
+ if (remote.success) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Remote location; %s, LSN=" LSN_PF
+ ", Space ID=%lu, Flags=%lu",
+ remote.filepath, remote.lsn,
+ (ulong) remote.id, (ulong) remote.flags);
}
- if (mach_read_from_8(page + FIL_PAGE_LSN) > current_lsn) {
- /* We have to reset the lsn */
-
- if (zip_size) {
- memcpy(page_zip.data, page, zip_size);
- buf_flush_init_for_writing(
- page, &page_zip, current_lsn);
- success = os_file_write(
- filepath, file, page_zip.data,
- offset, zip_size);
+ if (dict.success) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Dictionary location; %s, LSN=" LSN_PF
+ ", Space ID=%lu, Flags=%lu",
+ dict.filepath, dict.lsn,
+ (ulong) dict.id, (ulong) dict.flags);
+ }
+
+ /* Force-recovery will allow some tablespaces to be
+ skipped by REDO if there was more than one file found.
+ Unlike during the REDO phase of recovery, we now know
+ if the tablespace is valid according to the dictionary,
+ which was not available then. So if we did not force
+ recovery and there is only one good tablespace, ignore
+ any bad tablespaces. */
+ if (valid_tablespaces_found > 1 || srv_force_recovery > 0) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Will not open the tablespace for '%s'",
+ tablename);
+
+ if (def.success != def.valid
+ || dict.success != dict.valid
+ || remote.success != remote.valid) {
+ err = DB_CORRUPTION;
} else {
- buf_flush_init_for_writing(
- page, NULL, current_lsn);
- success = os_file_write(
- filepath, file, page,
- offset, UNIV_PAGE_SIZE);
+ err = DB_ERROR;
}
+ goto cleanup_and_exit;
+ }
- if (!success) {
+ /* There is only one valid tablespace found and we did
+ not use srv_force_recovery during REDO. Use this one
+ tablespace and clean up invalid tablespace pointers */
+ if (def.success && !def.valid) {
+ def.success = false;
+ os_file_close(def.file);
+ tablespaces_found--;
+ }
+ if (dict.success && !dict.valid) {
+ dict.success = false;
+ os_file_close(dict.file);
+ /* Leave dict.filepath so that SYS_DATAFILES
+ can be corrected below. */
+ tablespaces_found--;
+ }
+ if (remote.success && !remote.valid) {
+ remote.success = false;
+ os_file_close(remote.file);
+ mem_free(remote.filepath);
+ remote.filepath = NULL;
+ tablespaces_found--;
+ }
+ }
- goto func_exit;
+ /* At this point, there should be only one filepath. */
+ ut_a(tablespaces_found == 1);
+ ut_a(valid_tablespaces_found == 1);
+
+ /* Only fix the dictionary at startup when there is only one thread.
+ Calls to dict_load_table() can be done while holding other latches. */
+ if (!fix_dict) {
+ goto skip_validate;
+ }
+
+ /* We may need to change what is stored in SYS_DATAFILES or
+ SYS_TABLESPACES or adjust the link file.
+ Since a failure to update SYS_TABLESPACES or SYS_DATAFILES does
+ not prevent opening and using the single_table_tablespace either
+ this time or the next, we do not check the return code or fail
+ to open the tablespace. But dict_update_filepath() will issue a
+ warning to the log. */
+ if (dict.filepath) {
+ if (remote.success) {
+ dict_update_filepath(id, remote.filepath);
+ } else if (def.success) {
+ dict_update_filepath(id, def.filepath);
+ if (link_file_is_bad) {
+ fil_delete_link_file(tablename);
}
+ } else if (!link_file_found || link_file_is_bad) {
+ ut_ad(dict.success);
+ /* Fix the link file if we got our filepath
+ from the dictionary but a link file did not
+ exist or it did not point to a valid file. */
+ fil_delete_link_file(tablename);
+ fil_create_link_file(tablename, dict.filepath);
}
- }
- success = os_file_flush(file);
- if (!success) {
+ } else if (remote.success && dict_filepath_same_as_default) {
+ dict_update_filepath(id, remote.filepath);
- goto func_exit;
+ } else if (remote.success && path_in == NULL) {
+ /* SYS_DATAFILES record for this space ID was not found. */
+ dict_insert_tablespace_and_filepath(
+ id, tablename, remote.filepath, flags);
}
- /* We now update the flush_lsn stamp at the start of the file */
- success = os_file_read(file, page, 0,
- zip_size ? zip_size : UNIV_PAGE_SIZE);
- if (!success) {
+skip_validate:
+ if (err != DB_SUCCESS) {
+ ; // Don't load the tablespace into the cache
+ } else if (!fil_space_create(tablename, id, flags, FIL_TABLESPACE)) {
+ err = DB_ERROR;
+ } else {
+ /* We do not measure the size of the file, that is why
+ we pass the 0 below */
- goto func_exit;
+ if (!fil_node_create(remote.success ? remote.filepath :
+ dict.success ? dict.filepath :
+ def.filepath, 0, id, FALSE)) {
+ err = DB_ERROR;
+ }
}
- mach_write_to_8(page + FIL_PAGE_FILE_FLUSH_LSN, current_lsn);
-
- success = os_file_write(filepath, file, page, 0,
- zip_size ? zip_size : UNIV_PAGE_SIZE);
- if (!success) {
-
- goto func_exit;
+cleanup_and_exit:
+ if (remote.success) {
+ os_file_close(remote.file);
}
- success = os_file_flush(file);
-func_exit:
- os_file_close(file);
- ut_free(buf2);
- mem_free(filepath);
-
- return(success);
-}
-
-/********************************************************************//**
-Tries to open a single-table tablespace and optionally checks the space id is
-right in it. If does not succeed, prints an error message to the .err log. This
-function is used to open a tablespace when we start up mysqld, and also in
-IMPORT TABLESPACE.
-NOTE that we assume this operation is used either at the database startup
-or under the protection of the dictionary mutex, so that two users cannot
-race here. This operation does not leave the file associated with the
-tablespace open, but closes it after we have looked at the space id in it.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-fil_open_single_table_tablespace(
-/*=============================*/
- ibool check_space_id, /*!< in: should we check that the space
- id in the file is right; we assume
- that this function runs much faster
- if no check is made, since accessing
- the file inode probably is much
- faster (the OS caches them) than
- accessing the first page of the file */
- ulint id, /*!< in: space id */
- ulint flags, /*!< in: tablespace flags */
- const char* tablename) /*!< in: table name in the
- databasename/tablename format */
-{
- os_file_t file;
- char* filepath;
- ibool success;
- byte* buf2;
- byte* page;
- ulint space_id;
- ulint space_flags;
-
- filepath = fil_make_ibd_name(tablename, FALSE);
-
- fsp_flags_validate(flags);
-
- file = os_file_create_simple_no_error_handling(
- innodb_file_data_key, filepath, OS_FILE_OPEN,
- OS_FILE_READ_ONLY, &success);
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
-
- ut_print_timestamp(stderr);
-
- fputs(" InnoDB: Error: trying to open a table,"
- " but could not\n"
- "InnoDB: open the tablespace file ", stderr);
- ut_print_filename(stderr, filepath);
- fputs("!\n"
- "InnoDB: Have you moved InnoDB .ibd files around"
- " without using the\n"
- "InnoDB: commands DISCARD TABLESPACE and"
- " IMPORT TABLESPACE?\n"
- "InnoDB: It is also possible that this is"
- " a temporary table #sql...,\n"
- "InnoDB: and MySQL removed the .ibd file for this.\n"
- "InnoDB: Please refer to\n"
- "InnoDB: " REFMAN
- "innodb-troubleshooting-datadict.html\n"
- "InnoDB: for how to resolve the issue.\n", stderr);
-
- mem_free(filepath);
-
- return(FALSE);
+ if (remote.filepath) {
+ mem_free(remote.filepath);
}
-
- if (!check_space_id) {
- space_id = id;
-
- goto skip_check;
+ if (dict.success) {
+ os_file_close(dict.file);
}
-
- /* Read the first page of the tablespace */
-
- buf2 = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE));
- /* Align the memory for file i/o if we might have O_DIRECT set */
- page = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
-
- success = os_file_read(file, page, 0, UNIV_PAGE_SIZE);
-
- /* We have to read the tablespace id and flags from the file. */
-
- space_id = fsp_header_get_space_id(page);
- space_flags = fsp_header_get_flags(page);
-
- ut_free(buf2);
-
- if (UNIV_UNLIKELY(space_id != id || space_flags != flags)) {
- ut_print_timestamp(stderr);
-
- fputs(" InnoDB: Error: tablespace id and flags in file ",
- stderr);
- ut_print_filename(stderr, filepath);
- fprintf(stderr, " are %lu and %lu, but in the InnoDB\n"
- "InnoDB: data dictionary they are %lu and %lu.\n"
- "InnoDB: Have you moved InnoDB .ibd files"
- " around without using the\n"
- "InnoDB: commands DISCARD TABLESPACE and"
- " IMPORT TABLESPACE?\n"
- "InnoDB: Please refer to\n"
- "InnoDB: " REFMAN "innodb-troubleshooting-datadict.html\n"
- "InnoDB: for how to resolve the issue.\n",
- (ulong) space_id, (ulong) space_flags,
- (ulong) id, (ulong) flags);
-
- success = FALSE;
-
- goto func_exit;
+ if (dict.filepath) {
+ mem_free(dict.filepath);
}
-
-skip_check:
- success = fil_space_create(tablename, space_id, flags, FIL_TABLESPACE);
-
- if (!success) {
- goto func_exit;
+ if (def.success) {
+ os_file_close(def.file);
}
+ mem_free(def.filepath);
- /* We do not measure the size of the file, that is why we pass the 0
- below */
-
- fil_node_create(filepath, 0, space_id, FALSE);
-func_exit:
- os_file_close(file);
- mem_free(filepath);
-
- return(success);
+ return(err);
}
#endif /* !UNIV_HOTBACKUP */
@@ -3282,13 +3897,64 @@ fil_make_ibbackup_old_name(
memcpy(path, name, len);
memcpy(path + len, suffix, (sizeof suffix) - 1);
- ut_sprintf_timestamp_without_extra_chars(path + len + sizeof suffix);
+ ut_sprintf_timestamp_without_extra_chars(
+ path + len + ((sizeof suffix) - 1));
return(path);
}
#endif /* UNIV_HOTBACKUP */
/********************************************************************//**
Opens an .ibd file and adds the associated single-table tablespace to the
+InnoDB fil0fil.cc data structures.
+Set fsp->success to TRUE if tablespace is valid, FALSE if not. */
+static
+void
+fil_validate_single_table_tablespace(
+/*=================================*/
+ const char* tablename, /*!< in: database/tablename */
+ fsp_open_info* fsp) /*!< in/out: tablespace info */
+{
+ fil_read_first_page(
+ fsp->file, FALSE, &fsp->flags, &fsp->id,
+#ifdef UNIV_LOG_ARCHIVE
+ &fsp->arch_log_no, &fsp->arch_log_no,
+#endif /* UNIV_LOG_ARCHIVE */
+ &fsp->lsn, &fsp->lsn);
+
+ if (fsp->id == ULINT_UNDEFINED || fsp->id == 0) {
+ fprintf(stderr,
+ " InnoDB: Error: Tablespace is not sensible;"
+ " Table: %s Space ID: %lu Filepath: %s\n",
+ tablename, (ulong) fsp->id, fsp->filepath);
+ fsp->success = FALSE;
+ return;
+ }
+
+ mutex_enter(&fil_system->mutex);
+ fil_space_t* space = fil_space_get_by_id(fsp->id);
+ mutex_exit(&fil_system->mutex);
+ if (space != NULL) {
+ char* prev_filepath = fil_space_get_first_path(fsp->id);
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Attempted to open a previously opened tablespace. "
+ "Previous tablespace %s uses space ID: %lu at "
+ "filepath: %s. Cannot open tablespace %s which uses "
+ "space ID: %lu at filepath: %s",
+ space->name, (ulong) space->id, prev_filepath,
+ tablename, (ulong) fsp->id, fsp->filepath);
+
+ mem_free(prev_filepath);
+ fsp->success = FALSE;
+ return;
+ }
+
+ fsp->success = TRUE;
+}
+
+
+/********************************************************************//**
+Opens an .ibd file and adds the associated single-table tablespace to the
InnoDB fil0fil.cc data structures. */
static
void
@@ -3296,34 +3962,49 @@ fil_load_single_table_tablespace(
/*=============================*/
const char* dbname, /*!< in: database name */
const char* filename) /*!< in: file name (not a path),
- including the .ibd extension */
+ including the .ibd or .isl extension */
{
- os_file_t file;
- char* filepath;
char* tablename;
- ibool success;
- byte* buf2;
- byte* page;
- ulint space_id;
- ulint flags;
+ ulint tablename_len;
+ ulint dbname_len = strlen(dbname);
+ ulint filename_len = strlen(filename);
+ fsp_open_info def;
+ fsp_open_info remote;
os_offset_t size;
#ifdef UNIV_HOTBACKUP
fil_space_t* space;
#endif
- filepath = static_cast<char*>(
- mem_alloc(
- strlen(dbname)
- + strlen(filename)
- + strlen(fil_path_to_mysql_datadir) + 3));
- sprintf(filepath, "%s/%s/%s", fil_path_to_mysql_datadir, dbname,
- filename);
- srv_normalize_path_for_win(filepath);
+ memset(&def, 0, sizeof(def));
+ memset(&remote, 0, sizeof(remote));
+ /* The caller assured that the extension is ".ibd" or ".isl". */
+ ut_ad(0 == memcmp(filename + filename_len - 4, ".ibd", 4)
+ || 0 == memcmp(filename + filename_len - 4, ".isl", 4));
+
+ /* Build up the tablename in the standard form database/table. */
tablename = static_cast<char*>(
- mem_alloc(strlen(dbname) + strlen(filename) + 2));
+ mem_alloc(dbname_len + filename_len + 2));
sprintf(tablename, "%s/%s", dbname, filename);
- tablename[strlen(tablename) - strlen(".ibd")] = 0;
+ tablename_len = strlen(tablename) - strlen(".ibd");
+ tablename[tablename_len] = '\0';
+
+ /* There may be both .ibd and .isl file in the directory.
+ And it is possible that the .isl file refers to a different
+ .ibd file. If so, we open and compare them the first time
+ one of them is sent to this function. So if this table has
+ already been loaded, there is nothing to do.*/
+ mutex_enter(&fil_system->mutex);
+ if (fil_space_get_by_name(tablename)) {
+ mem_free(tablename);
+ mutex_exit(&fil_system->mutex);
+ return;
+ }
+ mutex_exit(&fil_system->mutex);
+
+ /* Build up the filepath of the .ibd tablespace in the datadir.
+ This must be freed independent of def.success. */
+ def.filepath = fil_make_ibd_name(tablename, false);
#ifdef __WIN__
# ifndef UNIV_HOTBACKUP
@@ -3333,31 +4014,56 @@ fil_load_single_table_tablespace(
file path to lower case, so that we are consistent with InnoDB's
internal data dictionary. */
- dict_casedn_str(filepath);
+ dict_casedn_str(def.filepath);
# endif /* !UNIV_HOTBACKUP */
#endif
- file = os_file_create_simple_no_error_handling(
- innodb_file_data_key, filepath, OS_FILE_OPEN,
- OS_FILE_READ_ONLY, &success);
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
+ /* Check for a link file which locates a remote tablespace. */
+ remote.success = fil_open_linked_file(
+ tablename, &remote.filepath, &remote.file);
+
+ /* Read the first page of the remote tablespace */
+ if (remote.success) {
+ fil_validate_single_table_tablespace(tablename, &remote);
+ if (!remote.success) {
+ os_file_close(remote.file);
+ mem_free(remote.filepath);
+ }
+ }
+
+
+ /* Try to open the tablespace in the datadir. */
+ def.file = os_file_create_simple_no_error_handling(
+ innodb_file_data_key, def.filepath, OS_FILE_OPEN,
+ OS_FILE_READ_ONLY, &def.success);
+
+	/* Read the first page of the default datadir tablespace */
+ if (def.success) {
+ fil_validate_single_table_tablespace(tablename, &def);
+ if (!def.success) {
+ os_file_close(def.file);
+ }
+ }
+
+ if (!def.success && !remote.success) {
+ /* The following call prints an error message */
+ os_file_get_last_error(true);
+ fprintf(stderr,
+ "InnoDB: Error: could not open single-table"
+ " tablespace file %s\n", def.filepath);
+no_good_file:
fprintf(stderr,
- "InnoDB: Error: could not open single-table tablespace"
- " file\n"
- "InnoDB: %s!\n"
"InnoDB: We do not continue the crash recovery,"
" because the table may become\n"
- "InnoDB: corrupt if we cannot apply the log records"
- " in the InnoDB log to it.\n"
+ "InnoDB: corrupt if we cannot apply the log"
+ " records in the InnoDB log to it.\n"
"InnoDB: To fix the problem and start mysqld:\n"
"InnoDB: 1) If there is a permission problem"
" in the file and mysqld cannot\n"
"InnoDB: open the file, you should"
" modify the permissions.\n"
- "InnoDB: 2) If the table is not needed, or you can"
- " restore it from a backup,\n"
+ "InnoDB: 2) If the table is not needed, or you"
+ " can restore it from a backup,\n"
"InnoDB: then you can remove the .ibd file,"
" and InnoDB will do a normal\n"
"InnoDB: crash recovery and ignore that table.\n"
@@ -3366,123 +4072,84 @@ fil_load_single_table_tablespace(
"InnoDB: the .ibd file, you can set"
" innodb_force_recovery > 0 in my.cnf\n"
"InnoDB: and force InnoDB to continue crash"
- " recovery here.\n", filepath);
-
+ " recovery here.\n");
+will_not_choose:
mem_free(tablename);
- mem_free(filepath);
-
- if (srv_force_recovery > 0) {
- fprintf(stderr,
- "InnoDB: innodb_force_recovery"
- " was set to %lu. Continuing crash recovery\n"
- "InnoDB: even though we cannot access"
- " the .ibd file of this table.\n",
- srv_force_recovery);
- return;
+ if (remote.success) {
+ mem_free(remote.filepath);
}
-
- exit(1);
- }
-
- size = os_file_get_size(file);
-
- if (UNIV_UNLIKELY(size == (os_offset_t) -1)) {
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
-
- fprintf(stderr,
- "InnoDB: Error: could not measure the size"
- " of single-table tablespace file\n"
- "InnoDB: %s!\n"
- "InnoDB: We do not continue crash recovery,"
- " because the table will become\n"
- "InnoDB: corrupt if we cannot apply the log records"
- " in the InnoDB log to it.\n"
- "InnoDB: To fix the problem and start mysqld:\n"
- "InnoDB: 1) If there is a permission problem"
- " in the file and mysqld cannot\n"
- "InnoDB: access the file, you should"
- " modify the permissions.\n"
- "InnoDB: 2) If the table is not needed,"
- " or you can restore it from a backup,\n"
- "InnoDB: then you can remove the .ibd file,"
- " and InnoDB will do a normal\n"
- "InnoDB: crash recovery and ignore that table.\n"
- "InnoDB: 3) If the file system or the disk is broken,"
- " and you cannot remove\n"
- "InnoDB: the .ibd file, you can set"
- " innodb_force_recovery > 0 in my.cnf\n"
- "InnoDB: and force InnoDB to continue"
- " crash recovery here.\n", filepath);
-
- os_file_close(file);
- mem_free(tablename);
- mem_free(filepath);
+ mem_free(def.filepath);
if (srv_force_recovery > 0) {
- fprintf(stderr,
- "InnoDB: innodb_force_recovery"
- " was set to %lu. Continuing crash recovery\n"
- "InnoDB: even though we cannot access"
- " the .ibd file of this table.\n",
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "innodb_force_recovery was set to %lu. "
+ "Continuing crash recovery even though we "
+ "cannot access the .ibd file of this table.",
srv_force_recovery);
return;
}
+ /* If debug code, cause a core dump and call stack. For
+ release builds just exit and rely on the messages above. */
+ ut_ad(0);
exit(1);
}
- /* TODO: What to do in other cases where we cannot access an .ibd
- file during a crash recovery? */
+ if (def.success && remote.success) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Tablespaces for %s have been found in two places;\n"
+ "Location 1: SpaceID: %lu LSN: %lu File: %s\n"
+ "Location 2: SpaceID: %lu LSN: %lu File: %s\n"
+ "You must delete one of them.",
+ tablename, (ulong) def.id, (ulong) def.lsn,
+ def.filepath, (ulong) remote.id, (ulong) remote.lsn,
+ remote.filepath);
- /* Every .ibd file is created >= 4 pages in size. Smaller files
- cannot be ok. */
+ def.success = FALSE;
+ os_file_close(def.file);
+ os_file_close(remote.file);
+ goto will_not_choose;
+ }
-#ifndef UNIV_HOTBACKUP
- if (size < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
- fprintf(stderr,
- "InnoDB: Error: the size of single-table"
- " tablespace file %s\n"
- "InnoDB: is only " UINT64PF
- ", should be at least %lu!\n",
- filepath,
- size, (ulong) (4 * UNIV_PAGE_SIZE));
- os_file_close(file);
- mem_free(tablename);
- mem_free(filepath);
+ /* At this point, only one tablespace is open */
+ ut_a(def.success == !remote.success);
- return;
- }
-#endif
- /* Read the first page of the tablespace if the size is big enough */
+ fsp_open_info* fsp = def.success ? &def : &remote;
- buf2 = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE));
- /* Align the memory for file i/o if we might have O_DIRECT set */
- page = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
+ /* Get and test the file size. */
+ size = os_file_get_size(fsp->file);
- if (size >= FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
- success = os_file_read(file, page, 0, UNIV_PAGE_SIZE);
+ if (size == (os_offset_t) -1) {
+ /* The following call prints an error message */
+ os_file_get_last_error(true);
- /* We have to read the tablespace id from the file */
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "could not measure the size of single-table "
+ "tablespace file %s", fsp->filepath);
- space_id = fsp_header_get_space_id(page);
- flags = fsp_header_get_flags(page);
- } else {
- space_id = ULINT_UNDEFINED;
- flags = 0;
+ os_file_close(fsp->file);
+ goto no_good_file;
}
+ /* Every .ibd file is created >= 4 pages in size. Smaller files
+ cannot be ok. */
+ ulong minimum_size = FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE;
+ if (size < minimum_size) {
#ifndef UNIV_HOTBACKUP
- if (space_id == ULINT_UNDEFINED || space_id == 0) {
- fprintf(stderr,
- "InnoDB: Error: tablespace id %lu in file %s"
- " is not sensible\n",
- (ulong) space_id,
- filepath);
- goto func_exit;
- }
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "The size of single-table tablespace file %s "
+ "is only " UINT64PF ", should be at least %lu!",
+ fsp->filepath, size, minimum_size);
+ os_file_close(fsp->file);
+ goto no_good_file;
#else
- if (space_id == ULINT_UNDEFINED || space_id == 0) {
+ fsp->id = ULINT_UNDEFINED;
+ fsp->flags = 0;
+#endif /* !UNIV_HOTBACKUP */
+ }
+
+#ifdef UNIV_HOTBACKUP
+ if (fsp->id == ULINT_UNDEFINED || fsp->id == 0) {
char* new_path;
fprintf(stderr,
@@ -3494,18 +4161,19 @@ fil_load_single_table_tablespace(
" is not sensible.\n"
"InnoDB: This can happen in an ibbackup run,"
" and is not dangerous.\n",
- filepath, space_id, filepath, size);
- os_file_close(file);
+ fsp->filepath, fsp->id, fsp->filepath, size);
+ os_file_close(fsp->file);
- new_path = fil_make_ibbackup_old_name(filepath);
- ut_a(os_file_rename(innodb_file_data_key, filepath, new_path));
+ new_path = fil_make_ibbackup_old_name(fsp->filepath);
+
+ bool success = os_file_rename(
+			innodb_file_data_key, fsp->filepath, new_path);
+
+ ut_a(success);
- ut_free(buf2);
- mem_free(tablename);
- mem_free(filepath);
mem_free(new_path);
- return;
+ goto func_exit_after_close;
}
/* A backup may contain the same space several times, if the space got
@@ -3517,7 +4185,7 @@ fil_load_single_table_tablespace(
mutex_enter(&fil_system->mutex);
- space = fil_space_get_by_id(space_id);
+ space = fil_space_get_by_id(fsp->id);
if (space) {
char* new_path;
@@ -3529,52 +4197,64 @@ fil_load_single_table_tablespace(
"InnoDB: was scanned earlier. This can happen"
" if you have renamed tables\n"
"InnoDB: during an ibbackup run.\n",
- filepath, space_id, filepath,
+ fsp->filepath, fsp->id, fsp->filepath,
space->name);
- os_file_close(file);
+ os_file_close(fsp->file);
- new_path = fil_make_ibbackup_old_name(filepath);
+ new_path = fil_make_ibbackup_old_name(fsp->filepath);
mutex_exit(&fil_system->mutex);
- ut_a(os_file_rename(innodb_file_data_key, filepath, new_path));
+ bool success = os_file_rename(
+ innodb_file_data_key, fsp->filepath, new_path);
+
+ ut_a(success);
- ut_free(buf2);
- mem_free(tablename);
- mem_free(filepath);
mem_free(new_path);
- return;
+ goto func_exit_after_close;
}
mutex_exit(&fil_system->mutex);
-#endif
- success = fil_space_create(tablename, space_id, flags, FIL_TABLESPACE);
-
- if (!success) {
+#endif /* UNIV_HOTBACKUP */
+ ibool file_space_create_success = fil_space_create(
+ tablename, fsp->id, fsp->flags, FIL_TABLESPACE);
+ if (!file_space_create_success) {
if (srv_force_recovery > 0) {
fprintf(stderr,
- "InnoDB: innodb_force_recovery"
- " was set to %lu. Continuing crash recovery\n"
- "InnoDB: even though the tablespace creation"
- " of this table failed.\n",
+ "InnoDB: innodb_force_recovery was set"
+ " to %lu. Continuing crash recovery\n"
+ "InnoDB: even though the tablespace"
+ " creation of this table failed.\n",
srv_force_recovery);
goto func_exit;
}
- exit(1);
+ /* Exit here with a core dump, stack, etc. */
+ ut_a(file_space_create_success);
}
/* We do not use the size information we have about the file, because
the rounding formula for extents and pages is somewhat complex; we
let fil_node_open() do that task. */
- fil_node_create(filepath, 0, space_id, FALSE);
+ if (!fil_node_create(fsp->filepath, 0, fsp->id, FALSE)) {
+ ut_error;
+ }
+
func_exit:
- os_file_close(file);
- ut_free(buf2);
+ os_file_close(fsp->file);
+
+#ifdef UNIV_HOTBACKUP
+func_exit_after_close:
+#else
+ ut_ad(!mutex_own(&fil_system->mutex));
+#endif
mem_free(tablename);
- mem_free(filepath);
+ if (remote.success) {
+ mem_free(remote.filepath);
+ }
+ mem_free(def.filepath);
}
/***********************************************************************//**
@@ -3587,29 +4267,25 @@ static
int
fil_file_readdir_next_file(
/*=======================*/
- ulint* err, /*!< out: this is set to DB_ERROR if an error
+ dberr_t* err, /*!< out: this is set to DB_ERROR if an error
was encountered, otherwise not changed */
const char* dirname,/*!< in: directory name or path */
os_file_dir_t dir, /*!< in: directory stream */
- os_file_stat_t* info) /*!< in/out: buffer where the info is returned */
+ os_file_stat_t* info) /*!< in/out: buffer where the
+ info is returned */
{
- ulint i;
- int ret;
-
- for (i = 0; i < 100; i++) {
- ret = os_file_readdir_next_file(dirname, dir, info);
+ for (ulint i = 0; i < 100; i++) {
+ int ret = os_file_readdir_next_file(dirname, dir, info);
if (ret != -1) {
return(ret);
}
- fprintf(stderr,
- "InnoDB: Error: os_file_readdir_next_file()"
- " returned -1 in\n"
- "InnoDB: directory %s\n"
- "InnoDB: Crash recovery may have failed"
- " for some .ibd files!\n", dirname);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "os_file_readdir_next_file() returned -1 in "
+ "directory %s, crash recovery may have failed "
+ "for some .ibd files!", dirname);
*err = DB_ERROR;
}
@@ -3626,7 +4302,7 @@ in the doublewrite buffer, also to know where to apply log records where the
space id is != 0.
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
fil_load_single_table_tablespaces(void)
/*===================================*/
{
@@ -3637,7 +4313,7 @@ fil_load_single_table_tablespaces(void)
os_file_dir_t dbdir;
os_file_stat_t dbinfo;
os_file_stat_t fileinfo;
- ulint err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
/* The datadir of MySQL is always the default directory of mysqld */
@@ -3686,7 +4362,6 @@ fil_load_single_table_tablespaces(void)
dbdir = os_file_opendir(dbpath, FALSE);
if (dbdir != NULL) {
- /* printf("Opened dir %s\n", dbinfo.name); */
/* We found a database directory; loop through it,
looking for possible .ibd files in it */
@@ -3694,8 +4369,6 @@ fil_load_single_table_tablespaces(void)
ret = fil_file_readdir_next_file(&err, dbpath, dbdir,
&fileinfo);
while (ret == 0) {
- /* printf(
- " Looking at file %s\n", fileinfo.name); */
if (fileinfo.type == OS_FILE_TYPE_DIR) {
@@ -3704,11 +4377,14 @@ fil_load_single_table_tablespaces(void)
/* We found a symlink or a file */
if (strlen(fileinfo.name) > 4
- && 0 == strcmp(fileinfo.name
+ && (0 == strcmp(fileinfo.name
+ + strlen(fileinfo.name) - 4,
+ ".ibd")
+ || 0 == strcmp(fileinfo.name
+ strlen(fileinfo.name) - 4,
- ".ibd")) {
- /* The name ends in .ibd; try opening
- the file */
+ ".isl"))) {
+ /* The name ends in .ibd or .isl;
+ try opening the file */
fil_load_single_table_tablespace(
dbinfo.name, fileinfo.name);
}
@@ -3808,6 +4484,29 @@ fil_tablespace_exists_in_mem(
}
/*******************************************************************//**
+Report that a tablespace for a table was not found. */
+static
+void
+fil_report_missing_tablespace(
+/*===========================*/
+ const char* name, /*!< in: table name */
+ ulint space_id) /*!< in: table's space id */
+{
+ char index_name[MAX_FULL_NAME_LEN + 1];
+
+ innobase_format_name(index_name, sizeof(index_name), name, TRUE);
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Table %s in the InnoDB data dictionary has tablespace id %lu, "
+ "but tablespace with that id or name does not exist. Have "
+ "you deleted or moved .ibd files? This may also be a table "
+ "created with CREATE TEMPORARY TABLE whose .ibd and .frm "
+ "files MySQL automatically removed, but the table still "
+ "exists in the InnoDB internal data dictionary.",
+ name, space_id);
+}
+
+/*******************************************************************//**
Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory
cache. Note that if we have not done a crash recovery at the database startup,
there may be many tablespaces which are not yet in the memory cache.
@@ -3817,19 +4516,25 @@ ibool
fil_space_for_table_exists_in_mem(
/*==============================*/
ulint id, /*!< in: space id */
- const char* name, /*!< in: table name in the standard
- 'databasename/tablename' format */
+ const char* name, /*!< in: table name used in
+ fil_space_create(). Either the
+ standard 'dbname/tablename' format
+ or table->dir_path_of_temp_table */
ibool mark_space, /*!< in: in crash recovery, at database
startup we mark all spaces which have
an associated table in the InnoDB
data dictionary, so that
we can print a warning about orphaned
tablespaces */
- ibool print_error_if_does_not_exist)
+ ibool print_error_if_does_not_exist,
/*!< in: print detailed error
information to the .err log if a
matching tablespace is not found from
memory */
+ bool adjust_space, /*!< in: whether to adjust space id
+ when find table space mismatch */
+ mem_heap_t* heap, /*!< in: heap memory */
+ table_id_t table_id) /*!< in: table id */
{
fil_space_t* fnamespace;
fil_space_t* space;
@@ -3858,6 +4563,47 @@ fil_space_for_table_exists_in_mem(
return(TRUE);
}
+ /* Info from "fnamespace" comes from the ibd file itself, it can
+ be different from data obtained from System tables since it is
+ not transactional. If adjust_space is set, and the mismatching
+ space are between a user table and its temp table, we shall
+ adjust the ibd file name according to system table info */
+ if (adjust_space
+ && space != NULL
+ && row_is_mysql_tmp_table_name(space->name)
+ && !row_is_mysql_tmp_table_name(name)) {
+
+ mutex_exit(&fil_system->mutex);
+
+ DBUG_EXECUTE_IF("ib_crash_before_adjust_fil_space",
+ DBUG_SUICIDE(););
+
+ if (fnamespace) {
+ char* tmp_name;
+
+ tmp_name = dict_mem_create_temporary_tablename(
+ heap, name, table_id);
+
+ fil_rename_tablespace(fnamespace->name, fnamespace->id,
+ tmp_name, NULL);
+ }
+
+ DBUG_EXECUTE_IF("ib_crash_after_adjust_one_fil_space",
+ DBUG_SUICIDE(););
+
+ fil_rename_tablespace(space->name, id, name, NULL);
+
+ DBUG_EXECUTE_IF("ib_crash_after_adjust_fil_space",
+ DBUG_SUICIDE(););
+
+ mutex_enter(&fil_system->mutex);
+ fnamespace = fil_space_get_by_name(name);
+ ut_ad(space == fnamespace);
+ mutex_exit(&fil_system->mutex);
+
+ return(TRUE);
+ }
+
if (!print_error_if_does_not_exist) {
mutex_exit(&fil_system->mutex);
@@ -3867,22 +4613,9 @@ fil_space_for_table_exists_in_mem(
if (space == NULL) {
if (fnamespace == NULL) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_filename(stderr, name);
- fprintf(stderr, "\n"
- "InnoDB: in InnoDB data dictionary"
- " has tablespace id %lu,\n"
- "InnoDB: but tablespace with that id"
- " or name does not exist. Have\n"
- "InnoDB: you deleted or moved .ibd files?\n"
- "InnoDB: This may also be a table created with"
- " CREATE TEMPORARY TABLE\n"
- "InnoDB: whose .ibd and .frm files"
- " MySQL automatically removed, but the\n"
- "InnoDB: table still exists in the"
- " InnoDB internal data dictionary.\n",
- (ulong) id);
+ if (print_error_if_does_not_exist) {
+ fil_report_missing_tablespace(name, id);
+ }
} else {
ut_print_timestamp(stderr);
fputs(" InnoDB: Error: table ", stderr);
@@ -3941,7 +4674,7 @@ error_exit:
Checks if a single-table tablespace for a given table name exists in the
tablespace memory cache.
@return space id, ULINT_UNDEFINED if not found */
-static
+UNIV_INTERN
ulint
fil_get_space_id_for_table(
/*=======================*/
@@ -3996,6 +4729,8 @@ fil_extend_space_to_desired_size(
ulint pages_added;
ibool success;
+ ut_ad(!srv_read_only_mode);
+
retry:
pages_added = 0;
success = TRUE;
@@ -4070,7 +4805,7 @@ retry:
node->name, node->handle, buf,
offset, page_size * n_pages,
NULL, NULL);
-#endif
+#endif /* UNIV_HOTBACKUP */
if (success) {
os_has_said_disk_full = FALSE;
} else {
@@ -4143,7 +4878,7 @@ fil_extend_tablespaces_to_stored_len(void)
byte* buf;
ulint actual_size;
ulint size_in_header;
- ulint error;
+ dberr_t error;
ibool success;
buf = mem_alloc(UNIV_PAGE_SIZE);
@@ -4177,7 +4912,7 @@ fil_extend_tablespaces_to_stored_len(void)
"InnoDB: Check that you have free disk space"
" and retry!\n",
space->name, size_in_header, actual_size);
- exit(1);
+ ut_a(success);
}
mutex_enter(&fil_system->mutex);
@@ -4347,12 +5082,21 @@ fil_node_complete_io(
node->n_pending--;
if (type == OS_FILE_WRITE) {
+ ut_ad(!srv_read_only_mode);
system->modification_counter++;
node->modification_counter = system->modification_counter;
- if (!node->space->is_in_unflushed_spaces) {
+ if (fil_buffering_disabled(node->space)) {
+
+ /* We don't need to keep track of unflushed
+ changes as user has explicitly disabled
+ buffering. */
+ ut_ad(!node->space->is_in_unflushed_spaces);
+ node->flush_counter = node->modification_counter;
- node->space->is_in_unflushed_spaces = TRUE;
+ } else if (!node->space->is_in_unflushed_spaces) {
+
+ node->space->is_in_unflushed_spaces = true;
UT_LIST_ADD_FIRST(unflushed_spaces,
system->unflushed_spaces,
node->space);
@@ -4399,7 +5143,7 @@ Reads or writes data. This operation is asynchronous (aio).
@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
i/o on a tablespace which does not exist */
UNIV_INTERN
-ulint
+dberr_t
fil_io(
/*===*/
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE,
@@ -4462,9 +5206,11 @@ fil_io(
#ifndef UNIV_HOTBACKUP
# ifndef UNIV_LOG_DEBUG
/* ibuf bitmap pages must be read in the sync aio mode: */
- ut_ad(recv_no_ibuf_operations || (type == OS_FILE_WRITE)
+ ut_ad(recv_no_ibuf_operations
+ || type == OS_FILE_WRITE
|| !ibuf_bitmap_page(zip_size, block_offset)
- || sync || is_log);
+ || sync
+ || is_log);
# endif /* UNIV_LOG_DEBUG */
if (sync) {
mode = OS_AIO_SYNC;
@@ -4483,9 +5229,10 @@ fil_io(
#endif /* !UNIV_HOTBACKUP */
if (type == OS_FILE_READ) {
- srv_data_read+= len;
+ srv_stats.data_read.add(len);
} else if (type == OS_FILE_WRITE) {
- srv_data_written+= len;
+ ut_ad(!srv_read_only_mode);
+ srv_stats.data_written.add(len);
}
/* Reserve the fil_system mutex and make sure that we can open at
@@ -4497,48 +5244,43 @@ fil_io(
/* If we are deleting a tablespace we don't allow any read
operations on that. However, we do allow write operations. */
- if (!space || (type == OS_FILE_READ && space->stop_new_ops)) {
+ if (space == 0 || (type == OS_FILE_READ && space->stop_new_ops)) {
mutex_exit(&fil_system->mutex);
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: trying to do i/o"
- " to a tablespace which does not exist.\n"
- "InnoDB: i/o type %lu, space id %lu,"
- " page no. %lu, i/o length %lu bytes\n",
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Trying to do i/o to a tablespace which does "
+ "not exist. i/o type %lu, space id %lu, "
+ "page no. %lu, i/o length %lu bytes",
(ulong) type, (ulong) space_id, (ulong) block_offset,
(ulong) len);
return(DB_TABLESPACE_DELETED);
}
- ut_ad((mode != OS_AIO_IBUF) || (space->purpose == FIL_TABLESPACE));
+ ut_ad(mode != OS_AIO_IBUF || space->purpose == FIL_TABLESPACE);
node = UT_LIST_GET_FIRST(space->chain);
for (;;) {
- if (UNIV_UNLIKELY(node == NULL)) {
+ if (node == NULL) {
if (ignore_nonexistent_pages) {
mutex_exit(&fil_system->mutex);
return(DB_ERROR);
}
- /* else */
fil_report_invalid_page_access(
block_offset, space_id, space->name,
byte_offset, len, type);
ut_error;
- }
- if (fil_is_user_tablespace_id(space->id) && node->size == 0) {
+ } else if (fil_is_user_tablespace_id(space->id)
+ && node->size == 0) {
+
/* We do not know the size of a single-table tablespace
before we open the file */
-
break;
- }
-
- if (node->size > block_offset) {
+ } else if (node->size > block_offset) {
/* Found! */
break;
} else {
@@ -4600,6 +5342,7 @@ fil_io(
if (type == OS_FILE_READ) {
ret = os_file_read(node->handle, buf, offset, len);
} else {
+ ut_ad(!srv_read_only_mode);
ret = os_file_write(node->name, node->handle, buf,
offset, len);
}
@@ -4607,7 +5350,7 @@ fil_io(
/* Queue the aio request */
ret = os_aio(type, mode | wake_later, node->name, node->handle, buf,
offset, len, node, message);
-#endif
+#endif /* UNIV_HOTBACKUP */
ut_a(ret);
if (mode == OS_AIO_SYNC) {
@@ -4649,24 +5392,24 @@ fil_aio_wait(
if (srv_use_native_aio) {
srv_set_io_thread_op_info(segment, "native aio handle");
#ifdef WIN_ASYNC_IO
- ret = os_aio_windows_handle(segment, 0, &fil_node,
- &message, &type);
+ ret = os_aio_windows_handle(
+ segment, 0, &fil_node, &message, &type);
#elif defined(LINUX_NATIVE_AIO)
- ret = os_aio_linux_handle(segment, &fil_node,
- &message, &type);
+ ret = os_aio_linux_handle(
+ segment, &fil_node, &message, &type);
#else
ut_error;
ret = 0; /* Eliminate compiler warning */
-#endif
+#endif /* WIN_ASYNC_IO */
} else {
srv_set_io_thread_op_info(segment, "simulated aio handle");
- ret = os_aio_simulated_handle(segment, &fil_node,
- &message, &type);
+ ret = os_aio_simulated_handle(
+ segment, &fil_node, &message, &type);
}
ut_a(ret);
- if (UNIV_UNLIKELY(fil_node == NULL)) {
+ if (fil_node == NULL) {
ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS);
return;
}
@@ -4722,6 +5465,28 @@ fil_flush(
return;
}
+ if (fil_buffering_disabled(space)) {
+
+ /* No need to flush. User has explicitly disabled
+ buffering. */
+ ut_ad(!space->is_in_unflushed_spaces);
+ ut_ad(fil_space_is_flushed(space));
+ ut_ad(space->n_pending_flushes == 0);
+
+#ifdef UNIV_DEBUG
+ for (node = UT_LIST_GET_FIRST(space->chain);
+ node != NULL;
+ node = UT_LIST_GET_NEXT(chain, node)) {
+ ut_ad(node->modification_counter
+ == node->flush_counter);
+ ut_ad(node->n_pending_flushes == 0);
+ }
+#endif /* UNIV_DEBUG */
+
+ mutex_exit(&fil_system->mutex);
+ return;
+ }
+
space->n_pending_flushes++; /*!< prevent dropping of the space while
we are flushing */
node = UT_LIST_GET_FIRST(space->chain);
@@ -4745,7 +5510,7 @@ fil_flush(
goto skip_flush;
}
-#endif
+#endif /* __WIN__ */
retry:
if (node->n_pending_flushes > 0) {
/* We want to avoid calling os_file_flush() on
@@ -4788,7 +5553,7 @@ skip_flush:
if (space->is_in_unflushed_spaces
&& fil_space_is_flushed(space)) {
- space->is_in_unflushed_spaces = FALSE;
+ space->is_in_unflushed_spaces = false;
UT_LIST_REMOVE(
unflushed_spaces,
@@ -5025,3 +5790,401 @@ fil_close(void)
fil_system = NULL;
}
+
+/********************************************************************//**
+Initializes a buffer control block when the buf_pool is created. */
+static
+void
+fil_buf_block_init(
+/*===============*/
+ buf_block_t* block, /*!< in: pointer to control block */
+ byte* frame) /*!< in: pointer to buffer frame */
+{
+ UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE);
+
+ block->frame = frame;
+
+ block->page.io_fix = BUF_IO_NONE;
+ /* There are assertions that check for this. */
+ block->page.buf_fix_count = 1;
+ block->page.state = BUF_BLOCK_READY_FOR_USE;
+
+ page_zip_des_init(&block->page.zip);
+}
+
+struct fil_iterator_t {
+ os_file_t file; /*!< File handle */
+ const char* filepath; /*!< File path name */
+ os_offset_t start; /*!< From where to start */
+ os_offset_t end; /*!< Where to stop */
+ os_offset_t file_size; /*!< File size in bytes */
+ ulint page_size; /*!< Page size */
+ ulint n_io_buffers; /*!< Number of pages to use
+ for IO */
+ byte* io_buffer; /*!< Buffer to use for IO */
+};
+
+/********************************************************************//**
+TODO: This can be made parallel trivially by chunking up the file and creating
+a callback per thread. Main benefit will be to use multiple CPUs for
+checksums and compressed tables. We have to do compressed tables block by
+block right now. Secondly we need to decompress/compress and copy too much
+of data. These are CPU intensive.
+
+Iterate over all the pages in the tablespace.
+@param iter - Tablespace iterator
+@param block - block to use for IO
+@param callback - Callback to inspect and update page contents
+@retval DB_SUCCESS or error code */
+static
+dberr_t
+fil_iterate(
+/*========*/
+ const fil_iterator_t& iter,
+ buf_block_t* block,
+ PageCallback& callback)
+{
+ os_offset_t offset;
+ ulint page_no = 0;
+ ulint space_id = callback.get_space_id();
+ ulint n_bytes = iter.n_io_buffers * iter.page_size;
+
+ ut_ad(!srv_read_only_mode);
+
+ /* TODO: For compressed tables we do a lot of useless
+ copying for non-index pages. Unfortunately, it is
+ required by buf_zip_decompress() */
+
+ for (offset = iter.start; offset < iter.end; offset += n_bytes) {
+
+ byte* io_buffer = iter.io_buffer;
+
+ block->frame = io_buffer;
+
+ if (callback.get_zip_size() > 0) {
+ page_zip_des_init(&block->page.zip);
+ page_zip_set_size(&block->page.zip, iter.page_size);
+ block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
+ ut_d(block->page.zip.m_external = true);
+ ut_ad(iter.page_size == callback.get_zip_size());
+
+ /* Zip IO is done in the compressed page buffer. */
+ io_buffer = block->page.zip.data;
+ } else {
+ io_buffer = iter.io_buffer;
+ }
+
+ /* We have to read the exact number of bytes. Otherwise the
+ InnoDB IO functions croak on failed reads. */
+
+ n_bytes = static_cast<ulint>(
+ ut_min(static_cast<os_offset_t>(n_bytes),
+ iter.end - offset));
+
+ ut_ad(n_bytes > 0);
+ ut_ad(!(n_bytes % iter.page_size));
+
+ if (!os_file_read(iter.file, io_buffer, offset,
+ (ulint) n_bytes)) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR, "os_file_read() failed");
+
+ return(DB_IO_ERROR);
+ }
+
+ bool updated = false;
+ os_offset_t page_off = offset;
+ ulint n_pages_read = (ulint) n_bytes / iter.page_size;
+
+ for (ulint i = 0; i < n_pages_read; ++i) {
+
+ buf_block_set_file_page(block, space_id, page_no++);
+
+ dberr_t err;
+
+ if ((err = callback(page_off, block)) != DB_SUCCESS) {
+
+ return(err);
+
+ } else if (!updated) {
+ updated = buf_block_get_state(block)
+ == BUF_BLOCK_FILE_PAGE;
+ }
+
+ buf_block_set_state(block, BUF_BLOCK_NOT_USED);
+ buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE);
+
+ page_off += iter.page_size;
+ block->frame += iter.page_size;
+ }
+
+ /* A page was updated in the set, write back to disk. */
+ if (updated
+ && !os_file_write(
+ iter.filepath, iter.file, io_buffer,
+ offset, (ulint) n_bytes)) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR, "os_file_write() failed");
+
+ return(DB_IO_ERROR);
+ }
+ }
+
+ return(DB_SUCCESS);
+}
+
+/********************************************************************//**
+Iterate over all the pages in the tablespace.
+@param table - the table definition in the server
+@param n_io_buffers - number of blocks to read and write together
+@param callback - functor that will do the page updates
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fil_tablespace_iterate(
+/*===================*/
+ dict_table_t* table,
+ ulint n_io_buffers,
+ PageCallback& callback)
+{
+ dberr_t err;
+ os_file_t file;
+ char* filepath;
+
+ ut_a(n_io_buffers > 0);
+ ut_ad(!srv_read_only_mode);
+
+ DBUG_EXECUTE_IF("ib_import_trigger_corruption_1",
+ return(DB_CORRUPTION););
+
+ if (DICT_TF_HAS_DATA_DIR(table->flags)) {
+ dict_get_and_save_data_dir_path(table, false);
+ ut_a(table->data_dir_path);
+
+ filepath = os_file_make_remote_pathname(
+ table->data_dir_path, table->name, "ibd");
+ } else {
+ filepath = fil_make_ibd_name(table->name, false);
+ }
+
+ {
+ ibool success;
+
+ file = os_file_create_simple_no_error_handling(
+ innodb_file_data_key, filepath,
+ OS_FILE_OPEN, OS_FILE_READ_WRITE, &success);
+
+ DBUG_EXECUTE_IF("fil_tablespace_iterate_failure",
+ {
+ static bool once;
+
+ if (!once || ut_rnd_interval(0, 10) == 5) {
+ once = true;
+ success = FALSE;
+ os_file_close(file);
+ }
+ });
+
+ if (!success) {
+ /* The following call prints an error message */
+ os_file_get_last_error(true);
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Trying to import a tablespace, but could not "
+ "open the tablespace file %s", filepath);
+
+ mem_free(filepath);
+
+ return(DB_TABLESPACE_NOT_FOUND);
+
+ } else {
+ err = DB_SUCCESS;
+ }
+ }
+
+ callback.set_file(filepath, file);
+
+ os_offset_t file_size = os_file_get_size(file);
+ ut_a(file_size != (os_offset_t) -1);
+
+ /* The block we will use for every physical page */
+ buf_block_t block;
+
+ memset(&block, 0x0, sizeof(block));
+
+ /* Allocate a page to read in the tablespace header, so that we
+ can determine the page size and zip_size (if it is compressed).
+ We allocate an extra page in case it is a compressed table. One
+page is to ensure alignment. */
+
+ void* page_ptr = mem_alloc(3 * UNIV_PAGE_SIZE);
+ byte* page = static_cast<byte*>(ut_align(page_ptr, UNIV_PAGE_SIZE));
+
+ fil_buf_block_init(&block, page);
+
+ /* Read the first page and determine the page and zip size. */
+
+ if (!os_file_read(file, page, 0, UNIV_PAGE_SIZE)) {
+
+ err = DB_IO_ERROR;
+
+ } else if ((err = callback.init(file_size, &block)) == DB_SUCCESS) {
+ fil_iterator_t iter;
+
+ iter.file = file;
+ iter.start = 0;
+ iter.end = file_size;
+ iter.filepath = filepath;
+ iter.file_size = file_size;
+ iter.n_io_buffers = n_io_buffers;
+ iter.page_size = callback.get_page_size();
+
+ /* Compressed pages can't be optimised for block IO for now.
+ We do the IMPORT page by page. */
+
+ if (callback.get_zip_size() > 0) {
+ iter.n_io_buffers = 1;
+ ut_a(iter.page_size == callback.get_zip_size());
+ }
+
+ /** Add an extra page for compressed page scratch area. */
+
+ void* io_buffer = mem_alloc(
+ (2 + iter.n_io_buffers) * UNIV_PAGE_SIZE);
+
+ iter.io_buffer = static_cast<byte*>(
+ ut_align(io_buffer, UNIV_PAGE_SIZE));
+
+ err = fil_iterate(iter, &block, callback);
+
+ mem_free(io_buffer);
+ }
+
+ if (err == DB_SUCCESS) {
+
+ ib_logf(IB_LOG_LEVEL_INFO, "Sync to disk");
+
+ if (!os_file_flush(file)) {
+ ib_logf(IB_LOG_LEVEL_INFO, "os_file_flush() failed!");
+ err = DB_IO_ERROR;
+ } else {
+ ib_logf(IB_LOG_LEVEL_INFO, "Sync to disk - done!");
+ }
+ }
+
+ os_file_close(file);
+
+ mem_free(page_ptr);
+ mem_free(filepath);
+
+ return(err);
+}
+
+/**
+Set the tablespace compressed table size.
+@return DB_SUCCESS if it is valid or DB_CORRUPTION if not */
+dberr_t
+PageCallback::set_zip_size(const buf_frame_t* page) UNIV_NOTHROW
+{
+ m_zip_size = fsp_header_get_zip_size(page);
+
+ if (!ut_is_2pow(m_zip_size) || m_zip_size > UNIV_ZIP_SIZE_MAX) {
+ return(DB_CORRUPTION);
+ }
+
+ return(DB_SUCCESS);
+}
+
+/********************************************************************//**
+Delete the tablespace file and any related files like .cfg.
+This should not be called for temporary tables. */
+UNIV_INTERN
+void
+fil_delete_file(
+/*============*/
+ const char* ibd_name) /*!< in: filepath of the ibd
+ tablespace */
+{
+ /* Force a delete of any stale .ibd files that are lying around. */
+
+ ib_logf(IB_LOG_LEVEL_INFO, "Deleting %s", ibd_name);
+
+ os_file_delete_if_exists(ibd_name);
+
+ char* cfg_name = fil_make_cfg_name(ibd_name);
+
+ os_file_delete_if_exists(cfg_name);
+
+ mem_free(cfg_name);
+}
+
+/**
+Iterate over all the spaces in the space list and fetch the
+tablespace names. It will return a copy of the name that must be
+freed by the caller using: delete[].
+@return DB_SUCCESS if all OK. */
+UNIV_INTERN
+dberr_t
+fil_get_space_names(
+/*================*/
+ space_name_list_t& space_name_list)
+ /*!< in/out: List to append to */
+{
+ fil_space_t* space;
+ dberr_t err = DB_SUCCESS;
+
+ mutex_enter(&fil_system->mutex);
+
+ for (space = UT_LIST_GET_FIRST(fil_system->space_list);
+ space != NULL;
+ space = UT_LIST_GET_NEXT(space_list, space)) {
+
+ if (space->purpose == FIL_TABLESPACE) {
+ ulint len;
+ char* name;
+
+ len = strlen(space->name);
+ name = new(std::nothrow) char[len + 1];
+
+ if (name == 0) {
+ /* Caller to free elements allocated so far. */
+ err = DB_OUT_OF_MEMORY;
+ break;
+ }
+
+ memcpy(name, space->name, len);
+ name[len] = 0;
+
+ space_name_list.push_back(name);
+ }
+ }
+
+ mutex_exit(&fil_system->mutex);
+
+ return(err);
+}
+
+/****************************************************************//**
+Generate redo logs for swapping two .ibd files */
+UNIV_INTERN
+void
+fil_mtr_rename_log(
+/*===============*/
+ ulint old_space_id, /*!< in: tablespace id of the old
+ table. */
+ const char* old_name, /*!< in: old table name */
+ ulint new_space_id, /*!< in: tablespace id of the new
+ table */
+ const char* new_name, /*!< in: new table name */
+ const char* tmp_name) /*!< in: temp table name used while
+ swapping */
+{
+ mtr_t mtr;
+ mtr_start(&mtr);
+ fil_op_write_log(MLOG_FILE_RENAME, old_space_id,
+ 0, 0, old_name, tmp_name, &mtr);
+ fil_op_write_log(MLOG_FILE_RENAME, new_space_id,
+ 0, 0, new_name, old_name, &mtr);
+ mtr_commit(&mtr);
+}
+
diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc
index 398dd24afed..dc843a89fb9 100644
--- a/storage/innobase/fsp/fsp0fsp.cc
+++ b/storage/innobase/fsp/fsp0fsp.cc
@@ -93,15 +93,13 @@ fseg_n_reserved_pages_low(
/********************************************************************//**
Marks a page used. The page must reside within the extents of the given
segment. */
-static
+static __attribute__((nonnull))
void
fseg_mark_page_used(
/*================*/
fseg_inode_t* seg_inode,/*!< in: segment inode */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
ulint page, /*!< in: page offset */
+ xdes_t* descr, /*!< in: extent descriptor */
mtr_t* mtr); /*!< in/out: mini-transaction */
/**********************************************************************//**
Returns the first extent descriptor for a segment. We think of the extent
@@ -214,30 +212,18 @@ Gets a descriptor bit of a page.
@return TRUE if free */
UNIV_INLINE
ibool
-xdes_get_bit(
-/*=========*/
+xdes_mtr_get_bit(
+/*=============*/
const xdes_t* descr, /*!< in: descriptor */
ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
ulint offset, /*!< in: page offset within extent:
0 ... FSP_EXTENT_SIZE - 1 */
- mtr_t* mtr) /*!< in/out: mini-transaction */
+ mtr_t* mtr) /*!< in: mini-transaction */
{
- ulint index;
- ulint byte_index;
- ulint bit_index;
-
+ ut_ad(mtr->state == MTR_ACTIVE);
ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
- ut_ad((bit == XDES_FREE_BIT) || (bit == XDES_CLEAN_BIT));
- ut_ad(offset < FSP_EXTENT_SIZE);
- index = bit + XDES_BITS_PER_PAGE * offset;
-
- byte_index = index / 8;
- bit_index = index % 8;
-
- return(ut_bit_get_nth(mtr_read_ulint(descr + XDES_BITMAP + byte_index,
- MLOG_1BYTE, mtr),
- bit_index));
+ return(xdes_get_bit(descr, bit, offset));
}
/**********************************************************************//**
@@ -287,7 +273,8 @@ xdes_find_bit(
xdes_t* descr, /*!< in: descriptor */
ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
ibool val, /*!< in: desired bit value */
- ulint hint, /*!< in: hint of which bit position would be desirable */
+ ulint hint, /*!< in: hint of which bit position would
+ be desirable */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
ulint i;
@@ -297,14 +284,14 @@ xdes_find_bit(
ut_ad(hint < FSP_EXTENT_SIZE);
ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
for (i = hint; i < FSP_EXTENT_SIZE; i++) {
- if (val == xdes_get_bit(descr, bit, i, mtr)) {
+ if (val == xdes_mtr_get_bit(descr, bit, i, mtr)) {
return(i);
}
}
for (i = 0; i < hint; i++) {
- if (val == xdes_get_bit(descr, bit, i, mtr)) {
+ if (val == xdes_mtr_get_bit(descr, bit, i, mtr)) {
return(i);
}
@@ -324,7 +311,8 @@ xdes_find_bit_downward(
xdes_t* descr, /*!< in: descriptor */
ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
ibool val, /*!< in: desired bit value */
- ulint hint, /*!< in: hint of which bit position would be desirable */
+ ulint hint, /*!< in: hint of which bit position would
+ be desirable */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
ulint i;
@@ -334,14 +322,14 @@ xdes_find_bit_downward(
ut_ad(hint < FSP_EXTENT_SIZE);
ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
for (i = hint + 1; i > 0; i--) {
- if (val == xdes_get_bit(descr, bit, i - 1, mtr)) {
+ if (val == xdes_mtr_get_bit(descr, bit, i - 1, mtr)) {
return(i - 1);
}
}
for (i = FSP_EXTENT_SIZE - 1; i > hint; i--) {
- if (val == xdes_get_bit(descr, bit, i, mtr)) {
+ if (val == xdes_mtr_get_bit(descr, bit, i, mtr)) {
return(i);
}
@@ -360,13 +348,12 @@ xdes_get_n_used(
const xdes_t* descr, /*!< in: descriptor */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
- ulint i;
ulint count = 0;
ut_ad(descr && mtr);
ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
- for (i = 0; i < FSP_EXTENT_SIZE; i++) {
- if (FALSE == xdes_get_bit(descr, XDES_FREE_BIT, i, mtr)) {
+ for (ulint i = 0; i < FSP_EXTENT_SIZE; ++i) {
+ if (FALSE == xdes_mtr_get_bit(descr, XDES_FREE_BIT, i, mtr)) {
count++;
}
}
@@ -471,76 +458,11 @@ xdes_init(
}
/********************************************************************//**
-Calculates the page where the descriptor of a page resides.
-@return descriptor page offset */
-UNIV_INLINE
-ulint
-xdes_calc_descriptor_page(
-/*======================*/
- ulint zip_size, /*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint offset) /*!< in: page offset */
-{
-#ifndef DOXYGEN /* Doxygen gets confused of these */
-# if UNIV_PAGE_SIZE_MAX <= XDES_ARR_OFFSET \
- + (UNIV_PAGE_SIZE_MAX / FSP_EXTENT_SIZE_MAX) \
- * XDES_SIZE_MAX
-# error
-# endif
-# if UNIV_ZIP_SIZE_MIN <= XDES_ARR_OFFSET \
- + (UNIV_ZIP_SIZE_MIN / FSP_EXTENT_SIZE_MIN) \
- * XDES_SIZE_MIN
-# error
-# endif
-#endif /* !DOXYGEN */
-
- ut_ad(UNIV_PAGE_SIZE > XDES_ARR_OFFSET
- + (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE)
- * XDES_SIZE);
- ut_ad(UNIV_ZIP_SIZE_MIN > XDES_ARR_OFFSET
- + (UNIV_ZIP_SIZE_MIN / FSP_EXTENT_SIZE)
- * XDES_SIZE);
-
- ut_ad(ut_is_2pow(zip_size));
-
- if (!zip_size) {
- return(ut_2pow_round(offset, UNIV_PAGE_SIZE));
- } else {
- ut_ad(zip_size > XDES_ARR_OFFSET
- + (zip_size / FSP_EXTENT_SIZE) * XDES_SIZE);
- return(ut_2pow_round(offset, zip_size));
- }
-}
-
-/********************************************************************//**
-Calculates the descriptor index within a descriptor page.
-@return descriptor index */
-UNIV_INLINE
-ulint
-xdes_calc_descriptor_index(
-/*=======================*/
- ulint zip_size, /*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint offset) /*!< in: page offset */
-{
- ut_ad(ut_is_2pow(zip_size));
-
- if (!zip_size) {
- return(ut_2pow_remainder(offset, UNIV_PAGE_SIZE)
- / FSP_EXTENT_SIZE);
- } else {
- return(ut_2pow_remainder(offset, zip_size) / FSP_EXTENT_SIZE);
- }
-}
-
-/********************************************************************//**
Gets pointer to a the extent descriptor of a page. The page where the extent
-descriptor resides is x-locked. If the page offset is equal to the free limit
-of the space, adds new extents from above the free limit to the space free
-list, if not free limit == space size. This adding is necessary to make the
-descriptor defined, as they are uninitialized above the free limit.
+descriptor resides is x-locked. This function no longer extends the data
+file.
@return pointer to the extent descriptor, NULL if the page does not
-exist in the space or if the offset exceeds the free limit */
+exist in the space or if the offset is >= the free limit */
UNIV_INLINE __attribute__((nonnull, warn_unused_result))
xdes_t*
xdes_get_descriptor_with_space_hdr(
@@ -570,19 +492,10 @@ xdes_get_descriptor_with_space_hdr(
zip_size = fsp_flags_get_zip_size(
mach_read_from_4(sp_header + FSP_SPACE_FLAGS));
- /* If offset is >= size or > limit, return NULL */
-
- if ((offset >= size) || (offset > limit)) {
-
+ if ((offset >= size) || (offset >= limit)) {
return(NULL);
}
- /* If offset is == limit, fill free list of the space. */
-
- if (offset == limit) {
- fsp_fill_free_list(FALSE, space, sp_header, mtr);
- }
-
descr_page_no = xdes_calc_descriptor_page(zip_size, offset);
if (descr_page_no == 0) {
@@ -668,7 +581,7 @@ UNIV_INLINE
ulint
xdes_get_offset(
/*============*/
- xdes_t* descr) /*!< in: extent descriptor */
+ const xdes_t* descr) /*!< in: extent descriptor */
{
ut_ad(descr);
@@ -784,7 +697,7 @@ fsp_header_init_fields(
ulint space_id, /*!< in: space id */
ulint flags) /*!< in: tablespace flags (FSP_SPACE_FLAGS) */
{
- fsp_flags_validate(flags);
+ ut_a(fsp_flags_is_valid(flags));
mach_write_to_4(FSP_HEADER_OFFSET + FSP_SPACE_ID + page,
space_id);
@@ -872,11 +785,13 @@ fsp_header_get_space_id(
id = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+ DBUG_EXECUTE_IF("fsp_header_get_space_id_failure",
+ id = ULINT_UNDEFINED;);
+
if (id != fsp_id) {
- fprintf(stderr,
- "InnoDB: Error: space id in fsp header %lu,"
- " but in the page header %lu\n",
- (ulong) fsp_id, (ulong) id);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Space id in fsp header %lu, but in the page header "
+ "%lu", fsp_id, id);
return(ULINT_UNDEFINED);
}
@@ -1348,7 +1263,7 @@ fsp_alloc_from_free_frag(
ulint frag_n_used;
ut_ad(xdes_get_state(descr, mtr) == XDES_FREE_FRAG);
- ut_a(xdes_get_bit(descr, XDES_FREE_BIT, bit, mtr));
+ ut_a(xdes_mtr_get_bit(descr, XDES_FREE_BIT, bit, mtr));
xdes_set_bit(descr, XDES_FREE_BIT, bit, FALSE, mtr);
/* Update the FRAG_N_USED field */
@@ -1583,7 +1498,9 @@ fsp_free_page(
ut_error;
}
- if (xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)) {
+ if (xdes_mtr_get_bit(descr, XDES_FREE_BIT,
+ page % FSP_EXTENT_SIZE, mtr)) {
+
fprintf(stderr,
"InnoDB: Error: File space extent descriptor"
" of page %lu says it is free\n"
@@ -1728,16 +1645,15 @@ fsp_seg_inode_page_find_free(
ulint zip_size,/*!< in: compressed page size, or 0 */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
- fseg_inode_t* inode;
-
for (; i < FSP_SEG_INODES_PER_PAGE(zip_size); i++) {
+ fseg_inode_t* inode;
+
inode = fsp_seg_inode_page_get_nth_inode(
page, i, zip_size, mtr);
if (!mach_read_from_8(inode + FSEG_ID)) {
/* This is unused */
-
return(i);
}
@@ -1763,11 +1679,11 @@ fsp_alloc_seg_inode_page(
page_t* page;
ulint space;
ulint zip_size;
- ulint i;
ut_ad(page_offset(space_header) == FSP_HEADER_OFFSET);
space = page_get_space_id(page_align(space_header));
+
zip_size = fsp_flags_get_zip_size(
mach_read_from_4(FSP_SPACE_FLAGS + space_header));
@@ -1788,16 +1704,18 @@ fsp_alloc_seg_inode_page(
mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_INODE,
MLOG_2BYTES, mtr);
- for (i = 0; i < FSP_SEG_INODES_PER_PAGE(zip_size); i++) {
+ for (ulint i = 0; i < FSP_SEG_INODES_PER_PAGE(zip_size); i++) {
- inode = fsp_seg_inode_page_get_nth_inode(page, i,
- zip_size, mtr);
+ inode = fsp_seg_inode_page_get_nth_inode(
+ page, i, zip_size, mtr);
mlog_write_ull(inode + FSEG_ID, 0, mtr);
}
- flst_add_last(space_header + FSP_SEG_INODES_FREE,
- page + FSEG_INODE_PAGE_NODE, mtr);
+ flst_add_last(
+ space_header + FSP_SEG_INODES_FREE,
+ page + FSEG_INODE_PAGE_NODE, mtr);
+
return(TRUE);
}
@@ -2486,8 +2404,8 @@ fseg_alloc_free_page_low(
/*-------------------------------------------------------------*/
if ((xdes_get_state(descr, mtr) == XDES_FSEG)
&& mach_read_from_8(descr + XDES_ID) == seg_id
- && (xdes_get_bit(descr, XDES_FREE_BIT,
- hint % FSP_EXTENT_SIZE, mtr) == TRUE)) {
+ && (xdes_mtr_get_bit(descr, XDES_FREE_BIT,
+ hint % FSP_EXTENT_SIZE, mtr) == TRUE)) {
take_hinted_page:
/* 1. We can take the hinted page
=================================*/
@@ -2652,10 +2570,12 @@ got_hinted_page:
ut_ad(xdes_get_descriptor(space, zip_size, ret_page, mtr)
== ret_descr);
- ut_ad(xdes_get_bit(ret_descr, XDES_FREE_BIT,
- ret_page % FSP_EXTENT_SIZE, mtr) == TRUE);
- fseg_mark_page_used(seg_inode, space, zip_size, ret_page, mtr);
+ ut_ad(xdes_mtr_get_bit(
+ ret_descr, XDES_FREE_BIT,
+ ret_page % FSP_EXTENT_SIZE, mtr));
+
+ fseg_mark_page_used(seg_inode, ret_page, ret_descr, mtr);
}
return(fsp_page_create(
@@ -3053,27 +2973,21 @@ fsp_get_available_space_in_free_extents(
/********************************************************************//**
Marks a page used. The page must reside within the extents of the given
segment. */
-static
+static __attribute__((nonnull))
void
fseg_mark_page_used(
/*================*/
fseg_inode_t* seg_inode,/*!< in: segment inode */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
ulint page, /*!< in: page offset */
+ xdes_t* descr, /*!< in: extent descriptor */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
- xdes_t* descr;
ulint not_full_n_used;
- ut_ad(seg_inode && mtr);
ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
== FSEG_MAGIC_N_VALUE);
- descr = xdes_get_descriptor(space, zip_size, page, mtr);
-
ut_ad(mtr_read_ulint(seg_inode + FSEG_ID, MLOG_4BYTES, mtr)
== mtr_read_ulint(descr + XDES_ID, MLOG_4BYTES, mtr));
@@ -3086,8 +3000,9 @@ fseg_mark_page_used(
descr + XDES_FLST_NODE, mtr);
}
- ut_ad(xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)
- == TRUE);
+ ut_ad(xdes_mtr_get_bit(
+ descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr));
+
/* We mark the page as used */
xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, FALSE, mtr);
@@ -3142,8 +3057,8 @@ fseg_free_page_low(
descr = xdes_get_descriptor(space, zip_size, page, mtr);
- ut_a(descr);
- if (xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)) {
+ if (xdes_mtr_get_bit(descr, XDES_FREE_BIT,
+ page % FSP_EXTENT_SIZE, mtr)) {
fputs("InnoDB: Dump of the tablespace extent descriptor: ",
stderr);
ut_print_buf(stderr, descr, 40);
@@ -3278,6 +3193,49 @@ fseg_free_page(
}
/**********************************************************************//**
+Checks if a single page of a segment is free.
+@return true if free */
+UNIV_INTERN
+bool
+fseg_page_is_free(
+/*==============*/
+ fseg_header_t* seg_header, /*!< in: segment header */
+ ulint space, /*!< in: space id */
+ ulint page) /*!< in: page offset */
+{
+ mtr_t mtr;
+ ibool is_free;
+ ulint flags;
+ rw_lock_t* latch;
+ xdes_t* descr;
+ ulint zip_size;
+ fseg_inode_t* seg_inode;
+
+ latch = fil_space_get_latch(space, &flags);
+ zip_size = dict_tf_get_zip_size(flags);
+
+ mtr_start(&mtr);
+ mtr_x_lock(latch, &mtr);
+
+ seg_inode = fseg_inode_get(seg_header, space, zip_size, &mtr);
+
+ ut_a(seg_inode);
+ ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
+ == FSEG_MAGIC_N_VALUE);
+ ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
+
+ descr = xdes_get_descriptor(space, zip_size, page, &mtr);
+ ut_a(descr);
+
+ is_free = xdes_mtr_get_bit(
+ descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, &mtr);
+
+ mtr_commit(&mtr);
+
+ return(is_free);
+}
+
+/**********************************************************************//**
Frees an extent of a segment to the space free list. */
static
void
@@ -3308,7 +3266,7 @@ fseg_free_extent(
first_page_in_extent = page - (page % FSP_EXTENT_SIZE);
for (i = 0; i < FSP_EXTENT_SIZE; i++) {
- if (FALSE == xdes_get_bit(descr, XDES_FREE_BIT, i, mtr)) {
+ if (!xdes_mtr_get_bit(descr, XDES_FREE_BIT, i, mtr)) {
/* Drop search system page hash index if the page is
found in the pool and is hashed */
@@ -3388,9 +3346,9 @@ fseg_free_step(
/* Check that the header resides on a page which has not been
freed yet */
- ut_a(descr);
- ut_a(xdes_get_bit(descr, XDES_FREE_BIT,
- header_page % FSP_EXTENT_SIZE, mtr) == FALSE);
+ ut_a(xdes_mtr_get_bit(descr, XDES_FREE_BIT,
+ header_page % FSP_EXTENT_SIZE, mtr) == FALSE);
+
inode = fseg_inode_try_get(header, space, zip_size, mtr);
if (UNIV_UNLIKELY(inode == NULL)) {
diff --git a/storage/innobase/fts/fts0ast.cc b/storage/innobase/fts/fts0ast.cc
index c01c43a021f..972f5acf461 100644
--- a/storage/innobase/fts/fts0ast.cc
+++ b/storage/innobase/fts/fts0ast.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -98,9 +98,21 @@ fts_ast_create_node_text(
void* arg, /*!< in: ast state instance */
const char* ptr) /*!< in: ast text string */
{
+ ulint len = strlen(ptr);
+ fts_ast_node_t* node = NULL;
+
+ ut_ad(len >= 2);
+
+ if (len == 2) {
+ ut_ad(ptr[0] == '\"');
+ ut_ad(ptr[1] == '\"');
+ return(NULL);
+ }
+
+ node = fts_ast_node_create();
+
/*!< We ignore the actual quotes "" */
- ulint len = strlen(ptr) - 2;
- fts_ast_node_t* node = fts_ast_node_create();
+ len -= 2;
node->type = FTS_AST_TEXT;
node->text.ptr = static_cast<byte*>(ut_malloc(len + 1));
@@ -381,34 +393,100 @@ fts_ast_node_print(
}
/******************************************************************//**
-Traverse the AST - in-order traversal.
+Traverse the AST - in-order traversal, except for the FTS_IGNORE
+nodes, which will be ignored in the first pass of each level, and
+visited in a second pass after all other nodes in the same level are visited.
@return DB_SUCCESS if all went well */
UNIV_INTERN
-ulint
+dberr_t
fts_ast_visit(
/*==========*/
fts_ast_oper_t oper, /*!< in: current operator */
fts_ast_node_t* node, /*!< in: current root node */
fts_ast_callback visitor, /*!< in: callback function */
- void* arg) /*!< in: arg for callback */
+ void* arg, /*!< in: arg for callback */
+ bool* has_ignore) /*!< out: true, if the operator
+ was ignored during processing,
+ currently we only ignore
+ FTS_IGNORE operator */
{
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
+ fts_ast_node_t* oper_node = NULL;
+ fts_ast_node_t* start_node;
+ bool revisit = false;
+ bool will_be_ignored = false;
+
+ start_node = node->list.head;
ut_a(node->type == FTS_AST_LIST
|| node->type == FTS_AST_SUBEXP_LIST);
+ /* In the first pass of the tree, at the leaf level of the
+ tree, FTS_IGNORE operation will be ignored. It will be
+ repeated at the level above the leaf level */
for (node = node->list.head;
- node && error == DB_SUCCESS;
+ node && (error == DB_SUCCESS);
node = node->next) {
if (node->type == FTS_AST_LIST) {
- error = fts_ast_visit(oper, node, visitor, arg);
+ error = fts_ast_visit(oper, node, visitor,
+ arg, &will_be_ignored);
+
+ /* If will_be_ignored is set to true, then
+ we encountered and ignored a FTS_IGNORE operator,
+ and a second pass is needed to process the FTS_IGNORE
+ operator */
+ if (will_be_ignored) {
+ revisit = true;
+ }
} else if (node->type == FTS_AST_SUBEXP_LIST) {
error = fts_ast_visit_sub_exp(node, visitor, arg);
} else if (node->type == FTS_AST_OPER) {
oper = node->oper;
+ oper_node = node;
} else {
- visitor(oper, node, arg);
+ if (node->visited) {
+ continue;
+ }
+
+ ut_a(oper == FTS_NONE || !oper_node
+ || oper_node->oper == oper);
+
+ if (oper == FTS_IGNORE) {
+ *has_ignore = true;
+ /* Change the operator to FTS_IGNORE_SKIP,
+ so that it is processed in the second pass */
+ oper_node->oper = FTS_IGNORE_SKIP;
+ continue;
+ }
+
+ if (oper == FTS_IGNORE_SKIP) {
+ /* This must be the second pass, now we process
+ the FTS_IGNORE operator */
+ visitor(FTS_IGNORE, node, arg);
+ } else {
+ visitor(oper, node, arg);
+ }
+
+ node->visited = true;
+ }
+ }
+
+ /* Second pass to process the skipped FTS_IGNORE operation.
+ It is only performed at the level above leaf level */
+ if (revisit) {
+ for (node = start_node;
+ node && error == DB_SUCCESS;
+ node = node->next) {
+
+ if (node->type == FTS_AST_LIST) {
+ /* In this pass, it will process all those
+ operators ignored in the first pass, and those
+ whose operators are set to FTS_IGNORE_SKIP */
+ error = fts_ast_visit(
+ oper, node, visitor, arg,
+ &will_be_ignored);
+ }
}
}
diff --git a/storage/innobase/fts/fts0blex.cc b/storage/innobase/fts/fts0blex.cc
index b3350010db0..1abd737ec06 100644
--- a/storage/innobase/fts/fts0blex.cc
+++ b/storage/innobase/fts/fts0blex.cc
@@ -35,7 +35,7 @@
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
- * if you want the limit (max/min) macros for int types.
+ * if you want the limit (max/min) macros for int types.
*/
#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS 1
@@ -247,7 +247,7 @@ struct yy_buffer_state
int yy_bs_lineno; /**< The line count. */
int yy_bs_column; /**< The column count. */
-
+
/* Whether to try to fill the input buffer when we reach the
* end of it.
*/
@@ -305,9 +305,9 @@ YY_BUFFER_STATE fts0b_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner
YY_BUFFER_STATE fts0b_scan_string (yyconst char *yy_str ,yyscan_t yyscanner );
YY_BUFFER_STATE fts0b_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner );
-void *fts0balloc (yy_size_t , yyscan_t yyscanner __attribute__((unused)) );
-void *fts0brealloc (void *,yy_size_t , yyscan_t yyscanner __attribute__((unused)) );
-void fts0bfree (void * , yyscan_t yyscanner __attribute__((unused)) );
+void *fts0balloc (yy_size_t , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) );
+void *fts0brealloc (void *,yy_size_t , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) );
+void fts0bfree (void * , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) );
#define yy_new_buffer fts0b_create_buffer
@@ -347,7 +347,7 @@ typedef int yy_state_type;
static yy_state_type yy_get_previous_state (yyscan_t yyscanner );
static yy_state_type yy_try_NUL_trans (yy_state_type current_state ,yyscan_t yyscanner);
static int yy_get_next_buffer (yyscan_t yyscanner );
-static void yy_fatal_error (yyconst char msg[] , yyscan_t yyscanner __attribute__((unused)) );
+static void yy_fatal_error (yyconst char msg[] , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) );
/* Done after the current pattern has been matched and before the
* corresponding action - sets up yytext.
@@ -368,10 +368,10 @@ struct yy_trans_info
flex_int32_t yy_verify;
flex_int32_t yy_nxt;
};
-static yyconst flex_int16_t yy_accept[18] =
+static yyconst flex_int16_t yy_accept[19] =
{ 0,
- 4, 4, 8, 4, 1, 6, 1, 7, 2, 3,
- 4, 1, 1, 0, 5, 3, 0
+ 4, 4, 8, 4, 1, 6, 1, 7, 7, 2,
+ 3, 4, 1, 1, 0, 5, 3, 0
} ;
static yyconst flex_int32_t yy_ec[256] =
@@ -379,17 +379,17 @@ static yyconst flex_int32_t yy_ec[256] =
1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 4, 1, 5, 1, 1, 1, 1, 1, 6,
- 6, 6, 6, 1, 6, 1, 1, 7, 7, 7,
- 7, 7, 7, 7, 7, 7, 7, 1, 1, 6,
- 1, 6, 1, 6, 1, 1, 1, 1, 1, 1,
+ 1, 4, 1, 5, 1, 1, 6, 1, 1, 7,
+ 7, 7, 7, 1, 7, 1, 1, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 1, 1, 7,
+ 1, 7, 1, 7, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 6, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 7, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -406,35 +406,39 @@ static yyconst flex_int32_t yy_ec[256] =
1, 1, 1, 1, 1
} ;
-static yyconst flex_int32_t yy_meta[8] =
+static yyconst flex_int32_t yy_meta[9] =
{ 0,
- 1, 2, 3, 4, 5, 5, 1
+ 1, 2, 3, 4, 5, 5, 5, 1
} ;
-static yyconst flex_int16_t yy_base[21] =
+static yyconst flex_int16_t yy_base[22] =
{ 0,
- 0, 0, 21, 0, 6, 22, 0, 13, 22, 7,
- 0, 0, 0, 4, 22, 0, 22, 10, 11, 15
+ 0, 0, 22, 0, 7, 23, 0, 14, 23, 23,
+ 7, 0, 0, 0, 5, 23, 0, 23, 11, 12,
+ 16
} ;
-static yyconst flex_int16_t yy_def[21] =
+static yyconst flex_int16_t yy_def[22] =
{ 0,
- 17, 1, 17, 18, 18, 17, 19, 20, 17, 18,
- 18, 5, 19, 20, 17, 10, 0, 17, 17, 17
+ 18, 1, 18, 19, 19, 18, 20, 21, 18, 18,
+ 19, 19, 5, 20, 21, 18, 11, 0, 18, 18,
+ 18
} ;
-static yyconst flex_int16_t yy_nxt[30] =
+static yyconst flex_int16_t yy_nxt[32] =
{ 0,
- 4, 5, 6, 7, 8, 9, 10, 12, 15, 13,
- 11, 11, 13, 16, 13, 14, 14, 15, 14, 14,
- 17, 3, 17, 17, 17, 17, 17, 17, 17
+ 4, 5, 6, 7, 8, 9, 10, 11, 13, 16,
+ 14, 12, 12, 14, 17, 14, 15, 15, 16, 15,
+ 15, 18, 3, 18, 18, 18, 18, 18, 18, 18,
+ 18
} ;
-static yyconst flex_int16_t yy_chk[30] =
+static yyconst flex_int16_t yy_chk[32] =
{ 0,
- 1, 1, 1, 1, 1, 1, 1, 5, 14, 5,
- 18, 18, 19, 10, 19, 20, 20, 8, 20, 20,
- 3, 17, 17, 17, 17, 17, 17, 17, 17
+ 1, 1, 1, 1, 1, 1, 1, 1, 5, 15,
+ 5, 19, 19, 20, 11, 20, 21, 21, 8, 21,
+ 21, 3, 18, 18, 18, 18, 18, 18, 18, 18,
+ 18
} ;
/* The intent behind this definition is that it'll catch
@@ -477,7 +481,7 @@ this program; if not, write to the Free Software Foundation, Inc.,
#define YY_DECL int fts_blexer(YYSTYPE* val, yyscan_t yyscanner)
#define YY_NO_INPUT 1
-#line 480 "fts0blex.cc"
+#line 484 "fts0blex.cc"
#define INITIAL 0
@@ -575,11 +579,11 @@ extern int fts0bwrap (yyscan_t yyscanner );
#endif
#ifndef yytext_ptr
-static void yy_flex_strncpy (char *,yyconst char *,int , yyscan_t yyscanner __attribute__((unused)));
+static void yy_flex_strncpy (char *,yyconst char *,int , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)));
#endif
#ifdef YY_NEED_STRLEN
-static int yy_flex_strlen (yyconst char * , yyscan_t yyscanner __attribute__((unused)));
+static int yy_flex_strlen (yyconst char * , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)));
#endif
#ifndef YY_NO_INPUT
@@ -699,12 +703,12 @@ YY_DECL
register yy_state_type yy_current_state;
register char *yy_cp, *yy_bp;
register int yy_act;
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
#line 43 "fts0blex.l"
-#line 707 "fts0blex.cc"
+#line 711 "fts0blex.cc"
if ( !yyg->yy_init )
{
@@ -757,13 +761,13 @@ yy_match:
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 18 )
+ if ( yy_current_state >= 19 )
yy_c = yy_meta[(unsigned int) yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
++yy_cp;
}
- while ( yy_current_state != 17 );
+ while ( yy_current_state != 18 );
yy_cp = yyg->yy_last_accepting_cpos;
yy_current_state = yyg->yy_last_accepting_state;
@@ -835,7 +839,7 @@ YY_RULE_SETUP
#line 73 "fts0blex.l"
ECHO;
YY_BREAK
-#line 838 "fts0blex.cc"
+#line 842 "fts0blex.cc"
case YY_STATE_EOF(INITIAL):
yyterminate();
@@ -978,7 +982,7 @@ case YY_STATE_EOF(INITIAL):
*/
static int yy_get_next_buffer (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
register char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf;
register char *source = yyg->yytext_ptr;
register int number_to_move, i;
@@ -1044,9 +1048,9 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
else
b->yy_buf_size *= 2;
- b->yy_ch_buf = (char*)
+ b->yy_ch_buf = (char *)
/* Include room in for 2 EOB chars. */
- fts0brealloc((void*) b->yy_ch_buf,b->yy_buf_size + 2 ,yyscanner );
+ fts0brealloc((void *) b->yy_ch_buf,b->yy_buf_size + 2 ,yyscanner );
}
else
/* Can't grow it, we don't own it. */
@@ -1095,7 +1099,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
if ((yy_size_t) (yyg->yy_n_chars + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) {
/* Extend the array by 50%, plus the number we really need. */
yy_size_t new_size = yyg->yy_n_chars + number_to_move + (yyg->yy_n_chars >> 1);
- YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char*) fts0brealloc((void*) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size ,yyscanner );
+ YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) fts0brealloc((void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size ,yyscanner );
if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf )
YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" );
}
@@ -1115,7 +1119,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
{
register yy_state_type yy_current_state;
register char *yy_cp;
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
yy_current_state = yyg->yy_start;
@@ -1130,7 +1134,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 18 )
+ if ( yy_current_state >= 19 )
yy_c = yy_meta[(unsigned int) yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
@@ -1147,7 +1151,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state , yyscan_t yyscanner)
{
register int yy_is_jam;
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner; /* This var may be unused depending upon options. */
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; /* This var may be unused depending upon options. */
register char *yy_cp = yyg->yy_c_buf_p;
register YY_CHAR yy_c = 1;
@@ -1159,11 +1163,11 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 18 )
+ if ( yy_current_state >= 19 )
yy_c = yy_meta[(unsigned int) yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
- yy_is_jam = (yy_current_state == 17);
+ yy_is_jam = (yy_current_state == 18);
return yy_is_jam ? 0 : yy_current_state;
}
@@ -1177,7 +1181,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
{
int c;
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
*yyg->yy_c_buf_p = yyg->yy_hold_char;
@@ -1235,7 +1239,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
}
}
- c = *(unsigned char*) yyg->yy_c_buf_p; /* cast for 8-bit char's */
+ c = *(unsigned char *) yyg->yy_c_buf_p; /* cast for 8-bit char's */
*yyg->yy_c_buf_p = '\0'; /* preserve yytext */
yyg->yy_hold_char = *++yyg->yy_c_buf_p;
@@ -1250,7 +1254,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
*/
void fts0brestart (FILE * input_file , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
if ( ! YY_CURRENT_BUFFER ){
fts0bensure_buffer_stack (yyscanner);
@@ -1268,7 +1272,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
*/
void fts0b_switch_to_buffer (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
/* TODO. We should be able to replace this entire function body
* with
@@ -1300,7 +1304,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
static void fts0b_load_buffer_state (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
yyg->yytext_ptr = yyg->yy_c_buf_p = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos;
yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file;
@@ -1316,7 +1320,7 @@ static void fts0b_load_buffer_state (yyscan_t yyscanner)
YY_BUFFER_STATE fts0b_create_buffer (FILE * file, int size , yyscan_t yyscanner)
{
YY_BUFFER_STATE b;
-
+
b = (YY_BUFFER_STATE) fts0balloc(sizeof( struct yy_buffer_state ) ,yyscanner );
if ( ! b )
YY_FATAL_ERROR( "out of dynamic memory in fts0b_create_buffer()" );
@@ -1326,7 +1330,7 @@ static void fts0b_load_buffer_state (yyscan_t yyscanner)
/* yy_ch_buf has to be 2 characters longer than the size given because
* we need to put in 2 end-of-buffer characters.
*/
- b->yy_ch_buf = (char*) fts0balloc(b->yy_buf_size + 2 ,yyscanner );
+ b->yy_ch_buf = (char *) fts0balloc(b->yy_buf_size + 2 ,yyscanner );
if ( ! b->yy_ch_buf )
YY_FATAL_ERROR( "out of dynamic memory in fts0b_create_buffer()" );
@@ -1343,7 +1347,7 @@ static void fts0b_load_buffer_state (yyscan_t yyscanner)
*/
void fts0b_delete_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
if ( ! b )
return;
@@ -1352,9 +1356,9 @@ static void fts0b_load_buffer_state (yyscan_t yyscanner)
YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0;
if ( b->yy_is_our_buffer )
- fts0bfree((void*) b->yy_ch_buf ,yyscanner );
+ fts0bfree((void *) b->yy_ch_buf ,yyscanner );
- fts0bfree((void*) b ,yyscanner );
+ fts0bfree((void *) b ,yyscanner );
}
/* Initializes or reinitializes a buffer.
@@ -1365,7 +1369,7 @@ static void fts0b_load_buffer_state (yyscan_t yyscanner)
{
int oerrno = errno;
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
fts0b_flush_buffer(b ,yyscanner);
@@ -1382,7 +1386,7 @@ static void fts0b_load_buffer_state (yyscan_t yyscanner)
}
b->yy_is_interactive = 0;
-
+
errno = oerrno;
}
@@ -1392,7 +1396,7 @@ static void fts0b_load_buffer_state (yyscan_t yyscanner)
*/
void fts0b_flush_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
if ( ! b )
return;
@@ -1422,7 +1426,7 @@ static void fts0b_load_buffer_state (yyscan_t yyscanner)
*/
void fts0bpush_buffer_state (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
if (new_buffer == NULL)
return;
@@ -1453,7 +1457,7 @@ void fts0bpush_buffer_state (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner)
*/
void fts0bpop_buffer_state (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
if (!YY_CURRENT_BUFFER)
return;
@@ -1474,7 +1478,7 @@ void fts0bpop_buffer_state (yyscan_t yyscanner)
static void fts0bensure_buffer_stack (yyscan_t yyscanner)
{
int num_to_alloc;
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
if (!yyg->yy_buffer_stack) {
@@ -1483,14 +1487,14 @@ static void fts0bensure_buffer_stack (yyscan_t yyscanner)
* immediate realloc on the next call.
*/
num_to_alloc = 1;
- yyg->yy_buffer_stack = (struct yy_buffer_state**) fts0balloc
+ yyg->yy_buffer_stack = (struct yy_buffer_state**)fts0balloc
(num_to_alloc * sizeof(struct yy_buffer_state*)
, yyscanner);
if ( ! yyg->yy_buffer_stack )
YY_FATAL_ERROR( "out of dynamic memory in fts0bensure_buffer_stack()" );
-
+
memset(yyg->yy_buffer_stack, 0, num_to_alloc * sizeof(struct yy_buffer_state*));
-
+
yyg->yy_buffer_stack_max = num_to_alloc;
yyg->yy_buffer_stack_top = 0;
return;
@@ -1502,7 +1506,7 @@ static void fts0bensure_buffer_stack (yyscan_t yyscanner)
int grow_size = 8 /* arbitrary grow size */;
num_to_alloc = yyg->yy_buffer_stack_max + grow_size;
- yyg->yy_buffer_stack = (struct yy_buffer_state**) fts0brealloc
+ yyg->yy_buffer_stack = (struct yy_buffer_state**)fts0brealloc
(yyg->yy_buffer_stack,
num_to_alloc * sizeof(struct yy_buffer_state*)
, yyscanner);
@@ -1519,12 +1523,12 @@ static void fts0bensure_buffer_stack (yyscan_t yyscanner)
* @param base the character buffer
* @param size the size in bytes of the character buffer
* @param yyscanner The scanner object.
- * @return the newly allocated buffer state object.
+ * @return the newly allocated buffer state object.
*/
YY_BUFFER_STATE fts0b_scan_buffer (char * base, yy_size_t size , yyscan_t yyscanner)
{
YY_BUFFER_STATE b;
-
+
if ( size < 2 ||
base[size-2] != YY_END_OF_BUFFER_CHAR ||
base[size-1] != YY_END_OF_BUFFER_CHAR )
@@ -1560,7 +1564,7 @@ YY_BUFFER_STATE fts0b_scan_buffer (char * base, yy_size_t size , yyscan_t yysc
*/
YY_BUFFER_STATE fts0b_scan_string (yyconst char * yystr , yyscan_t yyscanner)
{
-
+
return fts0b_scan_bytes(yystr,strlen(yystr) ,yyscanner);
}
@@ -1577,10 +1581,10 @@ YY_BUFFER_STATE fts0b_scan_bytes (yyconst char * yybytes, int _yybytes_len , y
char *buf;
yy_size_t n;
int i;
-
+
/* Get memory for full buffer, including space for trailing EOB's. */
n = _yybytes_len + 2;
- buf = (char*) fts0balloc(n ,yyscanner );
+ buf = (char *) fts0balloc(n ,yyscanner );
if ( ! buf )
YY_FATAL_ERROR( "out of dynamic memory in fts0b_scan_bytes()" );
@@ -1605,7 +1609,7 @@ YY_BUFFER_STATE fts0b_scan_bytes (yyconst char * yybytes, int _yybytes_len , y
#define YY_EXIT_FAILURE 2
#endif
-static void yy_fatal_error (yyconst char* msg , yyscan_t yyscanner __attribute__((unused)))
+static void yy_fatal_error (yyconst char* msg , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)))
{
(void) fprintf( stderr, "%s\n", msg );
exit( YY_EXIT_FAILURE );
@@ -1635,7 +1639,7 @@ static void yy_fatal_error (yyconst char* msg , yyscan_t yyscanner __attribute_
*/
YY_EXTRA_TYPE fts0bget_extra (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
return yyextra;
}
@@ -1644,11 +1648,11 @@ YY_EXTRA_TYPE fts0bget_extra (yyscan_t yyscanner)
*/
int fts0bget_lineno (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
-
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
if (! YY_CURRENT_BUFFER)
return 0;
-
+
return yylineno;
}
@@ -1657,11 +1661,11 @@ int fts0bget_lineno (yyscan_t yyscanner)
*/
int fts0bget_column (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
-
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
if (! YY_CURRENT_BUFFER)
return 0;
-
+
return yycolumn;
}
@@ -1670,7 +1674,7 @@ int fts0bget_column (yyscan_t yyscanner)
*/
FILE *fts0bget_in (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
return yyin;
}
@@ -1679,7 +1683,7 @@ FILE *fts0bget_in (yyscan_t yyscanner)
*/
FILE *fts0bget_out (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
return yyout;
}
@@ -1688,7 +1692,7 @@ FILE *fts0bget_out (yyscan_t yyscanner)
*/
int fts0bget_leng (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
return yyleng;
}
@@ -1698,7 +1702,7 @@ int fts0bget_leng (yyscan_t yyscanner)
char *fts0bget_text (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
return yytext;
}
@@ -1708,7 +1712,7 @@ char *fts0bget_text (yyscan_t yyscanner)
*/
void fts0bset_extra (YY_EXTRA_TYPE user_defined , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
yyextra = user_defined ;
}
@@ -1718,12 +1722,12 @@ void fts0bset_extra (YY_EXTRA_TYPE user_defined , yyscan_t yyscanner)
*/
void fts0bset_lineno (int line_number , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
/* lineno is only valid if an input buffer exists. */
if (! YY_CURRENT_BUFFER )
- yy_fatal_error( "fts0bset_lineno called with no buffer" , yyscanner);
-
+ yy_fatal_error( "fts0bset_lineno called with no buffer" , yyscanner);
+
yylineno = line_number;
}
@@ -1733,12 +1737,12 @@ void fts0bset_lineno (int line_number , yyscan_t yyscanner)
*/
void fts0bset_column (int column_no , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
/* column is only valid if an input buffer exists. */
if (! YY_CURRENT_BUFFER )
- yy_fatal_error( "fts0bset_column called with no buffer" , yyscanner);
-
+ yy_fatal_error( "fts0bset_column called with no buffer" , yyscanner);
+
yycolumn = column_no;
}
@@ -1750,25 +1754,25 @@ void fts0bset_column (int column_no , yyscan_t yyscanner)
*/
void fts0bset_in (FILE * in_str , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
yyin = in_str ;
}
void fts0bset_out (FILE * out_str , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
yyout = out_str ;
}
int fts0bget_debug (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
return yy_flex_debug;
}
void fts0bset_debug (int bdebug , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
yy_flex_debug = bdebug ;
}
@@ -1821,26 +1825,26 @@ int fts0blex_init_extra(YY_EXTRA_TYPE yy_user_defined,yyscan_t* ptr_yy_globals )
errno = EINVAL;
return 1;
}
-
+
*ptr_yy_globals = (yyscan_t) fts0balloc ( sizeof( struct yyguts_t ), &dummy_yyguts );
-
+
if (*ptr_yy_globals == NULL){
errno = ENOMEM;
return 1;
}
-
+
/* By setting to 0xAA, we expose bugs in
yy_init_globals. Leave at 0x00 for releases. */
memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t));
-
+
fts0bset_extra (yy_user_defined, *ptr_yy_globals);
-
+
return yy_init_globals ( *ptr_yy_globals );
}
static int yy_init_globals (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
/* Initialization is the same as for the non-reentrant scanner.
* This function is called from fts0blex_destroy(), so don't allocate here.
*/
@@ -1848,7 +1852,7 @@ static int yy_init_globals (yyscan_t yyscanner)
yyg->yy_buffer_stack = 0;
yyg->yy_buffer_stack_top = 0;
yyg->yy_buffer_stack_max = 0;
- yyg->yy_c_buf_p = (char*) 0;
+ yyg->yy_c_buf_p = (char *) 0;
yyg->yy_init = 0;
yyg->yy_start = 0;
@@ -1861,8 +1865,8 @@ static int yy_init_globals (yyscan_t yyscanner)
yyin = stdin;
yyout = stdout;
#else
- yyin = (FILE*) 0;
- yyout = (FILE*) 0;
+ yyin = (FILE *) 0;
+ yyout = (FILE *) 0;
#endif
/* For future reference: Set errno on error, since we are called by
@@ -1874,7 +1878,7 @@ static int yy_init_globals (yyscan_t yyscanner)
/* fts0blex_destroy is for both reentrant and non-reentrant scanners. */
int fts0blex_destroy (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
/* Pop the buffer stack, destroying each element. */
while(YY_CURRENT_BUFFER){
@@ -1906,7 +1910,7 @@ int fts0blex_destroy (yyscan_t yyscanner)
*/
#ifndef yytext_ptr
-static void yy_flex_strncpy (char* s1, yyconst char * s2, int n , yyscan_t yyscanner __attribute__((unused)))
+static void yy_flex_strncpy (char* s1, yyconst char * s2, int n , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)))
{
register int i;
for ( i = 0; i < n; ++i )
@@ -1915,7 +1919,7 @@ static void yy_flex_strncpy (char* s1, yyconst char * s2, int n , yyscan_t yysc
#endif
#ifdef YY_NEED_STRLEN
-static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner __attribute__((unused)))
+static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)))
{
register int n;
for ( n = 0; s[n]; ++n )
@@ -1925,26 +1929,26 @@ static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner __attribute__(
}
#endif
-void *fts0balloc (yy_size_t size , yyscan_t yyscanner __attribute__((unused)))
+void *fts0balloc (yy_size_t size , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)))
{
- return (void*) malloc( size );
+ return (void *) malloc( size );
}
-void *fts0brealloc (void * ptr, yy_size_t size , yyscan_t yyscanner __attribute__((unused)))
+void *fts0brealloc (void * ptr, yy_size_t size , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)))
{
- /* The cast to (char*) in the following accommodates both
+ /* The cast to (char *) in the following accommodates both
* implementations that use char* generic pointers, and those
* that use void* generic pointers. It works with the latter
* because both ANSI C and C++ allow castless assignment from
* any pointer type to void*, and deal with argument conversions
* as though doing an assignment.
*/
- return (void*) realloc( (char*) ptr, size );
+ return (void *) realloc( (char *) ptr, size );
}
-void fts0bfree (void * ptr , yyscan_t yyscanner __attribute__((unused)))
+void fts0bfree (void * ptr , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)))
{
- free( (char*) ptr ); /* see fts0brealloc() for (char*) cast */
+ free( (char *) ptr ); /* see fts0brealloc() for (char *) cast */
}
#define YYTABLES_NAME "yytables"
diff --git a/storage/innobase/fts/fts0blex.l b/storage/innobase/fts/fts0blex.l
index b84b0cea294..6193f0df187 100644
--- a/storage/innobase/fts/fts0blex.l
+++ b/storage/innobase/fts/fts0blex.l
@@ -56,7 +56,7 @@ this program; if not, write to the Free Software Foundation, Inc.,
return(FTS_NUMB);
}
-[^" \n*()+\-<>~@]* {
+[^" \n*()+\-<>~@%]* {
val->token = strdup(fts0bget_text(yyscanner));
return(FTS_TERM);
diff --git a/storage/innobase/fts/fts0config.cc b/storage/innobase/fts/fts0config.cc
index 3f849ef183c..9cac680101c 100644
--- a/storage/innobase/fts/fts0config.cc
+++ b/storage/innobase/fts/fts0config.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -70,7 +70,7 @@ Get value from the config table. The caller must ensure that enough
space is allocated for value to hold the column contents.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_get_value(
/*=================*/
trx_t* trx, /*!< transaction */
@@ -83,7 +83,7 @@ fts_config_get_value(
{
pars_info_t* info;
que_t* graph;
- ulint error;
+ dberr_t error;
ulint name_len = strlen(name);
info = pars_info_create();
@@ -162,7 +162,7 @@ must ensure that enough space is allocated for value to hold the
column contents.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_get_index_value(
/*=======================*/
trx_t* trx, /*!< transaction */
@@ -173,7 +173,7 @@ fts_config_get_index_value(
config table */
{
char* name;
- ulint error;
+ dberr_t error;
fts_table_t fts_table;
FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE,
@@ -193,7 +193,7 @@ fts_config_get_index_value(
Set the value in the config table for name.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_set_value(
/*=================*/
trx_t* trx, /*!< transaction */
@@ -206,7 +206,7 @@ fts_config_set_value(
{
pars_info_t* info;
que_t* graph;
- ulint error;
+ dberr_t error;
undo_no_t undo_no;
undo_no_t n_rows_updated;
ulint name_len = strlen(name);
@@ -262,7 +262,7 @@ fts_config_set_value(
Set the value specific to an FTS index in the config table.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_set_index_value(
/*=======================*/
trx_t* trx, /*!< transaction */
@@ -273,7 +273,7 @@ fts_config_set_index_value(
config table */
{
char* name;
- ulint error;
+ dberr_t error;
fts_table_t fts_table;
FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE,
@@ -293,7 +293,7 @@ fts_config_set_index_value(
Get an ulint value from the config table.
@return DB_SUCCESS if all OK else error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_get_index_ulint(
/*=======================*/
trx_t* trx, /*!< in: transaction */
@@ -301,7 +301,7 @@ fts_config_get_index_ulint(
const char* name, /*!< in: param name */
ulint* int_value) /*!< out: value */
{
- ulint error;
+ dberr_t error;
fts_string_t value;
/* We set the length of value to the max bytes it can hold. This
@@ -314,8 +314,8 @@ fts_config_get_index_ulint(
if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%lu) reading `%s'\n",
- error, name);
+ fprintf(stderr, " InnoDB: Error: (%s) reading `%s'\n",
+ ut_strerr(error), name);
} else {
*int_value = strtoul((char*) value.f_str, NULL, 10);
}
@@ -329,7 +329,7 @@ fts_config_get_index_ulint(
Set an ulint value in the config table.
@return DB_SUCCESS if all OK else error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_set_index_ulint(
/*=======================*/
trx_t* trx, /*!< in: transaction */
@@ -337,7 +337,7 @@ fts_config_set_index_ulint(
const char* name, /*!< in: param name */
ulint int_value) /*!< in: value */
{
- ulint error;
+ dberr_t error;
fts_string_t value;
/* We set the length of value to the max bytes it can hold. This
@@ -356,8 +356,8 @@ fts_config_set_index_ulint(
if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%lu) writing `%s'\n",
- error, name);
+ fprintf(stderr, " InnoDB: Error: (%s) writing `%s'\n",
+ ut_strerr(error), name);
}
ut_free(value.f_str);
@@ -369,7 +369,7 @@ fts_config_set_index_ulint(
Get an ulint value from the config table.
@return DB_SUCCESS if all OK else error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_get_ulint(
/*=================*/
trx_t* trx, /*!< in: transaction */
@@ -378,7 +378,7 @@ fts_config_get_ulint(
const char* name, /*!< in: param name */
ulint* int_value) /*!< out: value */
{
- ulint error;
+ dberr_t error;
fts_string_t value;
/* We set the length of value to the max bytes it can hold. This
@@ -391,8 +391,8 @@ fts_config_get_ulint(
if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%lu) reading `%s'\n",
- error, name);
+ fprintf(stderr, " InnoDB: Error: (%s) reading `%s'\n",
+ ut_strerr(error), name);
} else {
*int_value = strtoul((char*) value.f_str, NULL, 10);
}
@@ -406,7 +406,7 @@ fts_config_get_ulint(
Set an ulint value in the config table.
@return DB_SUCCESS if all OK else error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_set_ulint(
/*=================*/
trx_t* trx, /*!< in: transaction */
@@ -415,7 +415,7 @@ fts_config_set_ulint(
const char* name, /*!< in: param name */
ulint int_value) /*!< in: value */
{
- ulint error;
+ dberr_t error;
fts_string_t value;
/* We set the length of value to the max bytes it can hold. This
@@ -434,8 +434,8 @@ fts_config_set_ulint(
if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%lu) writing `%s'\n",
- error, name);
+ fprintf(stderr, " InnoDB: Error: (%s) writing `%s'\n",
+ ut_strerr(error), name);
}
ut_free(value.f_str);
@@ -447,7 +447,7 @@ fts_config_set_ulint(
Increment the value in the config table for column name.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_increment_value(
/*=======================*/
trx_t* trx, /*!< transaction */
@@ -458,7 +458,7 @@ fts_config_increment_value(
ulint delta) /*!< in: increment by this
much */
{
- ulint error;
+ dberr_t error;
fts_string_t value;
que_t* graph = NULL;
ulint name_len = strlen(name);
@@ -520,8 +520,8 @@ fts_config_increment_value(
ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%lu) "
- "while incrementing %s.\n", error, name);
+ fprintf(stderr, " InnoDB: Error: (%s) "
+ "while incrementing %s.\n", ut_strerr(error), name);
}
ut_free(value.f_str);
@@ -533,7 +533,7 @@ fts_config_increment_value(
Increment the per index value in the config table for column name.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_increment_index_value(
/*=============================*/
trx_t* trx, /*!< transaction */
@@ -544,7 +544,7 @@ fts_config_increment_index_value(
much */
{
char* name;
- ulint error;
+ dberr_t error;
fts_table_t fts_table;
FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE,
diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc
index f716b980501..a81d3043e9c 100644
--- a/storage/innobase/fts/fts0fts.cc
+++ b/storage/innobase/fts/fts0fts.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -36,12 +36,8 @@ Full Text Search interface
#include "dict0priv.h"
#include "dict0stats.h"
#include "btr0pcur.h"
-#include "row0row.h"
-#include "ha_prototypes.h"
-#ifdef UNIV_NONINL
-#include "fts0priv.ic"
-#endif
+#include "ha_prototypes.h"
#define FTS_MAX_ID_LEN 32
@@ -63,9 +59,6 @@ UNIV_INTERN ulong fts_min_token_size;
ib_time_t elapsed_time = 0;
ulint n_nodes = 0;
-typedef struct fts_schema_struct fts_schema_t;
-typedef struct fts_sys_table_struct fts_sys_table_t;
-
/** Error condition reported by fts_utf8_decode() */
const ulint UTF8_ERROR = 0xFFFFFFFF;
@@ -142,7 +135,7 @@ const char *fts_default_stopword[] =
};
/** For storing table info when checking for orphaned tables. */
-struct fts_sys_table_struct {
+struct fts_aux_table_t {
table_id_t id; /*!< Table id */
table_id_t parent_id; /*!< Parent table id */
table_id_t index_id; /*!< Table FT index id */
@@ -246,7 +239,7 @@ static const char* fts_config_table_insert_values_sql =
FTS_OPTIMIZE_LIMIT_IN_SECS "', '180');\n"
""
"INSERT INTO %s VALUES ('"
- FTS_SYNCED_DOC_ID "', '1');\n"
+ FTS_SYNCED_DOC_ID "', '0');\n"
""
"INSERT INTO %s VALUES ('"
FTS_TOTAL_DELETED_COUNT "', '0');\n"
@@ -257,12 +250,13 @@ static const char* fts_config_table_insert_values_sql =
/****************************************************************//**
Run SYNC on the table, i.e., write out data from the cache to the
FTS auxiliary INDEX table and clear the cache at the end.
-@return DB_SUCCESS if all OK */
+@return DB_SUCCESS if all OK */
static
-ulint
+dberr_t
fts_sync(
/*=====*/
- fts_sync_t* sync); /*!< in: sync state */
+ fts_sync_t* sync) /*!< in: sync state */
+ __attribute__((nonnull));
/****************************************************************//**
Release all resources help by the words rb tree e.g., the node ilist. */
@@ -270,7 +264,8 @@ static
void
fts_words_free(
/*===========*/
- ib_rbt_t* words); /*!< in: rb tree of words */
+ ib_rbt_t* words) /*!< in: rb tree of words */
+ __attribute__((nonnull));
#ifdef FTS_CACHE_SIZE_DEBUG
/****************************************************************//**
Read the max cache size parameter from the config table. */
@@ -294,19 +289,35 @@ fts_add_doc_by_id(
doc_id_t doc_id, /*!< in: doc id */
ib_vector_t* fts_indexes __attribute__((unused)));
/*!< in: affected fts indexes */
+#ifdef FTS_DOC_STATS_DEBUG
/****************************************************************//**
Check whether a particular word (term) exists in the FTS index.
@return DB_SUCCESS if all went fine */
static
-ulint
+dberr_t
fts_is_word_in_index(
/*=================*/
trx_t* trx, /*!< in: FTS query state */
que_t** graph, /*!< out: Query graph */
fts_table_t* fts_table, /*!< in: table instance */
const fts_string_t* word, /*!< in: the word to check */
- ibool* found); /*!< out: TRUE if exists */
+ ibool* found) /*!< out: TRUE if exists */
+ __attribute__((nonnull, warn_unused_result));
+#endif /* FTS_DOC_STATS_DEBUG */
+/******************************************************************//**
+Update the last document id. This function could create a new
+transaction to update the last document id.
+@return DB_SUCCESS if OK */
+static
+dberr_t
+fts_update_sync_doc_id(
+/*===================*/
+ const dict_table_t* table, /*!< in: table */
+ const char* table_name, /*!< in: table name, or NULL */
+ doc_id_t doc_id, /*!< in: last document id */
+ trx_t* trx) /*!< in: update trx, or NULL */
+ __attribute__((nonnull(1)));
/********************************************************************
Check if we should stop. */
UNIV_INLINE
@@ -443,7 +454,7 @@ fts_load_user_stopword(
{
pars_info_t* info;
que_t* graph;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
ibool ret = TRUE;
trx_t* trx;
ibool has_lock = fts->fts_status & TABLE_DICT_LOCKED;
@@ -507,9 +518,9 @@ fts_load_user_stopword(
trx->error_state = DB_SUCCESS;
} else {
- fprintf(stderr, " InnoDB: Error: %lu "
+ fprintf(stderr, " InnoDB: Error '%s' "
"while reading user stopword table.\n",
- error);
+ ut_strerr(error));
ret = FALSE;
break;
}
@@ -542,7 +553,7 @@ fts_index_cache_init(
index_cache->words = rbt_create_arg_cmp(
sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
- index_cache->charset);
+ (void*) index_cache->charset);
ut_a(index_cache->doc_stats == NULL);
@@ -670,7 +681,7 @@ fts_add_index(
ib_vector_push(fts->indexes, &index);
- index_cache = (fts_index_cache_t*) fts_find_index_cache(cache, index);
+ index_cache = fts_find_index_cache(cache, index);
if (!index_cache) {
/* Add new index cache structure */
@@ -805,7 +816,7 @@ fts_check_cached_index(
Drop auxiliary tables related to an FTS index
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
fts_drop_index(
/*===========*/
dict_table_t* table, /*!< in: Table where indexes are dropped */
@@ -813,7 +824,7 @@ fts_drop_index(
trx_t* trx) /*!< in: Transaction for the drop */
{
ib_vector_t* indexes = table->fts->indexes;
- ulint err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
ut_a(indexes);
@@ -821,6 +832,8 @@ fts_drop_index(
&& (index == static_cast<dict_index_t*>(
ib_vector_getp(table->fts->indexes, 0))))
|| ib_vector_is_empty(indexes)) {
+ doc_id_t current_doc_id;
+ doc_id_t first_doc_id;
/* If we are dropping the only FTS index of the table,
remove it from optimize thread */
@@ -844,17 +857,20 @@ fts_drop_index(
return(err);
}
+ current_doc_id = table->fts->cache->next_doc_id;
+ first_doc_id = table->fts->cache->first_doc_id;
fts_cache_clear(table->fts->cache, TRUE);
fts_cache_destroy(table->fts->cache);
table->fts->cache = fts_cache_create(table);
+ table->fts->cache->next_doc_id = current_doc_id;
+ table->fts->cache->first_doc_id = first_doc_id;
} else {
fts_cache_t* cache = table->fts->cache;
fts_index_cache_t* index_cache;
rw_lock_x_lock(&cache->init_lock);
- index_cache = (fts_index_cache_t*) fts_find_index_cache(
- cache, index);
+ index_cache = fts_find_index_cache(cache, index);
if (index_cache->words) {
fts_words_free(index_cache->words);
@@ -1215,7 +1231,7 @@ fts_tokenizer_word_get(
if (rbt_search(cache->stopword_info.cached_stopword,
&parent, text) == 0) {
- return NULL;
+ return(NULL);
}
/* Check if we found a match, if not then add word to tree. */
@@ -1445,38 +1461,40 @@ fts_cache_add_doc(
/****************************************************************//**
Drops a table. If the table can't be found we return a SUCCESS code.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_drop_table(
/*===========*/
trx_t* trx, /*!< in: transaction */
const char* table_name) /*!< in: table to drop */
{
- ulint error = DB_SUCCESS;
+ dict_table_t* table;
+ dberr_t error = DB_SUCCESS;
- /* Check that the table exists in our data dictionary. */
- if (dict_table_get_low(table_name)) {
+ /* Check that the table exists in our data dictionary.
+ Similar to regular drop table case, we will open table with
+ DICT_ERR_IGNORE_INDEX_ROOT and DICT_ERR_IGNORE_CORRUPT option */
+ table = dict_table_open_on_name(
+ table_name, TRUE, FALSE,
+ static_cast<dict_err_ignore_t>(
+ DICT_ERR_IGNORE_INDEX_ROOT | DICT_ERR_IGNORE_CORRUPT));
-#ifdef FTS_INTERNAL_DIAG_PRINT
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Dropping %s\n", table_name);
-#endif
+ if (table != 0) {
- error = row_drop_table_for_mysql(table_name, trx, TRUE);
+ dict_table_close(table, TRUE, FALSE);
+
+ /* Pass nonatomic=false (dont allow data dict unlock),
+ because the transaction may hold locks on SYS_* tables from
+ previous calls to fts_drop_table(). */
+ error = row_drop_table_for_mysql(table_name, trx, true, false);
- /* We only return the status of the last error. */
if (error != DB_SUCCESS) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%lu) dropping "
- "FTS index table %s\n", error, table_name);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unable to drop FTS index aux table %s: %s",
+ table_name, ut_strerr(error));
}
} else {
- ut_print_timestamp(stderr);
-
- /* FIXME: Should provide appropriate error return code
- rather than printing message indiscriminately. */
- fprintf(stderr, " InnoDB: %s not found.\n",
- table_name);
+ error = DB_FAIL;
}
return(error);
@@ -1487,8 +1505,8 @@ Drops the common ancillary tables needed for supporting an FTS index
on the given table. row_mysql_lock_data_dictionary must have been called
before this.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_drop_common_tables(
/*===================*/
trx_t* trx, /*!< in: transaction */
@@ -1496,10 +1514,10 @@ fts_drop_common_tables(
index */
{
ulint i;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
for (i = 0; fts_common_tables[i] != NULL; ++i) {
- ulint err;
+ dberr_t err;
char* table_name;
fts_table->suffix = fts_common_tables[i];
@@ -1509,7 +1527,7 @@ fts_drop_common_tables(
err = fts_drop_table(trx, table_name);
/* We only return the status of the last error. */
- if (err != DB_SUCCESS) {
+ if (err != DB_SUCCESS && err != DB_FAIL) {
error = err;
}
@@ -1520,11 +1538,11 @@ fts_drop_common_tables(
}
/****************************************************************//**
-Since we do a horizontal split on the index table, we need to drop the
+Since we do a horizontal split on the index table, we need to drop
all the split tables.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_drop_index_split_tables(
/*========================*/
trx_t* trx, /*!< in: transaction */
@@ -1533,12 +1551,12 @@ fts_drop_index_split_tables(
{
ulint i;
fts_table_t fts_table;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
for (i = 0; fts_index_selector[i].value; ++i) {
- ulint err;
+ dberr_t err;
char* table_name;
fts_table.suffix = fts_get_suffix(i);
@@ -1548,7 +1566,7 @@ fts_drop_index_split_tables(
err = fts_drop_table(trx, table_name);
/* We only return the status of the last error. */
- if (err != DB_SUCCESS) {
+ if (err != DB_SUCCESS && err != DB_FAIL) {
error = err;
}
@@ -1562,23 +1580,21 @@ fts_drop_index_split_tables(
Drops FTS auxiliary tables for an FTS index
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_drop_index_tables(
/*==================*/
trx_t* trx, /*!< in: transaction */
dict_index_t* index) /*!< in: Index to drop */
{
- ulint err;
- ulint error = DB_SUCCESS;
fts_table_t fts_table;
- ulint j;
+ dberr_t error = DB_SUCCESS;
static const char* index_tables[] = {
"DOC_ID",
NULL
};
- err = fts_drop_index_split_tables(trx, index);
+ dberr_t err = fts_drop_index_split_tables(trx, index);
/* We only return the status of the last error. */
if (err != DB_SUCCESS) {
@@ -1587,18 +1603,17 @@ fts_drop_index_tables(
FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
- for (j = 0; index_tables[j] != NULL; ++j) {
- ulint err;
+ for (ulint i = 0; index_tables[i] != NULL; ++i) {
char* table_name;
- fts_table.suffix = index_tables[j];
+ fts_table.suffix = index_tables[i];
table_name = fts_get_table_name(&fts_table);
err = fts_drop_table(trx, table_name);
/* We only return the status of the last error. */
- if (err != DB_SUCCESS) {
+ if (err != DB_SUCCESS && err != DB_FAIL) {
error = err;
}
@@ -1613,18 +1628,20 @@ Drops FTS ancillary tables needed for supporting an FTS index
on the given table. row_mysql_lock_data_dictionary must have been called
before this.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_drop_all_index_tables(
/*======================*/
trx_t* trx, /*!< in: transaction */
fts_t* fts) /*!< in: fts instance */
{
- ulint i;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
- for (i = 0; i < ib_vector_size(fts->indexes); ++i) {
- ulint err;
+ for (ulint i = 0;
+ fts->indexes != 0 && i < ib_vector_size(fts->indexes);
+ ++i) {
+
+ dberr_t err;
dict_index_t* index;
index = static_cast<dict_index_t*>(
@@ -1646,17 +1663,19 @@ given table. row_mysql_lock_data_dictionary must have been called before
this.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_drop_tables(
/*============*/
trx_t* trx, /*!< in: transaction */
dict_table_t* table) /*!< in: table has the FTS index */
{
- ulint error;
+ dberr_t error;
fts_table_t fts_table;
FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
+ /* TODO: This is not atomic and can cause problems during recovery. */
+
error = fts_drop_common_tables(trx, &fts_table);
if (error == DB_SUCCESS) {
@@ -1692,20 +1711,20 @@ on the given table. row_mysql_lock_data_dictionary must have been called
before this.
@return DB_SUCCESS if succeed */
UNIV_INTERN
-ulint
+dberr_t
fts_create_common_tables(
/*=====================*/
- trx_t* trx, /*!< in: transaction */
- const dict_table_t* table, /*!< in: table with FTS index */
- const char* name, /*!< in: table name normalized.*/
- ibool skip_doc_id_index) /*!< in: Skip index on doc id */
-
+ trx_t* trx, /*!< in: transaction */
+ const dict_table_t* table, /*!< in: table with FTS index */
+ const char* name, /*!< in: table name normalized.*/
+ bool skip_doc_id_index)/*!< in: Skip index on doc id */
{
char* sql;
- ulint error;
+ dberr_t error;
que_t* graph;
fts_table_t fts_table;
mem_heap_t* heap = mem_heap_create(1024);
+ pars_info_t* info;
FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
@@ -1744,17 +1763,23 @@ fts_create_common_tables(
goto func_exit;
}
+ info = pars_info_create();
+
+ pars_info_bind_id(info, TRUE, "table_name", name);
+ pars_info_bind_id(info, TRUE, "index_name", FTS_DOC_ID_INDEX_NAME);
+ pars_info_bind_id(info, TRUE, "doc_id_col_name", FTS_DOC_ID_COL_NAME);
+
/* Create the FTS DOC_ID index on the hidden column. Currently this
is common for any FT index created on the table. */
graph = fts_parse_sql_no_dict_lock(
NULL,
- NULL,
+ info,
mem_heap_printf(
heap,
"BEGIN\n"
""
- "CREATE UNIQUE INDEX %s ON %s(%s);\n",
- FTS_DOC_ID_INDEX_NAME, name, FTS_DOC_ID_COL_NAME));
+ "CREATE UNIQUE INDEX $index_name ON $table_name("
+ "$doc_id_col_name);\n"));
error = fts_eval_sql(trx, graph);
que_graph_free(graph);
@@ -1794,7 +1819,7 @@ fts_create_one_index_table(
dict_field_t* field;
dict_table_t* new_table = NULL;
char* table_name = fts_get_table_name(fts_table);
- ulint error;
+ dberr_t error;
CHARSET_INFO* charset;
ut_ad(index->type & DICT_FTS);
@@ -1828,14 +1853,14 @@ fts_create_one_index_table(
dict_mem_table_add_col(new_table, heap, "ilist", DATA_BLOB,
4130048, 0);
- error = row_create_table_for_mysql(new_table, trx);
+ error = row_create_table_for_mysql(new_table, trx, true);
if (error != DB_SUCCESS) {
- trx->error_state = static_cast<db_err>(error);
+ trx->error_state = error;
dict_mem_table_free(new_table);
new_table = NULL;
- fprintf(stderr, " InnoDB: Warning: Fail to create FTS "
- " index table %s \n", table_name);
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Fail to create FTS index table %s", table_name);
}
mem_free(table_name);
@@ -1848,7 +1873,7 @@ Wrapper function of fts_create_index_tables_low(), create auxiliary
tables for an FTS index
@return: DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_create_index_tables_low(
/*========================*/
trx_t* trx, /*!< in: transaction */
@@ -1862,7 +1887,7 @@ fts_create_index_tables_low(
char* sql;
que_t* graph;
fts_table_t fts_table;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
mem_heap_t* heap = mem_heap_create(1024);
fts_table.type = FTS_INDEX_TABLE;
@@ -1874,6 +1899,7 @@ fts_create_index_tables_low(
/* Create the FTS auxiliary tables that are specific
to an FTS index. */
sql = fts_prepare_sql(&fts_table, fts_create_index_tables_sql);
+
graph = fts_parse_sql_no_dict_lock(NULL, NULL, sql);
mem_free(sql);
@@ -1903,9 +1929,7 @@ fts_create_index_tables_low(
que_graph_free(graph);
}
- if (error == DB_SUCCESS) {
- error = fts_sql_commit(trx);
- } else {
+ if (error != DB_SUCCESS) {
/* We have special error handling here */
trx->error_state = DB_SUCCESS;
@@ -1928,18 +1952,25 @@ FTS index on the given table. row_mysql_lock_data_dictionary must have
been called before this.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_create_index_tables(
/*====================*/
trx_t* trx, /*!< in: transaction */
const dict_index_t* index) /*!< in: the index instance */
{
+ dberr_t err;
dict_table_t* table;
table = dict_table_get_low(index->table_name);
ut_a(table != NULL);
- return(fts_create_index_tables_low(trx, index, table->name, table->id));
+ err = fts_create_index_tables_low(trx, index, table->name, table->id);
+
+ if (err == DB_SUCCESS) {
+ trx_commit(trx);
+ }
+
+ return(err);
}
#if 0
/******************************************************************//**
@@ -1953,22 +1984,22 @@ fts_get_state_str(
{
switch (state) {
case FTS_INSERT:
- return "INSERT";
+ return("INSERT");
case FTS_MODIFY:
- return "MODIFY";
+ return("MODIFY");
case FTS_DELETE:
- return "DELETE";
+ return("DELETE");
case FTS_NOTHING:
- return "NOTHING";
+ return("NOTHING");
case FTS_INVALID:
- return "INVALID";
+ return("INVALID");
default:
- return "UNKNOWN";
+ return("UNKNOWN");
}
}
#endif
@@ -2321,7 +2352,7 @@ fts_get_max_cache_size(
trx_t* trx, /*!< in: transaction */
fts_table_t* fts_table) /*!< in: table instance */
{
- ulint error;
+ dberr_t error;
fts_string_t value;
ulint cache_size_in_mb;
@@ -2381,32 +2412,19 @@ fts_get_max_cache_size(
}
#endif
-/*********************************************************************//**
-Get the total number of documents in the FTS.
-@return estimated number of rows in the table */
-UNIV_INTERN
-ulint
-fts_get_total_document_count(
-/*=========================*/
- dict_table_t* table) /*!< in: table instance */
-{
- ut_ad(table->stat_initialized);
-
- return((ulint) table->stat_n_rows);
-}
-
+#ifdef FTS_DOC_STATS_DEBUG
/*********************************************************************//**
Get the total number of words in the FTS for a particular FTS index.
@return DB_SUCCESS if all OK else error code */
UNIV_INTERN
-ulint
+dberr_t
fts_get_total_word_count(
/*=====================*/
trx_t* trx, /*!< in: transaction */
dict_index_t* index, /*!< in: for this index */
ulint* total) /* out: total words */
{
- ulint error;
+ dberr_t error;
fts_string_t value;
*total = 0;
@@ -2426,14 +2444,15 @@ fts_get_total_word_count(
*total = strtoul((char*) value.f_str, NULL, 10);
} else {
ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%lu) reading total words "
- "value from config table\n", error);
+ fprintf(stderr, " InnoDB: Error: (%s) reading total words "
+ "value from config table\n", ut_strerr(error));
}
ut_free(value.f_str);
return(error);
}
+#endif /* FTS_DOC_STATS_DEBUG */
/*********************************************************************//**
Update the next and last Doc ID in the CONFIG table to be the input
@@ -2443,8 +2462,9 @@ UNIV_INTERN
void
fts_update_next_doc_id(
/*===================*/
+ trx_t* trx, /*!< in/out: transaction */
const dict_table_t* table, /*!< in: table */
- const char* table_name, /*!< in: table name */
+ const char* table_name, /*!< in: table name, or NULL */
doc_id_t doc_id) /*!< in: DOC ID to set */
{
table->fts->cache->synced_doc_id = doc_id;
@@ -2453,7 +2473,7 @@ fts_update_next_doc_id(
table->fts->cache->first_doc_id = table->fts->cache->next_doc_id;
fts_update_sync_doc_id(
- table, table_name, table->fts->cache->synced_doc_id, NULL);
+ table, table_name, table->fts->cache->synced_doc_id, trx);
}
@@ -2461,7 +2481,7 @@ fts_update_next_doc_id(
Get the next available document id.
@return DB_SUCCESS if OK */
UNIV_INTERN
-ulint
+dberr_t
fts_get_next_doc_id(
/*================*/
const dict_table_t* table, /*!< in: table */
@@ -2494,8 +2514,8 @@ fts_get_next_doc_id(
This function fetch the Doc ID from CONFIG table, and compare with
the Doc ID supplied. And store the larger one to the CONFIG table.
@return DB_SUCCESS if OK */
-UNIV_INTERN
-ulint
+static __attribute__((nonnull))
+dberr_t
fts_cmp_set_sync_doc_id(
/*====================*/
const dict_table_t* table, /*!< in: table */
@@ -2509,7 +2529,7 @@ fts_cmp_set_sync_doc_id(
{
trx_t* trx;
pars_info_t* info;
- ulint error;
+ dberr_t error;
fts_table_t fts_table;
que_t* graph = NULL;
fts_cache_t* cache = table->fts->cache;
@@ -2559,8 +2579,6 @@ retry:
goto func_exit;
}
- ut_a(*doc_id > 0);
-
if (read_only) {
goto func_exit;
}
@@ -2594,8 +2612,8 @@ func_exit:
*doc_id = 0;
ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%lu) "
- "while getting next doc id.\n", error);
+ fprintf(stderr, " InnoDB: Error: (%s) "
+ "while getting next doc id.\n", ut_strerr(error));
fts_sql_rollback(trx);
@@ -2614,23 +2632,23 @@ func_exit:
Update the last document id. This function could create a new
transaction to update the last document id.
@return DB_SUCCESS if OK */
-UNIV_INTERN
-ulint
+static
+dberr_t
fts_update_sync_doc_id(
/*===================*/
const dict_table_t* table, /*!< in: table */
- const char* table_name, /*!< in: table name */
+ const char* table_name, /*!< in: table name, or NULL */
doc_id_t doc_id, /*!< in: last document id */
- trx_t* trx) /*!< in: update trx */
+ trx_t* trx) /*!< in: update trx, or NULL */
{
byte id[FTS_MAX_ID_LEN];
pars_info_t* info;
fts_table_t fts_table;
ulint id_len;
que_t* graph = NULL;
- ulint error;
+ dberr_t error;
ibool local_trx = FALSE;
- fts_cache_t* cache = table->fts->cache;;
+ fts_cache_t* cache = table->fts->cache;
fts_table.suffix = "CONFIG";
fts_table.table_id = table->id;
@@ -2651,8 +2669,7 @@ fts_update_sync_doc_id(
info = pars_info_create();
- // FIXME: Get rid of snprintf
- id_len = snprintf(
+ id_len = ut_snprintf(
(char*) id, sizeof(id), FTS_DOC_ID_FORMAT, doc_id + 1);
pars_info_bind_varchar_literal(info, "doc_id", id, id_len);
@@ -2672,9 +2689,10 @@ fts_update_sync_doc_id(
fts_sql_commit(trx);
cache->synced_doc_id = doc_id;
} else {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%lu) "
- "while updating last doc id.\n", error);
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "(%s) while updating last doc id.",
+ ut_strerr(error));
fts_sql_rollback(trx);
}
@@ -2725,15 +2743,15 @@ fts_doc_ids_free(
/*********************************************************************//**
Do commit-phase steps necessary for the insertion of a new row.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_add(
/*====*/
fts_trx_table_t*ftt, /*!< in: FTS trx table */
fts_trx_row_t* row) /*!< in: row */
{
dict_table_t* table = ftt->table;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
doc_id_t doc_id = row->doc_id;
ut_a(row->state == FTS_INSERT || row->state == FTS_MODIFY);
@@ -2757,8 +2775,8 @@ fts_add(
/*********************************************************************//**
Do commit-phase steps necessary for the deletion of a row.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_delete(
/*=======*/
fts_trx_table_t*ftt, /*!< in: FTS trx table */
@@ -2766,7 +2784,7 @@ fts_delete(
{
que_t* graph;
fts_table_t fts_table;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
doc_id_t write_doc_id;
dict_table_t* table = ftt->table;
doc_id_t doc_id = row->doc_id;
@@ -2848,14 +2866,14 @@ fts_delete(
/*********************************************************************//**
Do commit-phase steps necessary for the modification of a row.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_modify(
/*=======*/
fts_trx_table_t* ftt, /*!< in: FTS trx table */
fts_trx_row_t* row) /*!< in: row */
{
- ulint error;
+ dberr_t error;
ut_a(row->state == FTS_MODIFY);
@@ -2872,7 +2890,7 @@ fts_modify(
Create a new document id.
@return DB_SUCCESS if all went well else error */
UNIV_INTERN
-ulint
+dberr_t
fts_create_doc_id(
/*==============*/
dict_table_t* table, /*!< in: row is of this table. */
@@ -2882,7 +2900,7 @@ fts_create_doc_id(
mem_heap_t* heap) /*!< in: heap */
{
doc_id_t doc_id;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
ut_a(table->fts->doc_col != ULINT_UNDEFINED);
@@ -2919,15 +2937,15 @@ fts_create_doc_id(
The given transaction is about to be committed; do whatever is necessary
from the FTS system's POV.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_commit_table(
/*=============*/
fts_trx_table_t* ftt) /*!< in: FTS table to commit*/
{
const ib_rbt_node_t* node;
ib_rbt_t* rows;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
fts_cache_t* cache = ftt->table->fts->cache;
trx_t* trx = trx_allocate_for_background();
@@ -2979,13 +2997,13 @@ The given transaction is about to be committed; do whatever is necessary
from the FTS system's POV.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_commit(
/*=======*/
trx_t* trx) /*!< in: transaction */
{
const ib_rbt_node_t* node;
- ulint error;
+ dberr_t error;
ib_rbt_t* tables;
fts_savepoint_t* savepoint;
@@ -3008,10 +3026,9 @@ fts_commit(
}
/*********************************************************************//**
-Create a new empty document.
-@return new document */
+Initialize a document. */
UNIV_INTERN
-fts_doc_t*
+void
fts_doc_init(
/*=========*/
fts_doc_t* doc) /*!< in: doc to initialize */
@@ -3021,8 +3038,6 @@ fts_doc_init(
memset(doc, 0, sizeof(*doc));
doc->self_heap = ib_heap_allocator_create(heap);
-
- return(doc);
}
/*********************************************************************//**
@@ -3075,7 +3090,7 @@ fts_fetch_row_id(
/*********************************************************************//**
Callback function for fetch that stores the text of an FTS document,
converting each column to UTF-16.
-@return: always returns FALSE */
+@return always FALSE */
UNIV_INTERN
ibool
fts_query_expansion_fetch_doc(
@@ -3467,13 +3482,15 @@ fts_get_max_doc_id(
dfield = dict_index_get_nth_field(index, 0);
+#if 0 /* This can fail when renaming a column to FTS_DOC_ID_COL_NAME. */
ut_ad(innobase_strcasecmp(FTS_DOC_ID_COL_NAME, dfield->name) == 0);
+#endif
mtr_start(&mtr);
/* fetch the largest indexes value */
btr_pcur_open_at_index_side(
- FALSE, index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
+ false, index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
if (page_get_n_recs(btr_pcur_get_page(&pcur)) > 0) {
const rec_t* rec = NULL;
@@ -3516,13 +3533,14 @@ func_exit:
Fetch document with the given document id.
@return DB_SUCCESS if OK else error */
UNIV_INTERN
-ulint
+dberr_t
fts_doc_fetch_by_doc_id(
/*====================*/
fts_get_doc_t* get_doc, /*!< in: state */
doc_id_t doc_id, /*!< in: id of document to
fetch */
- dict_index_t* index_to_use, /*!< in: caller supplied FTS index */
+ dict_index_t* index_to_use, /*!< in: caller supplied FTS index,
+ or NULL */
ulint option, /*!< in: search option, if it is
greater than doc_id or equal */
fts_sql_callback
@@ -3530,7 +3548,7 @@ fts_doc_fetch_by_doc_id(
void* arg) /*!< in: callback arg */
{
pars_info_t* info;
- ulint error;
+ dberr_t error;
const char* select_str;
doc_id_t write_doc_id;
dict_index_t* index;
@@ -3555,6 +3573,7 @@ fts_doc_fetch_by_doc_id(
pars_info_bind_function(info, "my_func", callback, arg);
select_str = fts_get_select_columns_str(index, info, info->heap);
+ pars_info_bind_id(info, TRUE, "table_name", index->table_name);
if (!get_doc || !get_doc->get_document_graph) {
if (option == FTS_FETCH_DOC_BY_ID_EQUAL) {
@@ -3564,7 +3583,7 @@ fts_doc_fetch_by_doc_id(
mem_heap_printf(info->heap,
"DECLARE FUNCTION my_func;\n"
"DECLARE CURSOR c IS"
- " SELECT %s FROM %s"
+ " SELECT %s FROM $table_name"
" WHERE %s = :doc_id;\n"
"BEGIN\n"
""
@@ -3576,20 +3595,32 @@ fts_doc_fetch_by_doc_id(
" END IF;\n"
"END LOOP;\n"
"CLOSE c;",
- select_str, index->table_name,
- FTS_DOC_ID_COL_NAME));
+ select_str, FTS_DOC_ID_COL_NAME));
} else {
ut_ad(option == FTS_FETCH_DOC_BY_ID_LARGE);
+ /* This is used for crash recovery of table with
+ hidden DOC ID or FTS indexes. We will scan the table
+ to re-processing user table rows whose DOC ID or
+ FTS indexed documents have not been sync-ed to disc
+ during recent crash.
+ In the case that all fulltext indexes are dropped
+ for a table, we will keep the "hidden" FTS_DOC_ID
+ column, and this scan is to retreive the largest
+ DOC ID being used in the table to determine the
+ appropriate next DOC ID.
+ In the case of there exists fulltext index(es), this
+ operation will re-tokenize any docs that have not
+ been sync-ed to the disk, and re-prime the FTS
+ cached */
graph = fts_parse_sql(
NULL,
info,
mem_heap_printf(info->heap,
"DECLARE FUNCTION my_func;\n"
"DECLARE CURSOR c IS"
- " SELECT %s, %s FROM %s"
- " WHERE %s > :doc_id"
- " ORDER BY %s;\n"
+ " SELECT %s, %s FROM $table_name"
+ " WHERE %s > :doc_id;\n"
"BEGIN\n"
""
"OPEN c;\n"
@@ -3601,9 +3632,7 @@ fts_doc_fetch_by_doc_id(
"END LOOP;\n"
"CLOSE c;",
FTS_DOC_ID_COL_NAME,
- select_str, index->table_name,
- FTS_DOC_ID_COL_NAME,
- FTS_DOC_ID_COL_NAME));
+ select_str, FTS_DOC_ID_COL_NAME));
}
if (get_doc) {
get_doc->get_document_graph = graph;
@@ -3633,7 +3662,7 @@ fts_doc_fetch_by_doc_id(
Write out a single word's data as new entry/entries in the INDEX table.
@return DB_SUCCESS if all OK. */
UNIV_INTERN
-ulint
+dberr_t
fts_write_node(
/*===========*/
trx_t* trx, /*!< in: transaction */
@@ -3643,7 +3672,7 @@ fts_write_node(
fts_node_t* node) /*!< in: node columns */
{
pars_info_t* info;
- ulint error;
+ dberr_t error;
ib_uint32_t doc_count;
ib_time_t start_time;
doc_id_t last_doc_id;
@@ -3698,8 +3727,8 @@ fts_write_node(
/*********************************************************************//**
Add rows to the DELETED_CACHE table.
@return DB_SUCCESS if all went well else error code*/
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_sync_add_deleted_cache(
/*=======================*/
fts_sync_t* sync, /*!< in: sync state */
@@ -3710,7 +3739,7 @@ fts_sync_add_deleted_cache(
que_t* graph;
fts_table_t fts_table;
doc_id_t dummy = 0;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
ulint n_elems = ib_vector_size(doc_ids);
ut_a(ib_vector_size(doc_ids) > 0);
@@ -3748,9 +3777,10 @@ fts_sync_add_deleted_cache(
}
/*********************************************************************//**
-Write the words and ilist to disk.*/
-static
-ulint
+Write the words and ilist to disk.
+@return DB_SUCCESS if all went well else error code */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_sync_write_words(
/*=================*/
trx_t* trx, /*!< in: transaction */
@@ -3761,10 +3791,12 @@ fts_sync_write_words(
ulint n_nodes = 0;
ulint n_words = 0;
const ib_rbt_node_t* rbt_node;
- ulint n_new_words = 0;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
ibool print_error = FALSE;
+#ifdef FTS_DOC_STATS_DEBUG
dict_table_t* table = index_cache->index->table;
+ ulint n_new_words = 0;
+#endif /* FTS_DOC_STATS_DEBUG */
FTS_INIT_INDEX_TABLE(
&fts_table, NULL, FTS_INDEX_TABLE, index_cache->index);
@@ -3789,9 +3821,10 @@ fts_sync_write_words(
fts_table.suffix = fts_get_suffix(selected);
+#ifdef FTS_DOC_STATS_DEBUG
/* Check if the word exists in the FTS index and if not
then we need to increment the total word count stats. */
- if (error == DB_SUCCESS) {
+ if (error == DB_SUCCESS && fts_enable_diag_print) {
ibool found = FALSE;
error = fts_is_word_in_index(
@@ -3805,6 +3838,7 @@ fts_sync_write_words(
++n_new_words;
}
}
+#endif /* FTS_DOC_STATS_DEBUG */
n_nodes += ib_vector_size(word->nodes);
@@ -3829,9 +3863,9 @@ fts_sync_write_words(
if (error != DB_SUCCESS && !print_error) {
ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error (%lu) writing "
+ fprintf(stderr, " InnoDB: Error (%s) writing "
"word node to FTS auxiliary index "
- "table.\n", error);
+ "table.\n", ut_strerr(error));
print_error = TRUE;
}
@@ -3840,19 +3874,23 @@ fts_sync_write_words(
ut_free(rbt_remove_node(index_cache->words, rbt_node));
}
- if (error == DB_SUCCESS && n_new_words > 0) {
+#ifdef FTS_DOC_STATS_DEBUG
+ if (error == DB_SUCCESS && n_new_words > 0 && fts_enable_diag_print) {
fts_table_t fts_table;
FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
/* Increment the total number of words in the FTS index */
- fts_config_increment_index_value(
+ error = fts_config_increment_index_value(
trx, index_cache->index, FTS_TOTAL_WORD_COUNT,
n_new_words);
}
+#endif /* FTS_DOC_STATS_DEBUG */
- printf("Avg number of nodes: %lf\n",
- (double) n_nodes / (double) (n_words > 1 ? n_words : 1));
+ if (fts_enable_diag_print) {
+ printf("Avg number of nodes: %lf\n",
+ (double) n_nodes / (double) (n_words > 1 ? n_words : 1));
+ }
return(error);
}
@@ -3861,8 +3899,8 @@ fts_sync_write_words(
/*********************************************************************//**
Write a single documents statistics to disk.
@return DB_SUCCESS if all went well else error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_sync_write_doc_stat(
/*====================*/
trx_t* trx, /*!< in: transaction */
@@ -3872,7 +3910,7 @@ fts_sync_write_doc_stat(
{
pars_info_t* info;
doc_id_t doc_id;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
ib_uint32_t word_count;
if (*graph) {
@@ -3918,9 +3956,9 @@ fts_sync_write_doc_stat(
trx->error_state = DB_SUCCESS;
} else {
- fprintf(stderr, " InnoDB: Error: %lu "
+ fprintf(stderr, " InnoDB: Error: (%s) "
"while writing to FTS doc_id.\n",
- error);
+ ut_strerr(error));
break; /* Exit the loop. */
}
@@ -3940,7 +3978,7 @@ fts_sync_write_doc_stats(
trx_t* trx, /*!< in: transaction */
const fts_index_cache_t*index_cache) /*!< in: index cache */
{
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
que_t* graph = NULL;
fts_doc_stats_t* doc_stat;
@@ -3973,7 +4011,6 @@ fts_sync_write_doc_stats(
return(error);
}
-#endif /* FTS_DOC_STATS_DEBUG */
/*********************************************************************//**
Callback to check the existince of a word.
@@ -4007,13 +4044,12 @@ fts_lookup_word(
}
/*********************************************************************//**
-Check whether a particular word (term) exists in the FTS index. */
+Check whether a particular word (term) exists in the FTS index.
+@return DB_SUCCESS if all went well else error code */
static
-ulint
+dberr_t
fts_is_word_in_index(
/*=================*/
- /* out: DB_SUCCESS if all went
- well else error code */
trx_t* trx, /*!< in: FTS query state */
que_t** graph, /* out: Query graph */
fts_table_t* fts_table, /*!< in: table instance */
@@ -4022,7 +4058,7 @@ fts_is_word_in_index(
ibool* found) /* out: TRUE if exists */
{
pars_info_t* info;
- ulint error;
+ dberr_t error;
trx->op_info = "looking up word in FTS index";
@@ -4073,8 +4109,9 @@ fts_is_word_in_index(
trx->error_state = DB_SUCCESS;
} else {
- fprintf(stderr, " InnoDB: Error: %lu "
- "while reading FTS index.\n", error);
+ fprintf(stderr, " InnoDB: Error: (%s) "
+ "while reading FTS index.\n",
+ ut_strerr(error));
break; /* Exit the loop. */
}
@@ -4083,6 +4120,7 @@ fts_is_word_in_index(
return(error);
}
+#endif /* FTS_DOC_STATS_DEBUG */
/*********************************************************************//**
Begin Sync, create transaction, acquire locks, etc. */
@@ -4101,29 +4139,36 @@ fts_sync_begin(
sync->trx = trx_allocate_for_background();
- ut_print_timestamp(stderr);
- fprintf(stderr, " SYNC deleted count: %ld size: %lu bytes\n",
- ib_vector_size(cache->deleted_doc_ids), cache->total_size);
+ if (fts_enable_diag_print) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "FTS SYNC for table %s, deleted count: %ld size: "
+ "%lu bytes",
+ sync->table->name,
+ ib_vector_size(cache->deleted_doc_ids),
+ cache->total_size);
+ }
}
/*********************************************************************//**
Run SYNC on the table, i.e., write out data from the index specific
-cache to the FTS aux INDEX table and FTS aux doc id stats table. */
-static
-ulint
+cache to the FTS aux INDEX table and FTS aux doc id stats table.
+@return DB_SUCCESS if all OK */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_sync_index(
/*===========*/
- /* out: DB_SUCCESS if all OK */
fts_sync_t* sync, /*!< in: sync state */
fts_index_cache_t* index_cache) /*!< in: index cache */
{
trx_t* trx = sync->trx;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
trx->op_info = "doing SYNC index";
- ut_print_timestamp(stderr);
- fprintf(stderr, " SYNC words: %ld\n", rbt_size(index_cache->words));
+ if (fts_enable_diag_print) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "SYNC words: %ld", rbt_size(index_cache->words));
+ }
ut_ad(rbt_validate(index_cache->words));
@@ -4146,13 +4191,13 @@ fts_sync_index(
/*********************************************************************//**
Commit the SYNC, change state of processed doc ids etc.
@return DB_SUCCESS if all OK */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_sync_commit(
/*============*/
fts_sync_t* sync) /*!< in: sync state */
{
- ulint error;
+ dberr_t error;
trx_t* trx = sync->trx;
fts_cache_t* cache = sync->table->fts->cache;
doc_id_t last_doc_id;
@@ -4191,13 +4236,18 @@ fts_sync_commit(
fts_sql_rollback(trx);
ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%lu) during SYNC.\n", error);
+ fprintf(stderr, " InnoDB: Error: (%s) during SYNC.\n",
+ ut_strerr(error));
}
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: SYNC time : %lusecs: elapsed %lf ins/sec\n",
- (ulong) (ut_time() - sync->start_time),
- (double) n_nodes/ (double) elapsed_time);
+ if (fts_enable_diag_print && elapsed_time) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "SYNC for table %s: SYNC time : %lu secs: "
+ "elapsed %lf ins/sec",
+ sync->table->name,
+ (ulong) (ut_time() - sync->start_time),
+ (double) n_nodes/ (double) elapsed_time);
+ }
trx_free_for_background(trx);
@@ -4226,13 +4276,13 @@ Run SYNC on the table, i.e., write out data from the cache to the
FTS auxiliary INDEX table and clear the cache at the end.
@return DB_SUCCESS if all OK */
static
-ulint
+dberr_t
fts_sync(
/*=====*/
fts_sync_t* sync) /*!< in: sync state */
{
ulint i;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
fts_cache_t* cache = sync->table->fts->cache;
rw_lock_x_lock(&cache->lock);
@@ -4275,34 +4325,28 @@ fts_sync(
/****************************************************************//**
Run SYNC on the table, i.e., write out data from the cache to the
-FTS auxiliary INDEX table and clear the cache at the end.
-@return DB_SUCCESS if all OK */
+FTS auxiliary INDEX table and clear the cache at the end. */
UNIV_INTERN
-ulint
+void
fts_sync_table(
/*===========*/
dict_table_t* table) /*!< in: table */
{
- ulint error = DB_SUCCESS;
-
ut_ad(table->fts);
if (table->fts->cache) {
fts_sync(table->fts->cache->sync);
}
-
- return(error);
}
/********************************************************************
Process next token from document starting at the given position, i.e., add
-the token's start position to the token's list of positions. */
+the token's start position to the token's list of positions.
+@return number of characters handled in this call */
static
ulint
fts_process_token(
/*==============*/
- /* out: number of characters
- handled in this call */
fts_doc_t* doc, /* in/out: document to
tokenize */
fts_doc_t* result, /* out: if provided, save
@@ -4406,7 +4450,7 @@ fts_tokenize_document(
ut_a(doc->charset);
doc->tokens = rbt_create_arg_cmp(
- sizeof(fts_token_t), innobase_fts_text_cmp, doc->charset);
+ sizeof(fts_token_t), innobase_fts_text_cmp, (void*) doc->charset);
for (ulint i = 0; i < doc->text.f_len; i += inc) {
inc = fts_process_token(doc, result, i, 0);
@@ -4473,6 +4517,7 @@ fts_get_docs_create(
memset(get_doc, 0x0, sizeof(*get_doc));
get_doc->index_cache = fts_get_index_cache(cache, *index);
+ get_doc->cache = cache;
/* Must find the index cache. */
ut_a(get_doc->index_cache != NULL);
@@ -4520,11 +4565,14 @@ fts_init_doc_id(
rw_lock_x_lock(&table->fts->cache->lock);
+ /* Return if the table is already initialized for DOC ID */
if (table->fts->cache->first_doc_id != FTS_NULL_DOC_ID) {
rw_lock_x_unlock(&table->fts->cache->lock);
return(0);
}
+ DEBUG_SYNC_C("fts_initialize_doc_id");
+
/* Then compare this value with the ID value stored in the CONFIG
table. The larger one will be our new initial Doc ID */
fts_cmp_set_sync_doc_id(table, 0, FALSE, &max_doc_id);
@@ -4591,7 +4639,7 @@ fts_get_rows_count(
trx_t* trx;
pars_info_t* info;
que_t* graph;
- ulint error;
+ dberr_t error;
ulint count = 0;
trx = trx_allocate_for_background();
@@ -4639,9 +4687,9 @@ fts_get_rows_count(
trx->error_state = DB_SUCCESS;
} else {
- fprintf(stderr, " InnoDB: Error: %lu "
+ fprintf(stderr, " InnoDB: Error: (%s) "
"while reading FTS table.\n",
- error);
+ ut_strerr(error));
break; /* Exit the loop. */
}
@@ -4678,7 +4726,7 @@ fts_update_max_cache_size(
trx_free_for_background(trx);
}
-#endif
+#endif /* FTS_CACHE_SIZE_DEBUG */
/*********************************************************************//**
Free the modified rows of a table. */
@@ -4861,13 +4909,13 @@ fts_get_doc_id_from_rec(
col_no = dict_col_get_clust_pos(
&table->cols[table->fts->doc_col], clust_index);
+ ut_ad(col_no != ULINT_UNDEFINED);
- /* We have no choice but to cast rec here :-( */
- data = rec_get_nth_field((rec_t*) rec, offsets, col_no, &len);
+ data = rec_get_nth_field(rec, offsets, col_no, &len);
ut_a(len == 8);
- ut_a(len == sizeof(doc_id));
- doc_id = (doc_id_t) mach_read_from_8(data);
+ ut_ad(8 == sizeof(doc_id));
+ doc_id = static_cast<doc_id_t>(mach_read_from_8(data));
return(doc_id);
}
@@ -4876,7 +4924,7 @@ fts_get_doc_id_from_rec(
Search the index specific cache for a particular FTS index.
@return the index specific cache else NULL */
UNIV_INTERN
-const fts_index_cache_t*
+fts_index_cache_t*
fts_find_index_cache(
/*=================*/
const fts_cache_t* cache, /*!< in: cache to search */
@@ -4884,7 +4932,8 @@ fts_find_index_cache(
{
/* We cast away the const because our internal function, takes
non-const cache arg and returns a non-const pointer. */
- return(fts_get_index_cache((fts_cache_t*) cache, index));
+ return(static_cast<fts_index_cache_t*>(
+ fts_get_index_cache((fts_cache_t*) cache, index)));
}
/*********************************************************************//**
@@ -4960,7 +5009,7 @@ fts_cache_append_deleted_doc_ids(
{
ulint i;
- mutex_enter((mutex_t*) &cache->deleted_lock);
+ mutex_enter((ib_mutex_t*) &cache->deleted_lock);
for (i = 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) {
fts_update_t* update;
@@ -4971,7 +5020,7 @@ fts_cache_append_deleted_doc_ids(
ib_vector_push(vector, &update->doc_id);
}
- mutex_exit((mutex_t*) &cache->deleted_lock);
+ mutex_exit((ib_mutex_t*) &cache->deleted_lock);
}
/*********************************************************************//**
@@ -5043,11 +5092,11 @@ UNIV_INTERN
void
fts_add_doc_id_column(
/*==================*/
- dict_table_t* table) /*!< in/out: Table with FTS index */
+ dict_table_t* table, /*!< in/out: Table with FTS index */
+ mem_heap_t* heap) /*!< in: temporary memory heap, or NULL */
{
dict_mem_table_add_col(
- table,
- table->heap,
+ table, heap,
FTS_DOC_ID_COL_NAME,
DATA_INT,
dtype_form_prtype(
@@ -5069,7 +5118,7 @@ fts_update_doc_id(
doc_id_t* next_doc_id) /*!< in/out: buffer for writing */
{
doc_id_t doc_id;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
if (*next_doc_id) {
doc_id = *next_doc_id;
@@ -5236,13 +5285,12 @@ fts_savepoint_copy(
ftt_dst = fts_trx_table_clone(*ftt_src);
- rbt_insert(dst->tables, &ftt_dst->table->id, &ftt_dst);
+ rbt_insert(dst->tables, &ftt_dst, &ftt_dst);
}
}
/*********************************************************************//**
-Take a FTS savepoint.
-@return DB_SUCCESS or error code */
+Take a FTS savepoint. */
UNIV_INTERN
void
fts_savepoint_take(
@@ -5312,7 +5360,6 @@ fts_savepoint_release(
const char* name) /*!< in: savepoint name */
{
ulint i;
- fts_savepoint_t* prev;
ib_vector_t* savepoints;
ulint top_of_stack = 0;
@@ -5322,9 +5369,6 @@ fts_savepoint_release(
ut_a(ib_vector_size(savepoints) > 0);
- prev = static_cast<fts_savepoint_t*>(
- ib_vector_get(savepoints, top_of_stack));
-
/* Skip the implied savepoint (first element). */
for (i = 1; i < ib_vector_size(savepoints); ++i) {
fts_savepoint_t* savepoint;
@@ -5338,17 +5382,6 @@ fts_savepoint_release(
we have to skip deleted/released entries. */
if (savepoint->name != NULL
&& strcmp(name, savepoint->name) == 0) {
-
- fts_savepoint_t* last;
- fts_savepoint_t temp;
-
- last = static_cast<fts_savepoint_t*>(
- ib_vector_last(savepoints));
-
- /* Swap the entries. */
- memcpy(&temp, last, sizeof(temp));
- memcpy(last, prev, sizeof(*last));
- memcpy(prev, &temp, sizeof(prev));
break;
/* Track the previous savepoint instance that will
@@ -5357,8 +5390,6 @@ fts_savepoint_release(
/* We need to delete all entries
greater than this element. */
top_of_stack = i;
-
- prev = savepoint;
}
}
@@ -5395,8 +5426,7 @@ fts_savepoint_release(
}
/**********************************************************************//**
-Refresh last statement savepoint.
-@return DB_SUCCESS or error code */
+Refresh last statement savepoint. */
UNIV_INTERN
void
fts_savepoint_laststmt_refresh(
@@ -5588,7 +5618,7 @@ static
ibool
fts_is_aux_table_name(
/*==================*/
- fts_sys_table_t*table, /*!< out: table info */
+ fts_aux_table_t*table, /*!< out: table info */
const char* name, /*!< in: table name */
ulint len) /*!< in: length of table name */
{
@@ -5614,7 +5644,6 @@ fts_is_aux_table_name(
if (ptr != NULL && len > 20 && strncmp(ptr, "FTS_", 4) == 0) {
ulint i;
-
/* Skip the prefix. */
ptr += 4;
len -= 4;
@@ -5689,7 +5718,7 @@ fts_read_tables(
void* user_arg) /*!< in: pointer to ib_vector_t */
{
int i;
- fts_sys_table_t*table;
+ fts_aux_table_t*table;
mem_heap_t* heap;
ibool done = FALSE;
ib_vector_t* tables = static_cast<ib_vector_t*>(user_arg);
@@ -5701,7 +5730,7 @@ fts_read_tables(
/* We will use this heap for allocating strings. */
heap = static_cast<mem_heap_t*>(tables->allocator->arg);
- table = static_cast<fts_sys_table_t*>(ib_vector_push(tables, NULL));
+ table = static_cast<fts_aux_table_t*>(ib_vector_push(tables, NULL));
memset(table, 0x0, sizeof(*table));
@@ -5726,9 +5755,9 @@ fts_read_tables(
}
table->name = static_cast<char*>(
- mem_heap_dup(heap, data, len + 1));
- table->name[len] = '\0';
- printf("Found [%.*s]\n", (int) len, table->name);
+ mem_heap_alloc(heap, len + 1));
+ memcpy(table->name, data, len);
+ table->name[len] = 0;
break;
case 1: /* ID */
@@ -5749,41 +5778,41 @@ fts_read_tables(
Check and drop all orphaned FTS auxiliary tables, those that don't have
a parent table or FTS index defined on them.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull))
+void
fts_check_and_drop_orphaned_tables(
/*===============================*/
trx_t* trx, /*!< in: transaction */
ib_vector_t* tables) /*!< in: tables to check */
{
- ulint i;
- ulint error = DB_SUCCESS;
-
- for (i = 0; i < ib_vector_size(tables); ++i) {
+ for (ulint i = 0; i < ib_vector_size(tables); ++i) {
dict_table_t* table;
- fts_sys_table_t* sys_table;
- ibool drop = FALSE;
+ fts_aux_table_t* aux_table;
+ bool drop = false;
- sys_table = static_cast<fts_sys_table_t*>(
+ aux_table = static_cast<fts_aux_table_t*>(
ib_vector_get(tables, i));
- table = dict_table_open_on_id(sys_table->parent_id, FALSE);
+ table = dict_table_open_on_id(
+ aux_table->parent_id, TRUE, FALSE);
if (table == NULL || table->fts == NULL) {
- drop = TRUE;
+ drop = true;
- } else if (sys_table->index_id != 0) {
- ulint j;
+ } else if (aux_table->index_id != 0) {
index_id_t id;
- fts_t* fts;
+ fts_t* fts;
- drop = TRUE;
+ drop = true;
fts = table->fts;
- id = sys_table->index_id;
+ id = aux_table->index_id;
/* Search for the FT index in the table's list. */
- for (j = 0; j < ib_vector_size(fts->indexes); ++j) {
+ for (ulint j = 0;
+ j < ib_vector_size(fts->indexes);
+ ++j) {
+
const dict_index_t* index;
index = static_cast<const dict_index_t*>(
@@ -5791,28 +5820,36 @@ fts_check_and_drop_orphaned_tables(
if (index->id == id) {
- drop = FALSE;
+ drop = false;
break;
}
}
}
if (table) {
- dict_table_close(table, FALSE);
+ dict_table_close(table, TRUE, FALSE);
}
if (drop) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Warning: Parent table of "
- "FT auxiliary table %s not found.\n",
- sys_table->name);
- /* We ignore drop errors. */
- fts_drop_table(trx, sys_table->name);
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Parent table of FTS auxiliary table %s not "
+ "found.", aux_table->name);
+
+ dberr_t err = fts_drop_table(trx, aux_table->name);
+
+ if (err == DB_FAIL) {
+ char* path;
+
+ path = fil_make_ibd_name(
+ aux_table->name, false);
+
+ os_file_delete_if_exists(path);
+
+ mem_free(path);
+ }
}
}
-
- return(error);
}
/**********************************************************************//**
@@ -5823,19 +5860,62 @@ void
fts_drop_orphaned_tables(void)
/*==========================*/
{
- trx_t* trx;
- pars_info_t* info;
- mem_heap_t* heap;
- que_t* graph;
- ib_vector_t* tables;
- ib_alloc_t* heap_alloc;
- ulint error = DB_SUCCESS;
+ trx_t* trx;
+ pars_info_t* info;
+ mem_heap_t* heap;
+ que_t* graph;
+ ib_vector_t* tables;
+ ib_alloc_t* heap_alloc;
+ space_name_list_t space_name_list;
+ dberr_t error = DB_SUCCESS;
+
+ /* Note: We have to free the memory after we are done with the list. */
+ error = fil_get_space_names(space_name_list);
+
+ if (error == DB_OUT_OF_MEMORY) {
+ ib_logf(IB_LOG_LEVEL_ERROR, "Out of memory");
+ ut_error;
+ }
heap = mem_heap_create(1024);
heap_alloc = ib_heap_allocator_create(heap);
/* We store the table ids of all the FTS indexes that were found. */
- tables = ib_vector_create(heap_alloc, sizeof(fts_sys_table_t), 128);
+ tables = ib_vector_create(heap_alloc, sizeof(fts_aux_table_t), 128);
+
+ /* Get the list of all known .ibd files and check for orphaned
+ FTS auxiliary files in that list. We need to remove them because
+ users can't map them back to table names and this will create
+ unnecessary clutter. */
+
+ for (space_name_list_t::iterator it = space_name_list.begin();
+ it != space_name_list.end();
+ ++it) {
+
+ fts_aux_table_t* fts_aux_table;
+
+ fts_aux_table = static_cast<fts_aux_table_t*>(
+ ib_vector_push(tables, NULL));
+
+ memset(fts_aux_table, 0x0, sizeof(*fts_aux_table));
+
+ if (!fts_is_aux_table_name(fts_aux_table, *it, strlen(*it))) {
+ ib_vector_pop(tables);
+ } else {
+ ulint len = strlen(*it);
+
+ fts_aux_table->id = fil_get_space_id_for_table(*it);
+
+ /* We got this list from fil0fil.cc. The tablespace
+ with this name must exist. */
+ ut_a(fts_aux_table->id != ULINT_UNDEFINED);
+
+ fts_aux_table->name = static_cast<char*>(
+ mem_heap_dup(heap, *it, len + 1));
+
+ fts_aux_table->name[len] = 0;
+ }
+ }
trx = trx_allocate_for_background();
trx->op_info = "dropping orphaned FTS tables";
@@ -5867,10 +5947,7 @@ fts_drop_orphaned_tables(void)
error = fts_eval_sql(trx, graph);
if (error == DB_SUCCESS) {
- error = fts_check_and_drop_orphaned_tables(trx, tables);
- }
-
- if (error == DB_SUCCESS) {
+ fts_check_and_drop_orphaned_tables(trx, tables);
fts_sql_commit(trx);
break; /* Exit the loop. */
} else {
@@ -5881,15 +5958,15 @@ fts_drop_orphaned_tables(void)
ut_print_timestamp(stderr);
if (error == DB_LOCK_WAIT_TIMEOUT) {
- fprintf(stderr, " InnoDB: Warning: lock wait "
- "timeout reading SYS_TABLES. "
- "Retrying!\n");
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "lock wait timeout reading SYS_TABLES. "
+ "Retrying!");
trx->error_state = DB_SUCCESS;
} else {
- fprintf(stderr, " InnoDB: Error: %lu "
- "while reading SYS_TABLES.\n",
- error);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "(%s) while reading SYS_TABLES.",
+ ut_strerr(error));
break; /* Exit the loop. */
}
@@ -5905,6 +5982,14 @@ fts_drop_orphaned_tables(void)
if (heap != NULL) {
mem_heap_free(heap);
}
+
+ /** Free the memory allocated to store the .ibd names. */
+ for (space_name_list_t::iterator it = space_name_list.begin();
+ it != space_name_list.end();
+ ++it) {
+
+ delete[] *it;
+ }
}
/**********************************************************************//**
@@ -5986,7 +6071,7 @@ fts_load_stopword(
{
fts_table_t fts_table;
fts_string_t str;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
ulint use_stopword;
fts_cache_t* cache;
const char* stopword_to_use = NULL;
@@ -6086,6 +6171,43 @@ cleanup:
/**********************************************************************//**
Callback function when we initialize the FTS at the start up
+time. It recovers the maximum Doc ID present in the current table.
+@return: always returns TRUE */
+static
+ibool
+fts_init_get_doc_id(
+/*================*/
+ void* row, /*!< in: sel_node_t* */
+ void* user_arg) /*!< in: fts cache */
+{
+ doc_id_t doc_id = FTS_NULL_DOC_ID;
+ sel_node_t* node = static_cast<sel_node_t*>(row);
+ que_node_t* exp = node->select_list;
+ fts_cache_t* cache = static_cast<fts_cache_t*>(user_arg);
+
+ ut_ad(ib_vector_is_empty(cache->get_docs));
+
+ /* Copy each indexed column content into doc->text.f_str */
+ if (exp) {
+ dfield_t* dfield = que_node_get_val(exp);
+ dtype_t* type = dfield_get_type(dfield);
+ void* data = dfield_get_data(dfield);
+
+ ut_a(dtype_get_mtype(type) == DATA_INT);
+
+ doc_id = static_cast<doc_id_t>(mach_read_from_8(
+ static_cast<const byte*>(data)));
+
+ if (doc_id >= cache->next_doc_id) {
+ cache->next_doc_id = doc_id + 1;
+ }
+ }
+
+ return(TRUE);
+}
+
+/**********************************************************************//**
+Callback function when we initialize the FTS at the start up
time. It recovers Doc IDs that have not sync-ed to the auxiliary
table, and require to bring them back into FTS index.
@return: always returns TRUE */
@@ -6100,22 +6222,16 @@ fts_init_recover_doc(
fts_doc_t doc;
ulint doc_len = 0;
ulint field_no = 0;
- ibool has_fts = TRUE;
- fts_get_doc_t* get_doc = NULL;
+ fts_get_doc_t* get_doc = static_cast<fts_get_doc_t*>(user_arg);
doc_id_t doc_id = FTS_NULL_DOC_ID;
sel_node_t* node = static_cast<sel_node_t*>(row);
que_node_t* exp = node->select_list;
- fts_cache_t* cache = static_cast<fts_cache_t*>(user_arg);
+ fts_cache_t* cache = get_doc->cache;
- if (ib_vector_is_empty(cache->get_docs)) {
- has_fts = FALSE;
- } else {
- get_doc = static_cast<fts_get_doc_t*>(
- ib_vector_get(cache->get_docs, 0));
+ fts_doc_init(&doc);
+ doc.found = TRUE;
- fts_doc_init(&doc);
- doc.found = TRUE;
- }
+ ut_ad(cache);
/* Copy each indexed column content into doc->text.f_str */
while (exp) {
@@ -6131,18 +6247,11 @@ fts_init_recover_doc(
doc_id = static_cast<doc_id_t>(mach_read_from_8(
static_cast<const byte*>(data)));
- /* Just need to fetch the Doc ID */
- if (!has_fts) {
- goto func_exit;
- }
-
field_no++;
exp = que_node_get_next(exp);
continue;
}
- ut_a(has_fts);
-
if (len == UNIV_SQL_NULL) {
exp = que_node_get_next(exp);
continue;
@@ -6196,7 +6305,6 @@ fts_init_recover_doc(
cache->added++;
-func_exit:
if (doc_id >= cache->next_doc_id) {
cache->next_doc_id = doc_id + 1;
}
@@ -6223,6 +6331,9 @@ fts_init_index(
fts_get_doc_t* get_doc = NULL;
ibool has_fts = TRUE;
fts_cache_t* cache = table->fts->cache;
+ bool need_init = false;
+
+ ut_ad(!mutex_own(&dict_sys->mutex));
/* First check cache->get_docs is initialized */
if (!has_cache_lock) {
@@ -6239,6 +6350,8 @@ fts_init_index(
goto func_exit;
}
+ need_init = true;
+
start_doc = cache->synced_doc_id;
if (!start_doc) {
@@ -6250,28 +6363,32 @@ fts_init_index(
dropped, and we re-initialize the Doc ID system for subsequent
insertion */
if (ib_vector_is_empty(cache->get_docs)) {
- index = dict_table_get_first_index(table);
+ index = dict_table_get_index_on_name(table, FTS_DOC_ID_INDEX_NAME);
+
+ ut_a(index);
+
has_fts = FALSE;
+ fts_doc_fetch_by_doc_id(NULL, start_doc, index,
+ FTS_FETCH_DOC_BY_ID_LARGE,
+ fts_init_get_doc_id, cache);
} else {
- /* We only have one FTS index per table */
- get_doc = static_cast<fts_get_doc_t*>(
- ib_vector_get(cache->get_docs, 0));
+ for (ulint i = 0; i < ib_vector_size(cache->get_docs); ++i) {
+ get_doc = static_cast<fts_get_doc_t*>(
+ ib_vector_get(cache->get_docs, i));
- index = get_doc->index_cache->index;
- }
+ index = get_doc->index_cache->index;
- fts_doc_fetch_by_doc_id(NULL, start_doc, index,
- FTS_FETCH_DOC_BY_ID_LARGE,
- fts_init_recover_doc, cache);
+ fts_doc_fetch_by_doc_id(NULL, start_doc, index,
+ FTS_FETCH_DOC_BY_ID_LARGE,
+ fts_init_recover_doc, get_doc);
+ }
+ }
if (has_fts) {
if (table->fts->cache->stopword_info.status
& STOPWORD_NOT_INIT) {
fts_load_stopword(table, NULL, NULL, NULL, TRUE, TRUE);
}
-
- /* Register the table with the optimize thread. */
- fts_optimize_add_table(table);
}
table->fts->fts_status |= ADDED_TABLE_SYNCED;
@@ -6283,5 +6400,12 @@ func_exit:
rw_lock_x_unlock(&cache->lock);
}
+ if (need_init) {
+ mutex_enter(&dict_sys->mutex);
+ /* Register the table with the optimize thread. */
+ fts_optimize_add_table(table);
+ mutex_exit(&dict_sys->mutex);
+ }
+
return(TRUE);
}
diff --git a/storage/innobase/fts/fts0opt.cc b/storage/innobase/fts/fts0opt.cc
index 92e040d2715..9abeeccac91 100644
--- a/storage/innobase/fts/fts0opt.cc
+++ b/storage/innobase/fts/fts0opt.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -39,26 +39,29 @@ Completed 2011/7/10 Sunny and Jimmy Yang
#include "fts0vlc.ic"
#endif
-/* The FTS optimize thread's work queue. */
+/** The FTS optimize thread's work queue. */
static ib_wqueue_t* fts_optimize_wq;
-/* The number of document ids to delete in one statement. */
+/** The number of document ids to delete in one statement. */
static const ulint FTS_MAX_DELETE_DOC_IDS = 1000;
-/* Time to wait for a message. */
+/** Time to wait for a message. */
static const ulint FTS_QUEUE_WAIT_IN_USECS = 5000000;
-/* Default optimize interval in secs. */
+/** Default optimize interval in secs. */
static const ulint FTS_OPTIMIZE_INTERVAL_IN_SECS = 300;
+/** Server is shutting down, so the optimize thread must exit */
+static bool fts_opt_start_shutdown = false;
+
#if 0
-/* Check each table in round robin to see whether they'd
+/** Check each table in round robin to see whether they'd
need to be "optimized" */
static ulint fts_optimize_sync_iterator = 0;
#endif
/** State of a table within the optimization sub system. */
-enum fts_state_enum {
+enum fts_state_t {
FTS_STATE_LOADED,
FTS_STATE_RUNNING,
FTS_STATE_SUSPENDED,
@@ -67,7 +70,7 @@ enum fts_state_enum {
};
/** FTS optimize thread message types. */
-enum fts_msg_type_enum {
+enum fts_msg_type_t {
FTS_MSG_START, /*!< Start optimizing thread */
FTS_MSG_PAUSE, /*!< Pause optimizing thread */
@@ -83,21 +86,9 @@ enum fts_msg_type_enum {
threads work queue */
};
-typedef enum fts_state_enum fts_state_t;
-typedef struct fts_zip_struct fts_zip_t;
-typedef struct fts_msg_struct fts_msg_t;
-typedef struct fts_slot_struct fts_slot_t;
-typedef struct fts_encode_struct fts_encode_t;
-typedef enum fts_msg_type_enum fts_msg_type_t;
-typedef struct fts_msg_del_struct fts_msg_del_t;
-typedef struct fts_msg_stop_struct fts_msg_stop_t;
-typedef struct fts_optimize_struct fts_optimize_t;
-typedef struct fts_msg_optimize_struct fts_msg_optimize_t;
-typedef struct fts_optimize_graph_struct fts_optimize_graph_t;
-
/** Compressed list of words that have been read from FTS INDEX
that needs to be optimized. */
-struct fts_zip_struct {
+struct fts_zip_t {
ulint status; /*!< Status of (un)/zip operation */
ulint n_words; /*!< Number of words compressed */
@@ -128,7 +119,7 @@ struct fts_zip_struct {
};
/** Prepared statemets used during optimize */
-struct fts_optimize_graph_struct {
+struct fts_optimize_graph_t {
/*!< Delete a word from FTS INDEX */
que_t* delete_nodes_graph;
/*!< Insert a word into FTS INDEX */
@@ -140,7 +131,7 @@ struct fts_optimize_graph_struct {
};
/** Used by fts_optimize() to store state. */
-struct fts_optimize_struct {
+struct fts_optimize_t {
trx_t* trx; /*!< The transaction used for all SQL */
ib_alloc_t* self_heap; /*!< Heap to use for allocations */
@@ -183,14 +174,14 @@ struct fts_optimize_struct {
};
/** Used by the optimize, to keep state during compacting nodes. */
-struct fts_encode_struct {
+struct fts_encode_t {
doc_id_t src_last_doc_id;/*!< Last doc id read from src node */
byte* src_ilist_ptr; /*!< Current ptr within src ilist */
};
/** We use this information to determine when to start the optimize
cycle for a table. */
-struct fts_slot_struct {
+struct fts_slot_t {
dict_table_t* table; /*!< Table to optimize */
fts_state_t state; /*!< State of this slot */
@@ -210,7 +201,7 @@ struct fts_slot_struct {
};
/** A table remove message for the FTS optimize thread. */
-struct fts_msg_del_struct {
+struct fts_msg_del_t {
dict_table_t* table; /*!< The table to remove */
os_event_t event; /*!< Event to synchronize acknowledgement
@@ -219,12 +210,12 @@ struct fts_msg_del_struct {
};
/** Stop the optimize thread. */
-struct fts_msg_optimize_struct {
+struct fts_msg_optimize_t {
dict_table_t* table; /*!< Table to optimize */
};
/** The FTS optimize message work queue message type. */
-struct fts_msg_struct {
+struct fts_msg_t {
fts_msg_type_t type; /*!< Message type */
void* ptr; /*!< The message contents */
@@ -466,9 +457,9 @@ fts_optimize_index_fetch_node(
/**********************************************************************//**
Read the rows from the FTS inde.
-@return vector of rows fetched */
+@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_index_fetch_nodes(
/*==================*/
trx_t* trx, /*!< in: transaction */
@@ -479,7 +470,7 @@ fts_index_fetch_nodes(
fts_fetch_t* fetch) /*!< in: fetch callback.*/
{
pars_info_t* info;
- ulint error;
+ dberr_t error;
trx->op_info = "fetching FTS index nodes";
@@ -543,8 +534,9 @@ fts_index_fetch_nodes(
trx->error_state = DB_SUCCESS;
} else {
- fprintf(stderr, " InnoDB: Error: %lu "
- "while reading FTS index.\n", error);
+ fprintf(stderr, " InnoDB: Error: (%s) "
+ "while reading FTS index.\n",
+ ut_strerr(error));
break; /* Exit the loop. */
}
@@ -781,8 +773,8 @@ fts_zip_deflate_end(
Read the words from the FTS INDEX.
@return DB_SUCCESS if all OK, DB_TABLE_NOT_FOUND if no more indexes
to search else error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_index_fetch_words(
/*==================*/
fts_optimize_t* optim, /*!< in: optimize scratch pad */
@@ -794,7 +786,7 @@ fts_index_fetch_words(
que_t* graph;
ulint selected;
fts_zip_t* zip = NULL;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
mem_heap_t* heap = static_cast<mem_heap_t*>(optim->self_heap->arg);
ibool inited = FALSE;
@@ -849,13 +841,14 @@ fts_index_fetch_words(
zip = optim->zip;
for(;;) {
+ int err;
- if (!inited && ((error = deflateInit(zip->zp, 9))
+ if (!inited && ((err = deflateInit(zip->zp, 9))
!= Z_OK)) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Error: ZLib deflateInit() "
- "failed: %lu\n", error);
+ "failed: %d\n", err);
error = DB_ERROR;
break;
@@ -885,9 +878,9 @@ fts_index_fetch_words(
optim->trx->error_state = DB_SUCCESS;
} else {
- fprintf(stderr, " InnoDB: Error: %lu "
+ fprintf(stderr, " InnoDB: Error: (%s) "
"while reading document.\n",
- error);
+ ut_strerr(error));
break; /* Exit the loop. */
}
@@ -962,14 +955,14 @@ fts_fetch_doc_ids(
Read the rows from a FTS common auxiliary table.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_table_fetch_doc_ids(
/*====================*/
trx_t* trx, /*!< in: transaction */
fts_table_t* fts_table, /*!< in: table */
fts_doc_ids_t* doc_ids) /*!< in: For collecting doc ids */
{
- ulint error;
+ dberr_t error;
que_t* graph;
pars_info_t* info = pars_info_create();
ibool alloc_bk_trx = FALSE;
@@ -1114,8 +1107,8 @@ fts_optimize_lookup(
/**********************************************************************//**
Encode the word pos list into the node
@return DB_SUCCESS or error code*/
-static
-ulint
+static __attribute__((nonnull))
+dberr_t
fts_optimize_encode_node(
/*=====================*/
fts_node_t* node, /*!< in: node to fill*/
@@ -1126,7 +1119,7 @@ fts_optimize_encode_node(
ulint enc_len;
ulint pos_enc_len;
doc_id_t doc_id_delta;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
byte* src = enc->src_ilist_ptr;
if (node->first_doc_id == 0) {
@@ -1202,8 +1195,8 @@ fts_optimize_encode_node(
/**********************************************************************//**
Optimize the data contained in a node.
@return DB_SUCCESS or error code*/
-static
-ulint
+static __attribute__((nonnull))
+dberr_t
fts_optimize_node(
/*==============*/
ib_vector_t* del_vec, /*!< in: vector of doc ids to delete*/
@@ -1213,7 +1206,7 @@ fts_optimize_node(
fts_encode_t* enc) /*!< in: encoding state */
{
ulint copied;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
doc_id_t doc_id = enc->src_last_doc_id;
if (!enc->src_ilist_ptr) {
@@ -1299,8 +1292,8 @@ test_again:
/**********************************************************************//**
Determine the starting pos within the deleted doc id vector for a word.
-@return DB_SUCCESS or error code */
-static
+@return delete position */
+static __attribute__((nonnull, warn_unused_result))
int
fts_optimize_deleted_pos(
/*=====================*/
@@ -1428,8 +1421,8 @@ fts_optimize_word(
/**********************************************************************//**
Update the FTS index table. This is a delete followed by an insert.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_optimize_write_word(
/*====================*/
trx_t* trx, /*!< in: transaction */
@@ -1441,7 +1434,7 @@ fts_optimize_write_word(
pars_info_t* info;
que_t* graph;
ulint selected;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
char* table_name = fts_get_table_name(fts_table);
info = pars_info_create();
@@ -1470,8 +1463,9 @@ fts_optimize_write_word(
if (error != DB_SUCCESS) {
ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%lu) during optimize, "
- "when deleting a word from the FTS index.\n", error);
+ fprintf(stderr, " InnoDB: Error: (%s) during optimize, "
+ "when deleting a word from the FTS index.\n",
+ ut_strerr(error));
}
fts_que_graph_free(graph);
@@ -1491,9 +1485,10 @@ fts_optimize_write_word(
if (error != DB_SUCCESS) {
ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%lu) "
+ fprintf(stderr, " InnoDB: Error: (%s) "
"during optimize, while adding a "
- "word to the FTS index.\n", error);
+ "word to the FTS index.\n",
+ ut_strerr(error));
}
}
@@ -1529,8 +1524,8 @@ fts_word_free(
/**********************************************************************//**
Optimize the word ilist and rewrite data to the FTS index.
@return status one of RESTART, EXIT, ERROR */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_optimize_compact(
/*=================*/
fts_optimize_t* optim, /*!< in: optimize state data */
@@ -1538,7 +1533,7 @@ fts_optimize_compact(
ib_time_t start_time) /*!< in: optimize start time */
{
ulint i;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
ulint size = ib_vector_size(optim->words);
for (i = 0; i < size && error == DB_SUCCESS && !optim->done; ++i) {
@@ -1622,77 +1617,63 @@ fts_optimize_create(
/**********************************************************************//**
Get optimize start time of an FTS index.
@return DB_SUCCESS if all OK else error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_optimize_get_index_start_time(
/*==============================*/
trx_t* trx, /*!< in: transaction */
dict_index_t* index, /*!< in: FTS index */
ib_time_t* start_time) /*!< out: time in secs */
{
- ulint error;
-
- error = fts_config_get_index_ulint(
- trx, index, FTS_OPTIMIZE_START_TIME, (ulint*) start_time);
-
- return(error);
+ return(fts_config_get_index_ulint(
+ trx, index, FTS_OPTIMIZE_START_TIME,
+ (ulint*) start_time));
}
/**********************************************************************//**
Set the optimize start time of an FTS index.
@return DB_SUCCESS if all OK else error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_optimize_set_index_start_time(
/*==============================*/
trx_t* trx, /*!< in: transaction */
dict_index_t* index, /*!< in: FTS index */
ib_time_t start_time) /*!< in: start time */
{
- ulint error;
-
- error = fts_config_set_index_ulint(
- trx, index, FTS_OPTIMIZE_START_TIME, (ulint) start_time);
-
- return(error);
+ return(fts_config_set_index_ulint(
+ trx, index, FTS_OPTIMIZE_START_TIME,
+ (ulint) start_time));
}
/**********************************************************************//**
Get optimize end time of an FTS index.
@return DB_SUCCESS if all OK else error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_optimize_get_index_end_time(
/*============================*/
trx_t* trx, /*!< in: transaction */
dict_index_t* index, /*!< in: FTS index */
ib_time_t* end_time) /*!< out: time in secs */
{
- ulint error;
-
- error = fts_config_get_index_ulint(
- trx, index, FTS_OPTIMIZE_END_TIME, (ulint*) end_time);
-
- return(error);
+ return(fts_config_get_index_ulint(
+ trx, index, FTS_OPTIMIZE_END_TIME, (ulint*) end_time));
}
/**********************************************************************//**
Set the optimize end time of an FTS index.
@return DB_SUCCESS if all OK else error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_optimize_set_index_end_time(
/*============================*/
trx_t* trx, /*!< in: transaction */
dict_index_t* index, /*!< in: FTS index */
ib_time_t end_time) /*!< in: end time */
{
- ulint error;
-
- error = fts_config_set_index_ulint(
- trx, index, FTS_OPTIMIZE_END_TIME, (ulint) end_time);
-
- return(error);
+ return(fts_config_set_index_ulint(
+ trx, index, FTS_OPTIMIZE_END_TIME, (ulint) end_time));
}
#endif
@@ -1798,7 +1779,7 @@ fts_optimize_words(
fprintf(stderr, "%.*s\n", (int) word->f_len, word->f_str);
while(!optim->done) {
- ulint error;
+ dberr_t error;
trx_t* trx = optim->trx;
ulint selected;
@@ -1901,15 +1882,15 @@ fts_optimize_set_next_word(
Optimize is complete. Set the completion time, and reset the optimize
start string for this FTS index to "".
@return DB_SUCCESS if all OK */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_optimize_index_completed(
/*=========================*/
fts_optimize_t* optim, /*!< in: optimize instance */
dict_index_t* index) /*!< in: table with one FTS index */
{
fts_string_t word;
- ulint error;
+ dberr_t error;
byte buf[sizeof(ulint)];
#ifdef FTS_OPTIMIZE_DEBUG
ib_time_t end_time = ut_time();
@@ -1929,8 +1910,8 @@ fts_optimize_index_completed(
if (error != DB_SUCCESS) {
- fprintf(stderr, "InnoDB: Error: (%lu) while "
- "updating last optimized word!\n", error);
+ fprintf(stderr, "InnoDB: Error: (%s) while "
+ "updating last optimized word!\n", ut_strerr(error));
}
return(error);
@@ -1941,15 +1922,15 @@ fts_optimize_index_completed(
Read the list of words from the FTS auxiliary index that will be
optimized in this pass.
@return DB_SUCCESS if all OK */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_optimize_index_read_words(
/*==========================*/
fts_optimize_t* optim, /*!< in: optimize instance */
dict_index_t* index, /*!< in: table with one FTS index */
fts_string_t* word) /*!< in: buffer to use */
{
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
if (optim->del_list_regenerated) {
word->f_len = 0;
@@ -1998,15 +1979,15 @@ fts_optimize_index_read_words(
Run OPTIMIZE on the given FTS index. Note: this can take a very long
time (hours).
@return DB_SUCCESS if all OK */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_optimize_index(
/*===============*/
fts_optimize_t* optim, /*!< in: optimize instance */
dict_index_t* index) /*!< in: table with one FTS index */
{
fts_string_t word;
- ulint error;
+ dberr_t error;
byte str[FTS_MAX_WORD_LEN + 1];
/* Set the current index that we have to optimize. */
@@ -2069,8 +2050,8 @@ fts_optimize_index(
/**********************************************************************//**
Delete the document ids in the delete, and delete cache tables.
@return DB_SUCCESS if all OK */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_optimize_purge_deleted_doc_ids(
/*===============================*/
fts_optimize_t* optim) /*!< in: optimize instance */
@@ -2081,7 +2062,7 @@ fts_optimize_purge_deleted_doc_ids(
fts_update_t* update;
char* sql_str;
doc_id_t write_doc_id;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
info = pars_info_create();
@@ -2138,13 +2119,13 @@ fts_optimize_purge_deleted_doc_ids(
/**********************************************************************//**
Delete the document ids in the pending delete, and delete tables.
@return DB_SUCCESS if all OK */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_optimize_purge_deleted_doc_id_snapshot(
/*=======================================*/
fts_optimize_t* optim) /*!< in: optimize instance */
{
- ulint error;
+ dberr_t error;
que_t* graph;
char* sql_str;
@@ -2188,13 +2169,13 @@ Copy the deleted doc ids that will be purged during this optimize run
to the being deleted FTS auxiliary tables. The transaction is committed
upon successfull copy and rolled back on DB_DUPLICATE_KEY error.
@return DB_SUCCESS if all OK */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_optimize_create_deleted_doc_id_snapshot(
/*========================================*/
fts_optimize_t* optim) /*!< in: optimize instance */
{
- ulint error;
+ dberr_t error;
que_t* graph;
char* sql_str;
@@ -2226,13 +2207,13 @@ fts_optimize_create_deleted_doc_id_snapshot(
Read in the document ids that are to be purged during optimize. The
transaction is committed upon successfully read.
@return DB_SUCCESS if all OK */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_optimize_read_deleted_doc_id_snapshot(
/*======================================*/
fts_optimize_t* optim) /*!< in: optimize instance */
{
- ulint error;
+ dberr_t error;
optim->fts_common_table.suffix = "BEING_DELETED";
@@ -2263,14 +2244,14 @@ Optimze all the FTS indexes, skipping those that have already been
optimized, since the FTS auxiliary indexes are not guaranteed to be
of the same cardinality.
@return DB_SUCCESS if all OK */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_optimize_indexes(
/*=================*/
fts_optimize_t* optim) /*!< in: optimize instance */
{
ulint i;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
fts_t* fts = optim->table->fts;
/* Optimize the FTS indexes. */
@@ -2333,13 +2314,13 @@ fts_optimize_indexes(
/*********************************************************************//**
Cleanup the snapshot tables and the master deleted table.
@return DB_SUCCESS if all OK */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_optimize_purge_snapshot(
/*========================*/
fts_optimize_t* optim) /*!< in: optimize instance */
{
- ulint error;
+ dberr_t error;
/* Delete the doc ids from the master deleted tables, that were
in the snapshot that was taken at the start of optimize. */
@@ -2362,13 +2343,13 @@ fts_optimize_purge_snapshot(
/*********************************************************************//**
Reset the start time to 0 so that a new optimize can be started.
@return DB_SUCCESS if all OK */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_optimize_reset_start_time(
/*==========================*/
fts_optimize_t* optim) /*!< in: optimize instance */
{
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
#ifdef FTS_OPTIMIZE_DEBUG
fts_t* fts = optim->table->fts;
@@ -2401,13 +2382,13 @@ fts_optimize_reset_start_time(
/*********************************************************************//**
Run OPTIMIZE on the given table by a background thread.
@return DB_SUCCESS if all OK */
-static
-ulint
+static __attribute__((nonnull))
+dberr_t
fts_optimize_table_bk(
/*==================*/
fts_slot_t* slot) /*!< in: table to optimiza */
{
- ulint error;
+ dberr_t error;
dict_table_t* table = slot->table;
fts_t* fts = table->fts;
@@ -2440,12 +2421,12 @@ fts_optimize_table_bk(
Run OPTIMIZE on the given table.
@return DB_SUCCESS if all OK */
UNIV_INTERN
-ulint
+dberr_t
fts_optimize_table(
/*===============*/
dict_table_t* table) /*!< in: table to optimiza */
{
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
fts_optimize_t* optim = NULL;
fts_t* fts = table->fts;
@@ -2567,6 +2548,11 @@ fts_optimize_add_table(
return;
}
+ /* Make sure table with FTS index cannot be evicted */
+ if (table->can_be_evicted) {
+ dict_table_move_from_lru_to_non_lru(table);
+ }
+
msg = fts_optimize_create_msg(FTS_MSG_ADD_TABLE, table);
ib_wqueue_add(fts_optimize_wq, msg, msg->heap);
@@ -2602,18 +2588,26 @@ fts_optimize_remove_table(
dict_table_t* table) /*!< in: table to remove */
{
fts_msg_t* msg;
- os_event_t event;
- fts_msg_del_t* remove;
+ os_event_t event;
+ fts_msg_del_t* remove;
/* if the optimize system not yet initialized, return */
if (!fts_optimize_wq) {
return;
}
+ /* FTS optimizer thread is already exited */
+ if (fts_opt_start_shutdown) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Try to remove table %s after FTS optimize"
+ " thread exiting.", table->name);
+ return;
+ }
+
msg = fts_optimize_create_msg(FTS_MSG_DEL_TABLE, NULL);
/* We will wait on this event until signalled by the consumer. */
- event = os_event_create(table->name);
+ event = os_event_create();
remove = static_cast<fts_msg_del_t*>(
mem_heap_alloc(msg->heap, sizeof(*remove)));
@@ -2889,6 +2883,8 @@ fts_optimize_thread(
ulint n_optimize = 0;
ib_wqueue_t* wq = (ib_wqueue_t*) arg;
+ ut_ad(!srv_read_only_mode);
+
heap = mem_heap_create(sizeof(dict_table_t*) * 64);
heap_alloc = ib_heap_allocator_create(heap);
@@ -3010,10 +3006,10 @@ fts_optimize_thread(
ib_vector_get(tables, i));
if (slot->state != FTS_STATE_EMPTY) {
- dict_table_t* table;
+ dict_table_t* table = NULL;
- table = dict_table_open_on_name_no_stats(
- slot->table->name, FALSE,
+ table = dict_table_open_on_name(
+ slot->table->name, FALSE, FALSE,
DICT_ERR_IGNORE_INDEX_ROOT);
if (table) {
@@ -3022,8 +3018,11 @@ fts_optimize_thread(
fts_sync_table(table);
}
- fts_free(table);
- dict_table_close(table, FALSE);
+ if (table->fts) {
+ fts_free(table);
+ }
+
+ dict_table_close(table, FALSE, FALSE);
}
}
}
@@ -3031,10 +3030,7 @@ fts_optimize_thread(
ib_vector_free(tables);
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: FTS optimize thread exiting.\n");
-
- ib_wqueue_free(wq);
+ ib_logf(IB_LOG_LEVEL_INFO, "FTS optimize thread exiting.");
os_event_set(exit_event);
@@ -3052,6 +3048,8 @@ void
fts_optimize_init(void)
/*===================*/
{
+ ut_ad(!srv_read_only_mode);
+
/* For now we only support one optimize thread. */
ut_a(fts_optimize_wq == NULL);
@@ -3074,18 +3072,30 @@ fts_optimize_is_init(void)
/**********************************************************************//**
Signal the optimize thread to prepare for shutdown. */
-
+UNIV_INTERN
void
fts_optimize_start_shutdown(void)
/*=============================*/
{
+ ut_ad(!srv_read_only_mode);
+
fts_msg_t* msg;
os_event_t event;
+ /* If there is an ongoing activity on dictionary, such as
+ srv_master_evict_from_table_cache(), wait for it */
+ dict_mutex_enter_for_mysql();
+
+	/* Tell the FTS optimizer system that we are exiting from
+	the optimizer thread; messages sent hereafter will not be
+	processed */
+ fts_opt_start_shutdown = true;
+ dict_mutex_exit_for_mysql();
+
/* We tell the OPTIMIZE thread to switch to state done, we
can't delete the work queue here because the add thread needs
deregister the FTS tables. */
- event = os_event_create(NULL);
+ event = os_event_create();
msg = fts_optimize_create_msg(FTS_MSG_STOP, NULL);
msg->ptr = event;
@@ -3094,15 +3104,20 @@ fts_optimize_start_shutdown(void)
os_event_wait(event);
os_event_free(event);
+
+ ib_wqueue_free(fts_optimize_wq);
+
}
/**********************************************************************//**
Reset the work queue. */
-
+UNIV_INTERN
void
fts_optimize_end(void)
/*==================*/
{
+ ut_ad(!srv_read_only_mode);
+
// FIXME: Potential race condition here: We should wait for
// the optimize thread to confirm shutdown.
fts_optimize_wq = NULL;
diff --git a/storage/innobase/fts/fts0pars.cc b/storage/innobase/fts/fts0pars.cc
index 4fdfff5ca42..dd2984b1beb 100644
--- a/storage/innobase/fts/fts0pars.cc
+++ b/storage/innobase/fts/fts0pars.cc
@@ -105,7 +105,7 @@ extern int ftserror(const char* p);
typedef int (*fts_scanner_alt)(YYSTYPE* val, yyscan_t yyscanner);
typedef int (*fts_scanner)();
-struct fts_lexer_struct {
+struct fts_lexer_t {
fts_scanner scanner;
void* yyscanner;
};
diff --git a/storage/innobase/fts/fts0que.cc b/storage/innobase/fts/fts0que.cc
index 58b429a8406..5c757b4f176 100644
--- a/storage/innobase/fts/fts0que.cc
+++ b/storage/innobase/fts/fts0que.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -24,6 +24,7 @@ Created 2007/03/27 Sunny Bains
Completed 2011/7/10 Sunny and Jimmy Yang
*******************************************************/
+#include "dict0dict.h" /* dict_table_get_n_rows() */
#include "ut0rbt.h"
#include "row0sel.h"
#include "fts0fts.h"
@@ -57,15 +58,10 @@ static const double FTS_NORMALIZE_COEFF = 0.0115F;
/* For parsing the search phrase */
static const char* FTS_PHRASE_DELIMITER = "\t ";
-typedef struct fts_match_struct fts_match_t;
-typedef struct fts_query_struct fts_query_t;
-typedef struct fts_phrase_struct fts_phrase_t;
-typedef struct fts_select_struct fts_select_t;
-typedef struct fts_doc_freq_struct fts_doc_freq_t;
-typedef struct fts_word_freq_struct fts_word_freq_t;
+struct fts_word_freq_t;
/** State of an FTS query. */
-struct fts_query_struct {
+struct fts_query_t {
mem_heap_t* heap; /*!< Heap to use for allocations */
trx_t* trx; /*!< The query transaction */
@@ -126,11 +122,11 @@ struct fts_query_struct {
position info for each matched word
in the word list */
- ulint total_docs; /*!< The total number of documents */
+ ib_uint64_t total_docs; /*!< The total number of documents */
ulint total_words; /*!< The total number of words */
- ulint error; /*!< Error code if any, that is
+ dberr_t error; /*!< Error code if any, that is
encountered during query processing */
ib_rbt_t* word_freqs; /*!< RB tree of word frequencies per
@@ -144,7 +140,7 @@ struct fts_query_struct {
/** For phrase matching, first we collect the documents and the positions
then we match. */
-struct fts_match_struct {
+struct fts_match_t {
doc_id_t doc_id; /*!< Document id */
ulint start; /*!< Start the phrase match from
@@ -158,7 +154,7 @@ struct fts_match_struct {
/** For matching tokens in a phrase search. We use this data structure in
the callback that determines whether a document should be accepted or
rejected for a phrase search. */
-struct fts_select_struct {
+struct fts_select_t {
doc_id_t doc_id; /*!< The document id to match */
ulint min_pos; /*!< For found to be TRUE at least
@@ -173,8 +169,23 @@ struct fts_select_struct {
the FTS index */
};
+/** structure defines a set of ranges for original documents, each of which
+has a minimum position and maximum position. Text in such range should
+contain all words in the proximity search. We will need to count the
+words in such range to make sure it is less than the specified distance
+of the proximity search */
+struct fts_proximity_t {
+ ulint n_pos; /*!< number of position set, defines
+ a range (min to max) containing all
+ matching words */
+ ulint* min_pos; /*!< the minimum position (in bytes)
+ of the range */
+ ulint* max_pos; /*!< the maximum position (in bytes)
+ of the range */
+};
+
/** The match positions and tokesn to match */
-struct fts_phrase_struct {
+struct fts_phrase_t {
ibool found; /*!< Match result */
const fts_match_t*
@@ -188,23 +199,26 @@ struct fts_phrase_struct {
CHARSET_INFO* charset; /*!< Phrase match charset */
mem_heap_t* heap; /*!< Heap for word processing */
ulint zip_size; /*!< row zip size */
+ fts_proximity_t*proximity_pos; /*!< position info for proximity
+ search verification. Records the min
+ and max position of words matched */
};
/** For storing the frequncy of a word/term in a document */
-struct fts_doc_freq_struct {
+struct fts_doc_freq_t {
doc_id_t doc_id; /*!< Document id */
ulint freq; /*!< Frequency of a word in a document */
};
/** To determine the word frequency per document. */
-struct fts_word_freq_struct {
+struct fts_word_freq_t {
byte* word; /*!< Word for which we need the freq,
it's allocated on the query heap */
ib_rbt_t* doc_freqs; /*!< RB Tree for storing per document
word frequencies. The elements are
of type fts_doc_freq_t */
- ulint doc_count; /*!< Total number of documents that
+ ib_uint64_t doc_count; /*!< Total number of documents that
contain this word */
double idf; /*!< Inverse document frequency */
};
@@ -257,37 +271,46 @@ search arguments to search the document again, thus "expand"
the search result set.
@return DB_SUCCESS if success, otherwise the error code */
static
-ulint
+dberr_t
fts_expand_query(
/*=============*/
dict_index_t* index, /*!< in: FTS index to search */
- fts_query_t* query); /*!< in: query result, to be freed
+ fts_query_t* query) /*!< in: query result, to be freed
by the client */
+ __attribute__((nonnull, warn_unused_result));
/*************************************************************//**
This function finds documents that contain all words in a
phrase or proximity search. And if proximity search, verify
-the words are close to each other enough, as in specified distance.
+the words are close enough to each other, as in specified distance.
This function is called for phrase and proximity search.
@return TRUE if documents are found, FALSE if otherwise */
static
ibool
-fts_check_phrase_proximity(
-/*=======================*/
- fts_query_t* query, /*!< in: query instance */
+fts_phrase_or_proximity_search(
+/*===========================*/
+ fts_query_t* query, /*!< in/out: query instance
+ query->doc_ids might be instantiated
+ with qualified doc IDs */
ib_vector_t* tokens); /*!< in: Tokens contain words */
/*************************************************************//**
-This function check the words in result document are close to each
-other enough (within proximity rnage). This is used for proximity search.
-@return TRUE if words are close to each other, FALSE if otherwise */
+This function checks whether words in result documents are close to
+each other (within proximity range as specified by "distance").
+If "distance" is MAX_ULINT, then it will find all combinations of
+positions of matching words and store min and max positions
+in the "qualified_pos" for later verification.
+@return true if words are close to each other, false if otherwise */
static
-ulint
-fts_proximity_check_position(
-/*=========================*/
- fts_match_t** match, /*!< in: query instance */
- ulint num_match, /*!< in: number of matching
- items */
- ulint distance); /*!< in: distance value
- for proximity search */
+bool
+fts_proximity_get_positions(
+/*========================*/
+	fts_match_t**	match,		/*!< in: matching positions */
+ ulint num_match, /*!< in: number of matching
+ items */
+ ulint distance, /*!< in: distance value
+ for proximity search */
+ fts_proximity_t* qualified_pos); /*!< out: the position info
+ records ranges containing
+ all matching words. */
#if 0
/********************************************************************
Get the total number of words in a documents. */
@@ -954,8 +977,8 @@ cont_search:
/*****************************************************************//**
Set difference.
@return DB_SUCCESS if all went well */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_query_difference(
/*=================*/
fts_query_t* query, /*!< in: query instance */
@@ -993,15 +1016,21 @@ fts_query_difference(
ut_a(index_cache != NULL);
/* Search the cache for a matching word first. */
- nodes = fts_cache_find_word(index_cache, token);
+ if (query->cur_node->term.wildcard
+ && query->flags != FTS_PROXIMITY
+ && query->flags != FTS_PHRASE) {
+ fts_cache_find_wildcard(query, index_cache, token);
+ } else {
+ nodes = fts_cache_find_word(index_cache, token);
- for (i = 0; nodes && i < ib_vector_size(nodes); ++i) {
- const fts_node_t* node;
+ for (i = 0; nodes && i < ib_vector_size(nodes); ++i) {
+ const fts_node_t* node;
- node = static_cast<const fts_node_t*>(
- ib_vector_get_const(nodes, i));
+ node = static_cast<const fts_node_t*>(
+ ib_vector_get_const(nodes, i));
- fts_query_check_node(query, token, node);
+ fts_query_check_node(query, token, node);
+ }
}
rw_lock_x_unlock(&cache->lock);
@@ -1026,8 +1055,8 @@ fts_query_difference(
/*****************************************************************//**
Intersect the token doc ids with the current set.
@return DB_SUCCESS if all went well */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_query_intersect(
/*================*/
fts_query_t* query, /*!< in: query instance */
@@ -1216,8 +1245,8 @@ fts_query_cache(
/*****************************************************************//**
Set union.
@return DB_SUCCESS if all went well */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_query_union(
/*============*/
fts_query_t* query, /*!< in: query instance */
@@ -1248,13 +1277,7 @@ fts_query_union(
/* Single '%' would confuse parser in pars_like_rebind(). In addition,
our wildcard search only supports prefix search */
- if (*token->f_str == '%') {
- if (token->f_len == 1) {
- return(query->error);
- }
- token->f_str++;
- token->f_len--;
- }
+ ut_ad(*token->f_str != '%');
fts_query_cache(query, token);
@@ -1485,6 +1508,67 @@ fts_query_match_phrase_terms(
}
/*****************************************************************//**
+Callback function to count the number of words in position ranges,
+and see whether the word count is in specified "phrase->distance"
+@return true if the number of characters is less than the "distance" */
+static
+bool
+fts_proximity_is_word_in_range(
+/*===========================*/
+ const fts_phrase_t*
+ phrase, /*!< in: phrase with the search info */
+ byte* start, /*!< in: text to search */
+ ulint total_len) /*!< in: length of text */
+{
+ fts_proximity_t* proximity_pos = phrase->proximity_pos;
+
+ /* Search each matched position pair (with min and max positions)
+ and count the number of words in the range */
+ for (ulint i = 0; i < proximity_pos->n_pos; i++) {
+ ulint cur_pos = proximity_pos->min_pos[i];
+ ulint n_word = 0;
+
+ ut_ad(proximity_pos->max_pos[i] <= total_len);
+
+ /* Walk through words in the range and count them */
+ while (cur_pos <= proximity_pos->max_pos[i]) {
+ ulint len;
+ fts_string_t str;
+ ulint offset = 0;
+
+ len = innobase_mysql_fts_get_token(
+ phrase->charset,
+ start + cur_pos,
+ start + total_len, &str, &offset);
+
+ if (len == 0) {
+ break;
+ }
+
+ /* Advances position with "len" bytes */
+ cur_pos += len;
+
+ /* Record the number of words */
+ if (str.f_n_char > 0) {
+ n_word++;
+ }
+
+ if (n_word > phrase->distance) {
+ break;
+ }
+ }
+
+ /* Check if the number of words is less than specified
+ "distance" */
+ if (n_word && n_word <= phrase->distance) {
+ return(true);
+ }
+ }
+
+ return(false);
+}
+
+/*****************************************************************//**
Callback function to fetch and search the document.
@return TRUE if matched else FALSE */
static
@@ -1594,31 +1678,77 @@ fts_query_fetch_document(
sel_node_t* node = static_cast<sel_node_t*>(row);
fts_phrase_t* phrase = static_cast<fts_phrase_t*>(user_arg);
ulint prev_len = 0;
+ ulint total_len = 0;
+ byte* document_text = NULL;
exp = node->select_list;
phrase->found = FALSE;
+ /* For proximity search, we will need to get the whole document
+ from all fields, so first count the total length of the document
+ from all the fields */
+ if (phrase->proximity_pos) {
+ while (exp) {
+ ulint field_len;
+ dfield_t* dfield = que_node_get_val(exp);
+ byte* data = static_cast<byte*>(
+ dfield_get_data(dfield));
+
+ if (dfield_is_ext(dfield)) {
+ ulint local_len = dfield_get_len(dfield);
+
+ local_len -= BTR_EXTERN_FIELD_REF_SIZE;
+
+ field_len = mach_read_from_4(
+ data + local_len + BTR_EXTERN_LEN + 4);
+ } else {
+ field_len = dfield_get_len(dfield);
+ }
+
+ if (field_len != UNIV_SQL_NULL) {
+ total_len += field_len + 1;
+ }
+
+ exp = que_node_get_next(exp);
+ }
+
+ document_text = static_cast<byte*>(mem_heap_zalloc(
+ phrase->heap, total_len));
+
+ if (!document_text) {
+ return(FALSE);
+ }
+ }
+
+ exp = node->select_list;
+
while (exp) {
dfield_t* dfield = que_node_get_val(exp);
- void* data = NULL;
+ byte* data = static_cast<byte*>(
+ dfield_get_data(dfield));
ulint cur_len;
if (dfield_is_ext(dfield)) {
data = btr_copy_externally_stored_field(
- &cur_len, static_cast<const byte*>(data),
- phrase->zip_size,
+ &cur_len, data, phrase->zip_size,
dfield_get_len(dfield), phrase->heap);
} else {
- data = dfield_get_data(dfield);
cur_len = dfield_get_len(dfield);
}
if (cur_len != UNIV_SQL_NULL && cur_len != 0) {
- phrase->found =
- fts_query_match_phrase(
- phrase, static_cast<byte*>(data),
- cur_len, prev_len, phrase->heap);
+ if (phrase->proximity_pos) {
+ memcpy(document_text + prev_len, data, cur_len);
+ } else {
+ /* For phrase search */
+ phrase->found =
+ fts_query_match_phrase(
+ phrase,
+ static_cast<byte*>(data),
+ cur_len, prev_len,
+ phrase->heap);
+ }
}
if (phrase->found) {
@@ -1633,6 +1763,13 @@ fts_query_fetch_document(
exp = que_node_get_next(exp);
}
+ if (phrase->proximity_pos) {
+ ut_ad(prev_len <= total_len);
+
+ phrase->found = fts_proximity_is_word_in_range(
+ phrase, document_text, total_len);
+ }
+
return(phrase->found);
}
@@ -1689,13 +1826,12 @@ fts_query_select(
/********************************************************************
Read the rows from the FTS index, that match word and where the
-doc id is between first and last doc id. */
-static
-ulint
+doc id is between first and last doc id.
+@return DB_SUCCESS if all went well else error code */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_query_find_term(
/*================*/
- /*!< out: DB_SUCCESS if all went well
- else error code */
fts_query_t* query, /*!< in: FTS query state */
que_t** graph, /*!< in: prepared statement */
const fts_string_t* word, /*!< in: the word to fetch */
@@ -1705,7 +1841,7 @@ fts_query_find_term(
ibool* found) /*!< out: TRUE if found else FALSE */
{
pars_info_t* info;
- ulint error;
+ dberr_t error;
fts_select_t select;
doc_id_t match_doc_id;
trx_t* trx = query->trx;
@@ -1830,19 +1966,18 @@ fts_query_sum(
}
/********************************************************************
-Calculate the total documents that contain a particular word (term). */
-static
-ulint
+Calculate the total documents that contain a particular word (term).
+@return DB_SUCCESS if all went well else error code */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_query_total_docs_containing_term(
/*=================================*/
- /*!< out: DB_SUCCESS if all went well
- else error code */
fts_query_t* query, /*!< in: FTS query state */
const fts_string_t* word, /*!< in: the word to check */
ulint* total) /*!< out: documents containing word */
{
pars_info_t* info;
- ulint error;
+ dberr_t error;
que_t* graph;
ulint selected;
trx_t* trx = query->trx;
@@ -1910,19 +2045,18 @@ fts_query_total_docs_containing_term(
}
/********************************************************************
-Get the total number of words in a documents. */
-static
-ulint
+Get the total number of words in a documents.
+@return DB_SUCCESS if all went well else error code */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_query_terms_in_document(
/*========================*/
- /*!< out: DB_SUCCESS if all went well
- else error code */
fts_query_t* query, /*!< in: FTS query state */
doc_id_t doc_id, /*!< in: the word to check */
ulint* total) /*!< out: total words in document */
{
pars_info_t* info;
- ulint error;
+ dberr_t error;
que_t* graph;
doc_id_t read_doc_id;
trx_t* trx = query->trx;
@@ -1993,9 +2127,9 @@ fts_query_terms_in_document(
/*****************************************************************//**
Retrieve the document and match the phrase tokens.
-@return TRUE if matches else FALSE */
-static
-ulint
+@return DB_SUCCESS or error code */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_query_match_document(
/*=====================*/
ib_vector_t* tokens, /*!< in: phrase tokens */
@@ -2004,7 +2138,7 @@ fts_query_match_document(
ulint distance, /*!< in: proximity distance */
ibool* found) /*!< out: TRUE if phrase found */
{
- ulint error;
+ dberr_t error;
fts_phrase_t phrase;
memset(&phrase, 0x0, sizeof(phrase));
@@ -2025,8 +2159,8 @@ fts_query_match_document(
if (error != DB_SUCCESS) {
ut_print_timestamp(stderr);
- fprintf(stderr, "InnoDB: Error: (%lu) matching document.\n",
- error);
+ fprintf(stderr, "InnoDB: Error: (%s) matching document.\n",
+ ut_strerr(error));
} else {
*found = phrase.found;
}
@@ -2037,11 +2171,66 @@ fts_query_match_document(
}
/*****************************************************************//**
+This function fetches the original documents and counts the words
+between matching words to check that they are within the specified distance
+@return true if no error and the words are within the proximity range */
+static __attribute__((nonnull, warn_unused_result))
+bool
+fts_query_is_in_proximity_range(
+/*============================*/
+ const fts_query_t* query, /*!< in: query instance */
+	fts_match_t**		match,		/*!< in: matching positions */
+ fts_proximity_t* qualified_pos) /*!< in: position info for
+ qualified ranges */
+{
+ fts_get_doc_t get_doc;
+ fts_cache_t* cache = query->index->table->fts->cache;
+ dberr_t err;
+ fts_phrase_t phrase;
+
+ memset(&get_doc, 0x0, sizeof(get_doc));
+ memset(&phrase, 0x0, sizeof(phrase));
+
+ rw_lock_x_lock(&cache->lock);
+ get_doc.index_cache = fts_find_index_cache(cache, query->index);
+ rw_lock_x_unlock(&cache->lock);
+ ut_a(get_doc.index_cache != NULL);
+
+ phrase.distance = query->distance;
+ phrase.charset = get_doc.index_cache->charset;
+ phrase.zip_size = dict_table_zip_size(
+ get_doc.index_cache->index->table);
+ phrase.heap = mem_heap_create(512);
+ phrase.proximity_pos = qualified_pos;
+ phrase.found = FALSE;
+
+ err = fts_doc_fetch_by_doc_id(
+ &get_doc, match[0]->doc_id, NULL, FTS_FETCH_DOC_BY_ID_EQUAL,
+ fts_query_fetch_document, &phrase);
+
+ if (err != DB_SUCCESS) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Error: (%s) in verification phase of proximity "
+ "search", ut_strerr(err));
+ }
+
+ /* Free the prepared statement. */
+ if (get_doc.get_document_graph) {
+ fts_que_graph_free(get_doc.get_document_graph);
+ get_doc.get_document_graph = NULL;
+ }
+
+ mem_heap_free(phrase.heap);
+
+ return(err == DB_SUCCESS && phrase.found);
+}
+
+/*****************************************************************//**
Iterate over the matched document ids and search the for the
actual phrase in the text.
@return DB_SUCCESS if all OK */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_query_search_phrase(
/*====================*/
fts_query_t* query, /*!< in: query instance */
@@ -2050,8 +2239,6 @@ fts_query_search_phrase(
ulint i;
fts_get_doc_t get_doc;
ulint n_matched;
- // FIXME: Debug code
- ulint searched = 0;
fts_cache_t* cache = query->index->table->fts->cache;
n_matched = ib_vector_size(query->matched);
@@ -2061,9 +2248,7 @@ fts_query_search_phrase(
rw_lock_x_lock(&cache->lock);
- // FIXME: We shouldn't have to cast here.
- get_doc.index_cache = (fts_index_cache_t*)
- fts_find_index_cache(cache, query->index);
+ get_doc.index_cache = fts_find_index_cache(cache, query->index);
/* Must find the index cache */
ut_a(get_doc.index_cache != NULL);
@@ -2089,9 +2274,6 @@ fts_query_search_phrase(
an earlier pass. */
if (match->doc_id != 0) {
- // FIXME: Debug code
- ++searched;
-
query->error = fts_query_match_document(
tokens, &get_doc,
match, query->distance, &found);
@@ -2119,18 +2301,14 @@ fts_query_search_phrase(
get_doc.get_document_graph = NULL;
}
- // FIXME: Debug code
- ut_print_timestamp(stderr);
- printf(" End: %lu, %lu\n", searched, ib_vector_size(query->matched));
-
return(query->error);
}
/*****************************************************************//**
Text/Phrase search.
-@return count of doc ids added */
-static
-ulint
+@return DB_SUCCESS or error code */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_query_phrase_search(
/*====================*/
fts_query_t* query, /*!< in: query instance */
@@ -2290,7 +2468,7 @@ fts_query_phrase_search(
/* If we are doing proximity search, verify the distance
between all words, and check they are in specified distance. */
if (query->flags & FTS_PROXIMITY) {
- fts_check_phrase_proximity(query, tokens);
+ fts_phrase_or_proximity_search(query, tokens);
} else {
ibool matched;
@@ -2301,7 +2479,7 @@ fts_query_phrase_search(
and then doing a search through the text. Isolated
testing shows this also helps in mitigating disruption
of the buffer cache. */
- matched = fts_check_phrase_proximity(query, tokens);
+ matched = fts_phrase_or_proximity_search(query, tokens);
query->matched = query->match_array[0];
/* Read the actual text in and search for the phrase. */
@@ -2329,8 +2507,8 @@ func_exit:
/*****************************************************************//**
Find the word and evaluate.
@return DB_SUCCESS if all went well */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_query_execute(
/*==============*/
fts_query_t* query, /*!< in: query instance */
@@ -2477,13 +2655,12 @@ fts_query_visitor(
/*****************************************************************//**
Process (nested) sub-expression, create a new result set to store the
sub-expression result by processing nodes under current sub-expression
-list. Merge the sub-expression result with that of parent expression list. */
-
-ulint
+list. Merge the sub-expression result with that of parent expression list.
+@return DB_SUCCESS if all went well */
+UNIV_INTERN
+dberr_t
fts_ast_visit_sub_exp(
/*==================*/
- /*!< out: DB_SUCCESS if all
- went well */
fts_ast_node_t* node, /*!< in,out: current root node */
fts_ast_callback visitor, /*!< in: callback function */
void* arg) /*!< in,out: arg for callback */
@@ -2492,8 +2669,9 @@ fts_ast_visit_sub_exp(
fts_query_t* query = static_cast<fts_query_t*>(arg);
ib_rbt_t* parent_doc_ids;
ib_rbt_t* subexpr_doc_ids;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
ibool inited = query->inited;
+ bool will_be_ignored = false;
ut_a(node->type == FTS_AST_SUBEXP_LIST);
@@ -2521,7 +2699,8 @@ fts_ast_visit_sub_exp(
/* Process nodes in current sub-expression and store its
result set in query->doc_ids we created above. */
- error = fts_ast_visit(FTS_NONE, node->next, visitor, arg);
+ error = fts_ast_visit(FTS_NONE, node->next, visitor,
+ arg, &will_be_ignored);
/* Reinstate parent node state and prepare for merge. */
query->inited = inited;
@@ -2757,6 +2936,8 @@ fts_query_read_node(
ut_a(query->cur_node->type == FTS_AST_TERM ||
query->cur_node->type == FTS_AST_TEXT);
+ memset(&node, 0, sizeof(node));
+
/* Need to consider the wildcard search case, the word frequency
is created on the search string not the actual word. So we need
to assign the frequency on search string behalf. */
@@ -2879,8 +3060,8 @@ fts_query_calculate_idf(
/*====================*/
fts_query_t* query) /*!< in: Query state */
{
- const ib_rbt_node_t* node;
- double total_docs = query->total_docs;
+ const ib_rbt_node_t* node;
+ ib_uint64_t total_docs = query->total_docs;
/* We need to free any instances of fts_doc_freq_t that we
may have allocated. */
@@ -2893,7 +3074,7 @@ fts_query_calculate_idf(
word_freq = rbt_value(fts_word_freq_t, node);
if (word_freq->doc_count > 0) {
- if (total_docs == (double) word_freq->doc_count) {
+ if (total_docs == word_freq->doc_count) {
/* QP assume ranking > 0 if we find
a match. Since Log10(1) = 0, we cannot
make IDF a zero value if do find a
@@ -2907,10 +3088,13 @@ fts_query_calculate_idf(
}
}
- fprintf(stderr,"'%s' -> %lu/%lu %6.5lf\n",
- word_freq->word,
- query->total_docs, word_freq->doc_count,
- word_freq->idf);
+ if (fts_enable_diag_print) {
+ fprintf(stderr,"'%s' -> " UINT64PF "/" UINT64PF
+ " %6.5lf\n",
+ word_freq->word,
+ query->total_docs, word_freq->doc_count,
+ word_freq->idf);
+ }
}
}
@@ -3017,7 +3201,7 @@ fts_retrieve_ranking(
ranking = rbt_value(fts_ranking_t, parent.last);
- return (ranking->rank);
+ return(ranking->rank);
}
return(0);
@@ -3184,7 +3368,7 @@ fts_query_parse(
FTS Query entry point.
@return DB_SUCCESS if successful otherwise error code */
UNIV_INTERN
-ulint
+dberr_t
fts_query(
/*======*/
trx_t* trx, /*!< in: transaction */
@@ -3196,7 +3380,7 @@ fts_query(
fts_result_t** result) /*!< in/out: result doc ids */
{
fts_query_t query;
- ulint error;
+ dberr_t error = DB_SUCCESS;
byte* lc_query_str;
ulint lc_query_str_len;
ulint result_len;
@@ -3204,6 +3388,7 @@ fts_query(
trx_t* query_trx;
CHARSET_INFO* charset;
ulint start_time_ms;
+ bool will_be_ignored = false;
boolean_mode = flags & FTS_BOOL;
@@ -3237,20 +3422,24 @@ fts_query(
/* Setup the RB tree that will be used to collect per term
statistics. */
query.word_freqs = rbt_create_arg_cmp(
- sizeof(fts_word_freq_t), innobase_fts_string_cmp, charset);
+ sizeof(fts_word_freq_t), innobase_fts_string_cmp,
+ (void*) charset);
- query.total_docs = fts_get_total_document_count(index->table);
+ query.total_docs = dict_table_get_n_rows(index->table);
- error = fts_get_total_word_count(trx, query.index, &query.total_words);
+#ifdef FTS_DOC_STATS_DEBUG
+ if (ft_enable_diag_print) {
+ error = fts_get_total_word_count(
+ trx, query.index, &query.total_words);
- if (error != DB_SUCCESS) {
- goto func_exit;
- }
+ if (error != DB_SUCCESS) {
+ goto func_exit;
+ }
-#ifdef FTS_INTERNAL_DIAG_PRINT
- fprintf(stderr, "Total docs: %lu Total words: %lu\n",
- query.total_docs, query.total_words);
-#endif
+ fprintf(stderr, "Total docs: " UINT64PF " Total words: %lu\n",
+ query.total_docs, query.total_words);
+ }
+#endif /* FTS_DOC_STATS_DEBUG */
query.fts_common_table.suffix = "DELETED";
@@ -3299,13 +3488,14 @@ fts_query(
sizeof(fts_ranking_t), fts_ranking_doc_id_cmp);
/* Parse the input query string. */
- if (fts_query_parse(&query, lc_query_str, query_len)) {
+ if (fts_query_parse(&query, lc_query_str, result_len)) {
fts_ast_node_t* ast = query.root;
/* Traverse the Abstract Syntax Tree (AST) and execute
the query. */
query.error = fts_ast_visit(
- FTS_NONE, ast, fts_query_visitor, &query);
+ FTS_NONE, ast, fts_query_visitor,
+ &query, &will_be_ignored);
/* If query expansion is requested, extend the search
with first search pass result */
@@ -3453,8 +3643,8 @@ words in documents found in the first search pass will be used as
search arguments to search the document again, thus "expand"
the search result set.
@return DB_SUCCESS if success, otherwise the error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
fts_expand_query(
/*=============*/
dict_index_t* index, /*!< in: FTS index to search */
@@ -3463,7 +3653,7 @@ fts_expand_query(
const ib_rbt_node_t* node;
const ib_rbt_node_t* token_node;
fts_doc_t result_doc;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
const fts_index_cache_t*index_cache;
/* If no doc is found in first search pass, return */
@@ -3482,7 +3672,7 @@ fts_expand_query(
result_doc.tokens = rbt_create_arg_cmp(
sizeof(fts_token_t), innobase_fts_text_cmp,
- index_cache->charset);
+ (void*) index_cache->charset);
result_doc.charset = index_cache->charset;
@@ -3557,14 +3747,16 @@ fts_expand_query(
/*************************************************************//**
This function finds documents that contain all words in a
phrase or proximity search. And if proximity search, verify
-the words are close to each other enough, as in specified distance.
+the words are close enough to each other, as in specified distance.
This function is called for phrase and proximity search.
@return TRUE if documents are found, FALSE if otherwise */
static
ibool
-fts_check_phrase_proximity(
-/*=======================*/
- fts_query_t* query, /*!< in: query instance */
+fts_phrase_or_proximity_search(
+/*===========================*/
+ fts_query_t* query, /*!< in/out: query instance.
+ query->doc_ids might be instantiated
+ with qualified doc IDs */
ib_vector_t* tokens) /*!< in: Tokens contain words */
{
ulint n_matched;
@@ -3581,8 +3773,13 @@ fts_check_phrase_proximity(
walk through the list and find common documents that
contain all the matching words. */
for (i = 0; i < n_matched; i++) {
- ulint j;
- ulint k = 0;
+ ulint j;
+ ulint k = 0;
+ fts_proximity_t qualified_pos;
+ ulint qualified_pos_buf[MAX_PROXIMITY_ITEM * 2];
+
+ qualified_pos.min_pos = &qualified_pos_buf[0];
+ qualified_pos.max_pos = &qualified_pos_buf[MAX_PROXIMITY_ITEM];
match[0] = static_cast<fts_match_t*>(
ib_vector_get(query->match_array[0], i));
@@ -3647,24 +3844,31 @@ fts_check_phrase_proximity(
/* For this matching doc, we need to further
verify whether the words in the doc are close
- to each other, and with in distance specified
+ to each other, and within the distance specified
in the proximity search */
if (query->flags & FTS_PHRASE) {
matched = TRUE;
- } else if (fts_proximity_check_position(
- match, num_token, query->distance)) {
- ulint z;
- /* If so, mark we find a matching doc */
- fts_query_process_doc_id(query, match[0]->doc_id, 0);
+ } else if (fts_proximity_get_positions(
+ match, num_token, ULINT_MAX, &qualified_pos)) {
+
+ /* Fetch the original documents and count the
+ words in between matching words to see that is in
+ specified distance */
+ if (fts_query_is_in_proximity_range(
+ query, match, &qualified_pos)) {
+ /* If so, mark we find a matching doc */
+ fts_query_process_doc_id(
+ query, match[0]->doc_id, 0);
- matched = TRUE;
- for (z = 0; z < num_token; z++) {
- fts_string_t* token;
- token = static_cast<fts_string_t*>(
- ib_vector_get(tokens, z));
- fts_query_add_word_to_document(
- query, match[0]->doc_id,
- token->f_str);
+ matched = TRUE;
+ for (ulint z = 0; z < num_token; z++) {
+ fts_string_t* token;
+ token = static_cast<fts_string_t*>(
+ ib_vector_get(tokens, z));
+ fts_query_add_word_to_document(
+ query, match[0]->doc_id,
+ token->f_str);
+ }
}
}
@@ -3678,24 +3882,32 @@ func_exit:
}
/*************************************************************//**
-This function check the words in result document are close to each
-other (within proximity range). This is used for proximity search.
-@return TRUE if words are close to each other, FALSE if otherwise */
+This function checks whether words in result documents are close to
+each other (within proximity range as specified by "distance").
+If "distance" is MAX_ULINT, then it will find all combinations of
+positions of matching words and store min and max positions
+in the "qualified_pos" for later verification.
+@return true if words are close to each other, false if otherwise */
static
-ulint
-fts_proximity_check_position(
-/*=========================*/
- fts_match_t** match, /*!< in: query instance */
- ulint num_match, /*!< in: number of matching
- items */
- ulint distance) /*!< in: distance value
- for proximity search */
+bool
+fts_proximity_get_positions(
+/*========================*/
+ fts_match_t** match, /*!< in: query instance */
+ ulint num_match, /*!< in: number of matching
+ items */
+ ulint distance, /*!< in: distance value
+ for proximity search */
+ fts_proximity_t* qualified_pos) /*!< out: the position info
+ records ranges containing
+ all matching words. */
{
ulint i;
ulint idx[MAX_PROXIMITY_ITEM];
ulint num_pos[MAX_PROXIMITY_ITEM];
ulint min_idx;
+ qualified_pos->n_pos = 0;
+
ut_a(num_match < MAX_PROXIMITY_ITEM);
/* Each word could appear multiple times in a doc. So
@@ -3747,14 +3959,21 @@ fts_proximity_check_position(
find a good match */
if (max_pos - min_pos <= distance
&& (i >= num_match || position[i] != ULINT_UNDEFINED)) {
- return(TRUE);
- } else {
- /* Otherwise, move to the next position is the
- list for the word with the smallest position */
- idx[min_idx]++;
+ /* The charset has variable character
+ length encoding, record the min_pos and
+ max_pos, we will need to verify the actual
+ number of characters */
+ qualified_pos->min_pos[qualified_pos->n_pos] = min_pos;
+ qualified_pos->max_pos[qualified_pos->n_pos] = max_pos;
+ qualified_pos->n_pos++;
}
+
+ /* Otherwise, move to the next position is the
+ list for the word with the smallest position */
+ idx[min_idx]++;
}
- /* Failed to find all words within the range for the doc */
- return(FALSE);
+ ut_ad(qualified_pos->n_pos <= MAX_PROXIMITY_ITEM);
+
+ return(qualified_pos->n_pos != 0);
}
diff --git a/storage/innobase/fts/fts0sql.cc b/storage/innobase/fts/fts0sql.cc
index 8e60a5f1132..03c19d93af6 100644
--- a/storage/innobase/fts/fts0sql.cc
+++ b/storage/innobase/fts/fts0sql.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -262,7 +262,7 @@ fts_parse_sql_no_dict_lock(
Evaluate an SQL query graph.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_eval_sql(
/*=========*/
trx_t* trx, /*!< in: transaction */
@@ -327,16 +327,16 @@ fts_get_select_columns_str(
Commit a transaction.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_sql_commit(
/*===========*/
trx_t* trx) /*!< in: transaction */
{
- ulint error;
+ dberr_t error;
error = trx_commit_for_mysql(trx);
- /* Commit above returns 0 on success, it should always succeed */
+ /* Commit should always succeed */
ut_a(error == DB_SUCCESS);
return(DB_SUCCESS);
@@ -346,7 +346,7 @@ fts_sql_commit(
Rollback a transaction.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_sql_rollback(
/*=============*/
trx_t* trx) /*!< in: transaction */
diff --git a/storage/innobase/fts/fts0tlex.cc b/storage/innobase/fts/fts0tlex.cc
index 69b859716d5..44434c4ea25 100644
--- a/storage/innobase/fts/fts0tlex.cc
+++ b/storage/innobase/fts/fts0tlex.cc
@@ -35,7 +35,7 @@
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
- * if you want the limit (max/min) macros for int types.
+ * if you want the limit (max/min) macros for int types.
*/
#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS 1
@@ -247,7 +247,7 @@ struct yy_buffer_state
int yy_bs_lineno; /**< The line count. */
int yy_bs_column; /**< The column count. */
-
+
/* Whether to try to fill the input buffer when we reach the
* end of it.
*/
@@ -368,10 +368,10 @@ struct yy_trans_info
flex_int32_t yy_verify;
flex_int32_t yy_nxt;
};
-static yyconst flex_int16_t yy_accept[16] =
+static yyconst flex_int16_t yy_accept[17] =
{ 0,
- 4, 4, 7, 4, 1, 5, 1, 6, 2, 4,
- 1, 1, 0, 3, 0
+ 4, 4, 7, 4, 1, 5, 1, 6, 6, 2,
+ 4, 1, 1, 0, 3, 0
} ;
static yyconst flex_int32_t yy_ec[256] =
@@ -379,8 +379,8 @@ static yyconst flex_int32_t yy_ec[256] =
1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 4, 1, 5, 1, 1, 1, 1, 1, 1,
- 1, 6, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 4, 1, 5, 1, 1, 6, 1, 1, 1,
+ 1, 7, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -406,35 +406,35 @@ static yyconst flex_int32_t yy_ec[256] =
1, 1, 1, 1, 1
} ;
-static yyconst flex_int32_t yy_meta[7] =
+static yyconst flex_int32_t yy_meta[8] =
{ 0,
- 1, 2, 3, 4, 5, 1
+ 1, 2, 3, 4, 5, 5, 1
} ;
-static yyconst flex_int16_t yy_base[19] =
+static yyconst flex_int16_t yy_base[20] =
{ 0,
- 0, 0, 17, 0, 5, 20, 0, 8, 0, 0,
- 0, 0, 3, 20, 20, 9, 10, 14
+ 0, 0, 18, 0, 6, 21, 0, 9, 21, 0,
+ 0, 0, 0, 4, 21, 21, 10, 11, 15
} ;
-static yyconst flex_int16_t yy_def[19] =
+static yyconst flex_int16_t yy_def[20] =
{ 0,
- 15, 1, 15, 16, 16, 15, 17, 18, 16, 16,
- 5, 17, 18, 15, 0, 15, 15, 15
+ 16, 1, 16, 17, 17, 16, 18, 19, 16, 17,
+ 17, 5, 18, 19, 16, 0, 16, 16, 16
} ;
-static yyconst flex_int16_t yy_nxt[27] =
+static yyconst flex_int16_t yy_nxt[29] =
{ 0,
- 4, 5, 6, 7, 8, 9, 11, 14, 12, 10,
- 10, 12, 14, 12, 13, 13, 15, 13, 13, 3,
- 15, 15, 15, 15, 15, 15
+ 4, 5, 6, 7, 8, 9, 10, 12, 15, 13,
+ 11, 11, 13, 15, 13, 14, 14, 16, 14, 14,
+ 3, 16, 16, 16, 16, 16, 16, 16
} ;
-static yyconst flex_int16_t yy_chk[27] =
+static yyconst flex_int16_t yy_chk[29] =
{ 0,
- 1, 1, 1, 1, 1, 1, 5, 13, 5, 16,
- 16, 17, 8, 17, 18, 18, 3, 18, 18, 15,
- 15, 15, 15, 15, 15, 15
+ 1, 1, 1, 1, 1, 1, 1, 5, 14, 5,
+ 17, 17, 18, 8, 18, 19, 19, 3, 19, 19,
+ 16, 16, 16, 16, 16, 16, 16, 16
} ;
/* The intent behind this definition is that it'll catch
@@ -699,7 +699,7 @@ YY_DECL
register yy_state_type yy_current_state;
register char *yy_cp, *yy_bp;
register int yy_act;
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
#line 44 "fts0tlex.l"
@@ -757,13 +757,13 @@ yy_match:
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 16 )
+ if ( yy_current_state >= 17 )
yy_c = yy_meta[(unsigned int) yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
++yy_cp;
}
- while ( yy_current_state != 15 );
+ while ( yy_current_state != 16 );
yy_cp = yyg->yy_last_accepting_cpos;
yy_current_state = yyg->yy_last_accepting_state;
@@ -969,7 +969,7 @@ case YY_STATE_EOF(INITIAL):
*/
static int yy_get_next_buffer (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
register char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf;
register char *source = yyg->yytext_ptr;
register int number_to_move, i;
@@ -1035,9 +1035,9 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
else
b->yy_buf_size *= 2;
- b->yy_ch_buf = (char*)
+ b->yy_ch_buf = (char *)
/* Include room in for 2 EOB chars. */
- fts0trealloc((void*) b->yy_ch_buf,b->yy_buf_size + 2 ,yyscanner );
+ fts0trealloc((void *) b->yy_ch_buf,b->yy_buf_size + 2 ,yyscanner );
}
else
/* Can't grow it, we don't own it. */
@@ -1086,7 +1086,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
if ((yy_size_t) (yyg->yy_n_chars + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) {
/* Extend the array by 50%, plus the number we really need. */
yy_size_t new_size = yyg->yy_n_chars + number_to_move + (yyg->yy_n_chars >> 1);
- YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char*) fts0trealloc((void*) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size ,yyscanner );
+ YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) fts0trealloc((void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size ,yyscanner );
if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf )
YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" );
}
@@ -1106,7 +1106,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
{
register yy_state_type yy_current_state;
register char *yy_cp;
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
yy_current_state = yyg->yy_start;
@@ -1121,7 +1121,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 16 )
+ if ( yy_current_state >= 17 )
yy_c = yy_meta[(unsigned int) yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
@@ -1138,7 +1138,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state , yyscan_t yyscanner)
{
register int yy_is_jam;
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner; /* This var may be unused depending upon options. */
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; /* This var may be unused depending upon options. */
register char *yy_cp = yyg->yy_c_buf_p;
register YY_CHAR yy_c = 1;
@@ -1150,11 +1150,11 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 16 )
+ if ( yy_current_state >= 17 )
yy_c = yy_meta[(unsigned int) yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
- yy_is_jam = (yy_current_state == 15);
+ yy_is_jam = (yy_current_state == 16);
return yy_is_jam ? 0 : yy_current_state;
}
@@ -1168,7 +1168,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
{
int c;
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
*yyg->yy_c_buf_p = yyg->yy_hold_char;
@@ -1226,7 +1226,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
}
}
- c = *(unsigned char*) yyg->yy_c_buf_p; /* cast for 8-bit char's */
+ c = *(unsigned char *) yyg->yy_c_buf_p; /* cast for 8-bit char's */
*yyg->yy_c_buf_p = '\0'; /* preserve yytext */
yyg->yy_hold_char = *++yyg->yy_c_buf_p;
@@ -1241,7 +1241,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
*/
void fts0trestart (FILE * input_file , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
if ( ! YY_CURRENT_BUFFER ){
fts0tensure_buffer_stack (yyscanner);
@@ -1259,7 +1259,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
*/
void fts0t_switch_to_buffer (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
/* TODO. We should be able to replace this entire function body
* with
@@ -1291,7 +1291,7 @@ static int yy_get_next_buffer (yyscan_t yyscanner)
static void fts0t_load_buffer_state (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
yyg->yytext_ptr = yyg->yy_c_buf_p = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos;
yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file;
@@ -1317,7 +1317,7 @@ static void fts0t_load_buffer_state (yyscan_t yyscanner)
/* yy_ch_buf has to be 2 characters longer than the size given because
* we need to put in 2 end-of-buffer characters.
*/
- b->yy_ch_buf = (char*) fts0talloc(b->yy_buf_size + 2 ,yyscanner );
+ b->yy_ch_buf = (char *) fts0talloc(b->yy_buf_size + 2 ,yyscanner );
if ( ! b->yy_ch_buf )
YY_FATAL_ERROR( "out of dynamic memory in fts0t_create_buffer()" );
@@ -1334,7 +1334,7 @@ static void fts0t_load_buffer_state (yyscan_t yyscanner)
*/
void fts0t_delete_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
if ( ! b )
return;
@@ -1343,9 +1343,9 @@ static void fts0t_load_buffer_state (yyscan_t yyscanner)
YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0;
if ( b->yy_is_our_buffer )
- fts0tfree((void*) b->yy_ch_buf ,yyscanner );
+ fts0tfree((void *) b->yy_ch_buf ,yyscanner );
- fts0tfree((void*) b ,yyscanner );
+ fts0tfree((void *) b ,yyscanner );
}
/* Initializes or reinitializes a buffer.
@@ -1356,7 +1356,7 @@ static void fts0t_load_buffer_state (yyscan_t yyscanner)
{
int oerrno = errno;
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
fts0t_flush_buffer(b ,yyscanner);
@@ -1383,7 +1383,7 @@ static void fts0t_load_buffer_state (yyscan_t yyscanner)
*/
void fts0t_flush_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
if ( ! b )
return;
@@ -1413,7 +1413,7 @@ static void fts0t_load_buffer_state (yyscan_t yyscanner)
*/
void fts0tpush_buffer_state (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
if (new_buffer == NULL)
return;
@@ -1444,7 +1444,7 @@ void fts0tpush_buffer_state (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner)
*/
void fts0tpop_buffer_state (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
if (!YY_CURRENT_BUFFER)
return;
@@ -1465,7 +1465,7 @@ void fts0tpop_buffer_state (yyscan_t yyscanner)
static void fts0tensure_buffer_stack (yyscan_t yyscanner)
{
int num_to_alloc;
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
if (!yyg->yy_buffer_stack) {
@@ -1474,7 +1474,7 @@ static void fts0tensure_buffer_stack (yyscan_t yyscanner)
* immediate realloc on the next call.
*/
num_to_alloc = 1;
- yyg->yy_buffer_stack = (struct yy_buffer_state**) fts0talloc
+ yyg->yy_buffer_stack = (struct yy_buffer_state**)fts0talloc
(num_to_alloc * sizeof(struct yy_buffer_state*)
, yyscanner);
if ( ! yyg->yy_buffer_stack )
@@ -1493,7 +1493,7 @@ static void fts0tensure_buffer_stack (yyscan_t yyscanner)
int grow_size = 8 /* arbitrary grow size */;
num_to_alloc = yyg->yy_buffer_stack_max + grow_size;
- yyg->yy_buffer_stack = (struct yy_buffer_state**) fts0trealloc
+ yyg->yy_buffer_stack = (struct yy_buffer_state**)fts0trealloc
(yyg->yy_buffer_stack,
num_to_alloc * sizeof(struct yy_buffer_state*)
, yyscanner);
@@ -1510,7 +1510,7 @@ static void fts0tensure_buffer_stack (yyscan_t yyscanner)
* @param base the character buffer
* @param size the size in bytes of the character buffer
* @param yyscanner The scanner object.
- * @return the newly allocated buffer state object.
+ * @return the newly allocated buffer state object.
*/
YY_BUFFER_STATE fts0t_scan_buffer (char * base, yy_size_t size , yyscan_t yyscanner)
{
@@ -1571,7 +1571,7 @@ YY_BUFFER_STATE fts0t_scan_bytes (yyconst char * yybytes, int _yybytes_len , y
/* Get memory for full buffer, including space for trailing EOB's. */
n = _yybytes_len + 2;
- buf = (char*) fts0talloc(n ,yyscanner );
+ buf = (char *) fts0talloc(n ,yyscanner );
if ( ! buf )
YY_FATAL_ERROR( "out of dynamic memory in fts0t_scan_bytes()" );
@@ -1626,7 +1626,7 @@ static void yy_fatal_error (yyconst char* msg , yyscan_t yyscanner __attribute_
*/
YY_EXTRA_TYPE fts0tget_extra (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
return yyextra;
}
@@ -1635,7 +1635,7 @@ YY_EXTRA_TYPE fts0tget_extra (yyscan_t yyscanner)
*/
int fts0tget_lineno (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
if (! YY_CURRENT_BUFFER)
return 0;
@@ -1648,7 +1648,7 @@ int fts0tget_lineno (yyscan_t yyscanner)
*/
int fts0tget_column (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
if (! YY_CURRENT_BUFFER)
return 0;
@@ -1661,7 +1661,7 @@ int fts0tget_column (yyscan_t yyscanner)
*/
FILE *fts0tget_in (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
return yyin;
}
@@ -1670,7 +1670,7 @@ FILE *fts0tget_in (yyscan_t yyscanner)
*/
FILE *fts0tget_out (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
return yyout;
}
@@ -1679,7 +1679,7 @@ FILE *fts0tget_out (yyscan_t yyscanner)
*/
int fts0tget_leng (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
return yyleng;
}
@@ -1689,7 +1689,7 @@ int fts0tget_leng (yyscan_t yyscanner)
char *fts0tget_text (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
return yytext;
}
@@ -1699,7 +1699,7 @@ char *fts0tget_text (yyscan_t yyscanner)
*/
void fts0tset_extra (YY_EXTRA_TYPE user_defined , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
yyextra = user_defined ;
}
@@ -1709,11 +1709,11 @@ void fts0tset_extra (YY_EXTRA_TYPE user_defined , yyscan_t yyscanner)
*/
void fts0tset_lineno (int line_number , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
/* lineno is only valid if an input buffer exists. */
if (! YY_CURRENT_BUFFER )
- yy_fatal_error( "fts0tset_lineno called with no buffer" , yyscanner);
+ yy_fatal_error( "fts0tset_lineno called with no buffer" , yyscanner);
yylineno = line_number;
}
@@ -1724,11 +1724,11 @@ void fts0tset_lineno (int line_number , yyscan_t yyscanner)
*/
void fts0tset_column (int column_no , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
/* column is only valid if an input buffer exists. */
if (! YY_CURRENT_BUFFER )
- yy_fatal_error( "fts0tset_column called with no buffer" , yyscanner);
+ yy_fatal_error( "fts0tset_column called with no buffer" , yyscanner);
yycolumn = column_no;
}
@@ -1741,25 +1741,25 @@ void fts0tset_column (int column_no , yyscan_t yyscanner)
*/
void fts0tset_in (FILE * in_str , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
yyin = in_str ;
}
void fts0tset_out (FILE * out_str , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
yyout = out_str ;
}
int fts0tget_debug (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
return yy_flex_debug;
}
void fts0tset_debug (int bdebug , yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
yy_flex_debug = bdebug ;
}
@@ -1819,19 +1819,19 @@ int fts0tlex_init_extra(YY_EXTRA_TYPE yy_user_defined,yyscan_t* ptr_yy_globals )
errno = ENOMEM;
return 1;
}
-
+
/* By setting to 0xAA, we expose bugs in
yy_init_globals. Leave at 0x00 for releases. */
memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t));
fts0tset_extra (yy_user_defined, *ptr_yy_globals);
-
+
return yy_init_globals ( *ptr_yy_globals );
}
static int yy_init_globals (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
/* Initialization is the same as for the non-reentrant scanner.
* This function is called from fts0tlex_destroy(), so don't allocate here.
*/
@@ -1839,7 +1839,7 @@ static int yy_init_globals (yyscan_t yyscanner)
yyg->yy_buffer_stack = 0;
yyg->yy_buffer_stack_top = 0;
yyg->yy_buffer_stack_max = 0;
- yyg->yy_c_buf_p = (char*) 0;
+ yyg->yy_c_buf_p = (char *) 0;
yyg->yy_init = 0;
yyg->yy_start = 0;
@@ -1852,8 +1852,8 @@ static int yy_init_globals (yyscan_t yyscanner)
yyin = stdin;
yyout = stdout;
#else
- yyin = (FILE*) 0;
- yyout = (FILE*) 0;
+ yyin = (FILE *) 0;
+ yyout = (FILE *) 0;
#endif
/* For future reference: Set errno on error, since we are called by
@@ -1865,7 +1865,7 @@ static int yy_init_globals (yyscan_t yyscanner)
/* fts0tlex_destroy is for both reentrant and non-reentrant scanners. */
int fts0tlex_destroy (yyscan_t yyscanner)
{
- struct yyguts_t * yyg = (struct yyguts_t*) yyscanner;
+ struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
/* Pop the buffer stack, destroying each element. */
while(YY_CURRENT_BUFFER){
@@ -1918,24 +1918,24 @@ static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner __attribute__(
void *fts0talloc (yy_size_t size , yyscan_t yyscanner __attribute__((unused)))
{
- return (void*) malloc( size );
+ return (void *) malloc( size );
}
void *fts0trealloc (void * ptr, yy_size_t size , yyscan_t yyscanner __attribute__((unused)))
{
- /* The cast to (char*) in the following accommodates both
+ /* The cast to (char *) in the following accommodates both
* implementations that use char* generic pointers, and those
* that use void* generic pointers. It works with the latter
* because both ANSI C and C++ allow castless assignment from
* any pointer type to void*, and deal with argument conversions
* as though doing an assignment.
*/
- return (void*) realloc( (char*) ptr, size );
+ return (void *) realloc( (char *) ptr, size );
}
void fts0tfree (void * ptr , yyscan_t yyscanner __attribute__((unused)))
{
- free( (char*) ptr ); /* see fts0trealloc() for (char*) cast */
+ free( (char *) ptr ); /* see fts0trealloc() for (char *) cast */
}
#define YYTABLES_NAME "yytables"
diff --git a/storage/innobase/fts/fts0tlex.l b/storage/innobase/fts/fts0tlex.l
index 8b04a9fecf1..8c42678ac7a 100644
--- a/storage/innobase/fts/fts0tlex.l
+++ b/storage/innobase/fts/fts0tlex.l
@@ -57,7 +57,7 @@ this program; if not, write to the Free Software Foundation, Inc.,
return(FTS_TEXT);
}
-[^" \n]* {
+[^" \n\%]* {
val->token = strdup(fts0tget_text(yyscanner));
return(FTS_TERM);
diff --git a/storage/innobase/ha/ha0ha.cc b/storage/innobase/ha/ha0ha.cc
index b58dc486cfa..3ec778f3bec 100644
--- a/storage/innobase/ha/ha0ha.cc
+++ b/storage/innobase/ha/ha0ha.cc
@@ -32,9 +32,7 @@ Created 8/22/1994 Heikki Tuuri
#ifdef UNIV_DEBUG
# include "buf0buf.h"
#endif /* UNIV_DEBUG */
-#ifndef UNIV_HOTBACKUP
# include "btr0sea.h"
-#endif /* !UNIV_HOTBACKUP */
#include "page0page.h"
/*************************************************************//**
@@ -79,7 +77,6 @@ ha_create_func(
return(table);
}
-#ifndef UNIV_HOTBACKUP
if (type == MEM_HEAP_FOR_PAGE_HASH) {
/* We create a hash table protected by rw_locks for
buf_pool->page_hash. */
@@ -97,7 +94,6 @@ ha_create_func(
table->heaps[i] = mem_heap_create_typed(4096, type);
ut_a(table->heaps[i]);
}
-#endif /* !UNIV_HOTBACKUP */
return(table);
}
@@ -120,7 +116,6 @@ ha_clear(
|| rw_lock_own(&btr_search_latch, RW_LOCK_EXCLUSIVE));
#endif /* UNIV_SYNC_DEBUG */
-#ifndef UNIV_HOTBACKUP
/* Free the memory heaps. */
n = table->n_sync_obj;
@@ -151,7 +146,6 @@ ha_clear(
table->n_sync_obj = 0;
table->type = HASH_TABLE_SYNC_NONE;
-#endif /* !UNIV_HOTBACKUP */
/* Clear the hash table. */
n = hash_get_n_cells(table);
@@ -179,7 +173,7 @@ ha_insert_for_fold_func(
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
buf_block_t* block, /*!< in: buffer block containing the data */
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- const rec_t* data) /*!< in: data, must not be NULL */
+ rec_t* data) /*!< in: data, must not be NULL */
{
hash_cell_t* cell;
ha_node_t* node;
@@ -215,7 +209,7 @@ ha_insert_for_fold_func(
prev_node->block = block;
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- prev_node->data = (rec_t*) data;
+ prev_node->data = data;
return(TRUE);
}
@@ -237,7 +231,7 @@ ha_insert_for_fold_func(
return(FALSE);
}
- ha_node_set_data(node, block, (rec_t*) data);
+ ha_node_set_data(node, block, data);
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
if (table->adaptive) {
diff --git a/storage/innobase/ha/hash0hash.cc b/storage/innobase/ha/hash0hash.cc
index 99128a676d5..174b6bcb57e 100644
--- a/storage/innobase/ha/hash0hash.cc
+++ b/storage/innobase/ha/hash0hash.cc
@@ -106,14 +106,14 @@ void
hash_mutex_exit_all_but(
/*====================*/
hash_table_t* table, /*!< in: hash table */
- mutex_t* keep_mutex) /*!< in: mutex to keep */
+ ib_mutex_t* keep_mutex) /*!< in: mutex to keep */
{
ulint i;
ut_ad(table->type == HASH_TABLE_SYNC_MUTEX);
for (i = 0; i < table->n_sync_obj; i++) {
- mutex_t* mutex = table->sync_obj.mutexes + i;
+ ib_mutex_t* mutex = table->sync_obj.mutexes + i;
if (UNIV_LIKELY(keep_mutex != mutex)) {
mutex_exit(mutex);
}
@@ -373,8 +373,8 @@ hash_create_sync_obj_func(
switch (type) {
case HASH_TABLE_SYNC_MUTEX:
- table->sync_obj.mutexes = static_cast<mutex_t*>(
- mem_alloc(n_sync_obj * sizeof(mutex_t)));
+ table->sync_obj.mutexes = static_cast<ib_mutex_t*>(
+ mem_alloc(n_sync_obj * sizeof(ib_mutex_t)));
for (i = 0; i < n_sync_obj; i++) {
mutex_create(hash_table_mutex_key,
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index e19fe47e81a..44bbe20c8d3 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -3,6 +3,7 @@
Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
Copyright (c) 2008, 2009 Google Inc.
Copyright (c) 2009, Percona Inc.
+Copyright (c) 2012, Facebook Inc.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -36,8 +37,10 @@ this program; if not, write to the Free Software Foundation, Inc.,
#include <sql_acl.h> // PROCESS_ACL
#include <debug_sync.h> // DEBUG_SYNC
+#include <my_base.h> // HA_OPTION_*
#include <mysys_err.h>
#include <innodb_priv.h>
+
#ifdef _WIN32
#include <io.h>
#endif
@@ -57,8 +60,10 @@ this program; if not, write to the Free Software Foundation, Inc.,
#include "srv0srv.h"
#include "trx0roll.h"
#include "trx0trx.h"
+
#include "trx0sys.h"
#include "mtr0mtr.h"
+#include "rem0types.h"
#include "row0ins.h"
#include "row0mysql.h"
#include "row0sel.h"
@@ -75,14 +80,24 @@ this program; if not, write to the Free Software Foundation, Inc.,
#include "row0merge.h"
#include "dict0boot.h"
#include "dict0stats.h"
+#include "dict0stats_bg.h"
#include "ha_prototypes.h"
#include "ut0mem.h"
#include "ibuf0ibuf.h"
#include "dict0dict.h"
#include "srv0mon.h"
+#include "api0api.h"
+#include "api0misc.h"
#include "pars0pars.h"
#include "fts0fts.h"
#include "fts0types.h"
+#include "row0import.h"
+#include "row0quiesce.h"
+#ifdef UNIV_DEBUG
+#include "trx0purge.h"
+#endif /* UNIV_DEBUG */
+#include "fts0priv.h"
+#include "page0zip.h"
#include "ha_innodb.h"
#include "i_s.h"
@@ -112,11 +127,9 @@ static const long AUTOINC_NEW_STYLE_LOCKING = 1;
static const long AUTOINC_NO_LOCKING = 2;
static long innobase_mirrored_log_groups;
-static long innobase_log_files_in_group;
static long innobase_log_buffer_size;
static long innobase_additional_mem_pool_size;
static long innobase_file_io_threads;
-static long innobase_force_recovery;
static long innobase_open_files;
static long innobase_autoinc_lock_mode;
static ulong innobase_commit_concurrency = 0;
@@ -134,12 +147,13 @@ static uint innobase_old_blocks_pct;
of the buffer pool. */
static uint innobase_change_buffer_max_size = CHANGE_BUFFER_DEFAULT_SIZE;
+static ulong innobase_compression_level = DEFAULT_COMPRESSION_LEVEL;
+
/* The default values for the following char* start-up parameters
are determined in innobase_init below: */
static char* innobase_data_home_dir = NULL;
static char* innobase_data_file_path = NULL;
-static char* innobase_log_group_home_dir = NULL;
static char* innobase_file_format_name = NULL;
static char* innobase_change_buffering = NULL;
static char* innobase_enable_monitor_counter = NULL;
@@ -176,7 +190,6 @@ static my_bool innobase_stats_on_metadata = TRUE;
static my_bool innobase_large_prefix = FALSE;
static my_bool innodb_optimize_fulltext_only = FALSE;
-
static char* internal_innobase_data_file_path = NULL;
static char* innodb_version_str = (char*) INNODB_VERSION_STR;
@@ -250,6 +263,11 @@ const struct _ft_vft ft_vft_result = {NULL,
innobase_fts_retrieve_ranking,
NULL};
+const struct _ft_vft_ext ft_vft_ext_result = {innobase_fts_get_version,
+ innobase_fts_flags,
+ innobase_fts_retrieve_docid,
+ innobase_fts_count_matches};
+
#ifdef HAVE_PSI_INTERFACE
/* Keys to register pthread mutexes/cond in the current file with
performance schema */
@@ -262,8 +280,7 @@ static mysql_pfs_key_t pending_checkpoint_mutex_key;
static PSI_mutex_info all_pthread_mutexes[] = {
{&commit_threads_m_key, "commit_threads_m", 0},
{&commit_cond_mutex_key, "commit_cond_mutex", 0},
- {&innobase_share_mutex_key, "innobase_share_mutex", 0},
- {&pending_checkpoint_mutex_key, "pending_checkpoint_mutex", 0}
+ {&innobase_share_mutex_key, "innobase_share_mutex", 0}
};
static PSI_cond_info all_innodb_conds[] = {
@@ -306,8 +323,10 @@ static PSI_mutex_info all_innodb_mutexes[] = {
# endif /* UNIV_MEM_DEBUG */
{&mem_pool_mutex_key, "mem_pool_mutex", 0},
{&mutex_list_mutex_key, "mutex_list_mutex", 0},
+ {&page_zip_stat_per_index_mutex_key, "page_zip_stat_per_index_mutex", 0},
{&purge_sys_bh_mutex_key, "purge_sys_bh_mutex", 0},
{&recv_sys_mutex_key, "recv_sys_mutex", 0},
+ {&recv_writer_mutex_key, "recv_writer_mutex", 0},
{&rseg_mutex_key, "rseg_mutex", 0},
# ifdef UNIV_SYNC_DEBUG
{&rw_lock_debug_mutex_key, "rw_lock_debug_mutex", 0},
@@ -336,8 +355,12 @@ static PSI_mutex_info all_innodb_mutexes[] = {
#ifndef HAVE_ATOMIC_BUILTINS
{&srv_conc_mutex_key, "srv_conc_mutex", 0},
#endif /* !HAVE_ATOMIC_BUILTINS */
+#ifndef HAVE_ATOMIC_BUILTINS_64
+ {&monitor_mutex_key, "monitor_mutex", 0},
+#endif /* !HAVE_ATOMIC_BUILTINS_64 */
{&ut_list_mutex_key, "ut_list_mutex", 0},
{&trx_sys_mutex_key, "trx_sys_mutex", 0},
+ {&zip_pad_mutex_key, "zip_pad_mutex", 0},
};
# endif /* UNIV_PFS_MUTEX */
@@ -364,6 +387,7 @@ static PSI_rwlock_info all_innodb_rwlocks[] = {
{&trx_i_s_cache_lock_key, "trx_i_s_cache_lock", 0},
{&trx_purge_latch_key, "trx_purge_latch", 0},
{&index_tree_rw_lock_key, "index_tree_rw_lock", 0},
+ {&index_online_log_key, "index_online_log", 0},
{&dict_table_stats_latch_key, "dict_table_stats", 0},
{&hash_table_rw_lock_key, "hash table locks", 0}
};
@@ -381,7 +405,8 @@ static PSI_thread_info all_innodb_threads[] = {
{&srv_monitor_thread_key, "srv_monitor_thread", 0},
{&srv_master_thread_key, "srv_master_thread", 0},
{&srv_purge_thread_key, "srv_purge_thread", 0},
- {&buf_page_cleaner_thread_key, "page_cleaner_thread", 0}
+ {&buf_page_cleaner_thread_key, "page_cleaner_thread", 0},
+ {&recv_writer_thread_key, "recovery writer thread", 0}
};
# endif /* UNIV_PFS_THREAD */
@@ -396,6 +421,70 @@ static PSI_file_info all_innodb_files[] = {
# endif /* UNIV_PFS_IO */
#endif /* HAVE_PSI_INTERFACE */
+/** Always normalize table name to lower case on Windows */
+#ifdef __WIN__
+#define normalize_table_name(norm_name, name) \
+ normalize_table_name_low(norm_name, name, TRUE)
+#else
+#define normalize_table_name(norm_name, name) \
+ normalize_table_name_low(norm_name, name, FALSE)
+#endif /* __WIN__ */
+
+/** Set up InnoDB API callback function array */
+ib_cb_t innodb_api_cb[] = {
+ (ib_cb_t) ib_cursor_open_table,
+ (ib_cb_t) ib_cursor_read_row,
+ (ib_cb_t) ib_cursor_insert_row,
+ (ib_cb_t) ib_cursor_delete_row,
+ (ib_cb_t) ib_cursor_update_row,
+ (ib_cb_t) ib_cursor_moveto,
+ (ib_cb_t) ib_cursor_first,
+ (ib_cb_t) ib_cursor_next,
+ (ib_cb_t) ib_cursor_last,
+ (ib_cb_t) ib_cursor_set_match_mode,
+ (ib_cb_t) ib_sec_search_tuple_create,
+ (ib_cb_t) ib_clust_read_tuple_create,
+ (ib_cb_t) ib_tuple_delete,
+ (ib_cb_t) ib_tuple_copy,
+ (ib_cb_t) ib_tuple_read_u32,
+ (ib_cb_t) ib_tuple_write_u32,
+ (ib_cb_t) ib_tuple_read_u64,
+ (ib_cb_t) ib_tuple_write_u64,
+ (ib_cb_t) ib_tuple_read_i32,
+ (ib_cb_t) ib_tuple_write_i32,
+ (ib_cb_t) ib_tuple_read_i64,
+ (ib_cb_t) ib_tuple_write_i64,
+ (ib_cb_t) ib_tuple_get_n_cols,
+ (ib_cb_t) ib_col_set_value,
+ (ib_cb_t) ib_col_get_value,
+ (ib_cb_t) ib_col_get_meta,
+ (ib_cb_t) ib_trx_begin,
+ (ib_cb_t) ib_trx_commit,
+ (ib_cb_t) ib_trx_rollback,
+ (ib_cb_t) ib_trx_start,
+ (ib_cb_t) ib_trx_release,
+ (ib_cb_t) ib_trx_state,
+ (ib_cb_t) ib_cursor_lock,
+ (ib_cb_t) ib_cursor_close,
+ (ib_cb_t) ib_cursor_new_trx,
+ (ib_cb_t) ib_cursor_reset,
+ (ib_cb_t) ib_open_table_by_name,
+ (ib_cb_t) ib_col_get_name,
+ (ib_cb_t) ib_table_truncate,
+ (ib_cb_t) ib_cursor_open_index_using_name,
+ (ib_cb_t) ib_close_thd,
+ (ib_cb_t) ib_cfg_get_cfg,
+ (ib_cb_t) ib_cursor_set_cluster_access,
+ (ib_cb_t) ib_cursor_commit_trx,
+ (ib_cb_t) ib_cfg_trx_level,
+ (ib_cb_t) ib_tuple_get_n_user_cols,
+ (ib_cb_t) ib_cursor_set_lock_mode,
+ (ib_cb_t) ib_cursor_clear_trx,
+ (ib_cb_t) ib_get_idx_field_name,
+ (ib_cb_t) ib_trx_get_start_time,
+ (ib_cb_t) ib_cfg_bk_commit_interval
+};
+
/*************************************************************//**
Check whether valid argument given to innodb_ft_*_stopword_table.
This function is registered as a callback with MySQL.
@@ -410,24 +499,10 @@ innodb_stopword_table_validate(
void* save, /*!< out: immediate result
for update function */
struct st_mysql_value* value); /*!< in: incoming string */
-/****************************************************************//**
-Update the session variable innodb_session_stopword_table
-with the "saved" stopword table name value. This function
-is registered as a callback with MySQL. */
-static
-void
-innodb_session_stopword_update(
-/*===========================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save); /*!< in: immediate result
- from check function */
-/** "GEN_CLUST_INDEX" is the name reserved for Innodb default
-system primary index. */
-static const char innobase_index_reserve_name[]= "GEN_CLUST_INDEX";
+
+/** "GEN_CLUST_INDEX" is the name reserved for InnoDB default
+system clustered index when there is no primary key. */
+const char innobase_index_reserve_name[] = "GEN_CLUST_INDEX";
static const char innobase_hton_name[]= "InnoDB";
@@ -450,19 +525,14 @@ static MYSQL_THDVAR_BOOL(ft_enable_stopword, PLUGIN_VAR_OPCMDARG,
NULL, NULL,
/* default */ TRUE);
-static MYSQL_THDVAR_BOOL(analyze_is_persistent, PLUGIN_VAR_OPCMDARG,
- "ANALYZE TABLE in InnoDB uses a more precise (and slow) sampling "
- "algorithm and saves the results persistently.",
- /* check_func */ NULL, /* update_func */ NULL,
- /* default */ FALSE);
-
static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG,
"Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.",
NULL, NULL, 50, 1, 1024 * 1024 * 1024, 0);
-static MYSQL_THDVAR_STR(ft_user_stopword_table, PLUGIN_VAR_OPCMDARG,
+static MYSQL_THDVAR_STR(ft_user_stopword_table,
+ PLUGIN_VAR_OPCMDARG|PLUGIN_VAR_MEMALLOC,
"User supplied stopword table name, effective in the session level.",
- innodb_stopword_table_validate, innodb_session_stopword_update, NULL);
+ innodb_stopword_table_validate, NULL, NULL);
static SHOW_VAR innodb_status_variables[]= {
{"buffer_pool_dump_status",
@@ -471,8 +541,12 @@ static SHOW_VAR innodb_status_variables[]= {
(char*) &export_vars.innodb_buffer_pool_load_status, SHOW_CHAR},
{"buffer_pool_pages_data",
(char*) &export_vars.innodb_buffer_pool_pages_data, SHOW_LONG},
+ {"buffer_pool_bytes_data",
+ (char*) &export_vars.innodb_buffer_pool_bytes_data, SHOW_LONG},
{"buffer_pool_pages_dirty",
(char*) &export_vars.innodb_buffer_pool_pages_dirty, SHOW_LONG},
+ {"buffer_pool_bytes_dirty",
+ (char*) &export_vars.innodb_buffer_pool_bytes_dirty, SHOW_LONG},
{"buffer_pool_pages_flushed",
(char*) &export_vars.innodb_buffer_pool_pages_flushed, SHOW_LONG},
{"buffer_pool_pages_free",
@@ -567,6 +641,12 @@ static SHOW_VAR innodb_status_variables[]= {
(char*) &export_vars.innodb_truncated_status_writes, SHOW_LONG},
{"available_undo_logs",
(char*) &export_vars.innodb_available_undo_logs, SHOW_LONG},
+#ifdef UNIV_DEBUG
+ {"purge_trx_id_age",
+ (char*) &export_vars.innodb_purge_trx_id_age, SHOW_LONG},
+ {"purge_view_trx_id_age",
+ (char*) &export_vars.innodb_purge_view_trx_id_age, SHOW_LONG},
+#endif /* UNIV_DEBUG */
{NullS, NullS, SHOW_LONG}
};
@@ -598,18 +678,8 @@ innobase_close_connection(
THD* thd); /*!< in: MySQL thread handle for
which to close the connection */
-static
-void
-innobase_commit_ordered(
-/*======================*/
- handlerton *hton, /*!< in/out: Innodb handlerton */
- THD* thd, /*!< in: MySQL thread handle */
- bool all); /*!< in: TRUE - commit transaction
- FALSE - the current SQL statement
- ended */
-static
-void
-innobase_kill_query(handlerton *hton, THD* thd, enum thd_kill_levels level);
+static void innobase_kill_query(handlerton *hton, THD* thd, enum thd_kill_levels level);
+static void innobase_commit_ordered(handlerton *hton, THD* thd, bool all);
/*****************************************************************//**
Commits a transaction in an InnoDB database or marks an SQL statement
@@ -684,14 +754,7 @@ innobase_release_savepoint(
savepoint should be released */
void* savepoint); /*!< in: savepoint data */
-/*****************************************************************//**
-Handle a commit checkpoint request from server layer.
-We simply flush the redo log immediately and do the notify call.*/
-static
-void
-innobase_checkpoint_request(
- handlerton *hton,
- void *cookie);
+static void innobase_checkpoint_request(handlerton *hton, void *cookie);
/************************************************************************//**
Function for constructing an InnoDB table handler instance. */
@@ -745,13 +808,6 @@ int
innobase_file_format_validate_and_set(
/*==================================*/
const char* format_max); /*!< in: parameter value */
-/****************************************************************//**
-Return alter table flags supported in an InnoDB database. */
-static
-uint
-innobase_alter_table_flags(
-/*=======================*/
- uint flags);
/*******************************************************************//**
This function is used to prepare an X/Open XA distributed transaction.
@@ -925,6 +981,21 @@ innodb_enable_monitor_at_startup(
/*=============================*/
char* str); /*!< in: monitor counter enable list */
+/*********************************************************************
+Normalizes a table name string. A normalized name consists of the
+database name catenated to '/' and table name. An example:
+test/mytable. On Windows normalization puts both the database name and the
+table name always to lower case if "set_lower_case" is set to TRUE. */
+static
+void
+normalize_table_name_low(
+/*=====================*/
+ char* norm_name, /* out: normalized name as a
+ null-terminated string */
+ const char* name, /* in: table name string */
+ ibool set_lower_case); /* in: TRUE if we want to set
+ name to lower case */
+
/*************************************************************//**
Check for a valid value of innobase_commit_concurrency.
@return 0 for valid innodb_commit_concurrency */
@@ -967,7 +1038,7 @@ innobase_create_handler(
TABLE_SHARE* table,
MEM_ROOT* mem_root)
{
- return new (mem_root) ha_innobase(hton, table);
+ return(new (mem_root) ha_innobase(hton, table));
}
/* General functions */
@@ -1008,9 +1079,22 @@ UNIV_INTERN
ibool
thd_is_replication_slave_thread(
/*============================*/
- void* thd) /*!< in: thread handle (THD*) */
+ THD* thd) /*!< in: thread handle */
+{
+ return((ibool) thd_slave_thread(thd));
+}
+
+/******************************************************************//**
+Gets information on the durability property requested by thread.
+Used when writing either a prepare or commit record to the log
+buffer. @return the durability property. */
+UNIV_INTERN
+enum durability_properties
+thd_requested_durability(
+/*=====================*/
+ const THD* thd) /*!< in: thread handle */
{
- return((ibool) thd_slave_thread((THD*) thd));
+ return(thd_get_durability_property(thd));
}
/******************************************************************//**
@@ -1020,10 +1104,9 @@ UNIV_INTERN
ibool
thd_trx_is_read_only(
/*=================*/
- void* thd) /*!< in: thread handle (THD*) */
+ THD* thd) /*!< in: thread handle */
{
- /* Waiting on WL#6046 to complete. */
- return(FALSE);
+ return(thd != 0 && thd_tx_is_read_only(thd));
}
/******************************************************************//**
@@ -1034,11 +1117,11 @@ UNIV_INTERN
ibool
thd_trx_is_auto_commit(
/*===================*/
- void* thd) /*!< in: thread handle (THD*) can be NULL */
+ THD* thd) /*!< in: thread handle, can be NULL */
{
return(thd != NULL
&& !thd_test_options(
- static_cast<THD*>(thd),
+ thd,
OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)
&& thd_is_select(thd));
}
@@ -1114,6 +1197,17 @@ innobase_srv_conc_force_exit_innodb(
}
/******************************************************************//**
+Returns the NUL terminated value of glob_hostname.
+@return pointer to glob_hostname. */
+UNIV_INTERN
+const char*
+server_get_hostname()
+/*=================*/
+{
+ return(glob_hostname);
+}
+
+/******************************************************************//**
Returns true if the transaction this thread is processing has edited
non-transactional tables. Used by the deadlock detector when deciding
which transaction to rollback in case of a deadlock - we try to avoid
@@ -1123,9 +1217,9 @@ UNIV_INTERN
ibool
thd_has_edited_nontrans_tables(
/*===========================*/
- void* thd) /*!< in: thread handle (THD*) */
+ THD* thd) /*!< in: thread handle */
{
- return((ibool) thd_non_transactional_update((THD*) thd));
+ return((ibool) thd_non_transactional_update(thd));
}
/******************************************************************//**
@@ -1135,9 +1229,9 @@ UNIV_INTERN
ibool
thd_is_select(
/*==========*/
- const void* thd) /*!< in: thread handle (THD*) */
+ const THD* thd) /*!< in: thread handle */
{
- return(thd_sql_command((const THD*) thd) == SQLCOM_SELECT);
+ return(thd_sql_command(thd) == SQLCOM_SELECT);
}
/******************************************************************//**
@@ -1148,10 +1242,10 @@ UNIV_INTERN
ibool
thd_supports_xa(
/*============*/
- void* thd) /*!< in: thread handle (THD*), or NULL to query
+ THD* thd) /*!< in: thread handle, or NULL to query
the global innodb_supports_xa */
{
- return(THDVAR((THD*) thd, support_xa));
+ return(THDVAR(thd, support_xa));
}
/******************************************************************//**
@@ -1161,12 +1255,12 @@ UNIV_INTERN
ulong
thd_lock_wait_timeout(
/*==================*/
- void* thd) /*!< in: thread handle (THD*), or NULL to query
+ THD* thd) /*!< in: thread handle, or NULL to query
the global innodb_lock_wait_timeout */
{
/* According to <mysql/plugin.h>, passing thd == NULL
returns the global value of the session variable. */
- return(THDVAR((THD*) thd, lock_wait_timeout));
+ return(THDVAR(thd, lock_wait_timeout));
}
/******************************************************************//**
@@ -1175,17 +1269,18 @@ UNIV_INTERN
void
thd_set_lock_wait_time(
/*===================*/
- void* thd, /*!< in: thread handle (THD*) */
+ THD* thd, /*!< in/out: thread handle */
ulint value) /*!< in: time waited for the lock */
{
if (thd) {
- thd_storage_lock_wait((THD*) thd, value);
+ thd_storage_lock_wait(thd, value);
}
}
/********************************************************************//**
Obtain the InnoDB transaction of a MySQL thread.
@return reference to transaction pointer */
+__attribute__((warn_unused_result, nonnull))
static inline
trx_t*&
thd_to_trx(
@@ -1245,11 +1340,11 @@ Converts an InnoDB error code to a MySQL error code and also tells to MySQL
about a possible transaction rollback inside InnoDB caused by a lock wait
timeout or a deadlock.
@return MySQL error code */
-UNIV_INTERN
+static
int
convert_error_code_to_mysql(
/*========================*/
- int error, /*!< in: InnoDB error code */
+ dberr_t error, /*!< in: InnoDB error code */
ulint flags, /*!< in: InnoDB table flags, or 0 */
THD* thd) /*!< in: user thread handle or NULL */
{
@@ -1287,7 +1382,7 @@ convert_error_code_to_mysql(
return(HA_ERR_FOUND_DUPP_KEY);
case DB_READ_ONLY:
- return(HA_ERR_READ_ONLY_TRANSACTION);
+ return(HA_ERR_TABLE_READONLY);
case DB_FOREIGN_DUPLICATE_KEY:
return(HA_ERR_FOREIGN_DUPLICATE_KEY);
@@ -1344,12 +1439,19 @@ convert_error_code_to_mysql(
case DB_OUT_OF_FILE_SPACE:
return(HA_ERR_RECORD_FILE_FULL);
+ case DB_TABLE_IN_FK_CHECK:
+ return(HA_ERR_TABLE_IN_FK_CHECK);
+
case DB_TABLE_IS_BEING_USED:
return(HA_ERR_WRONG_COMMAND);
+ case DB_TABLESPACE_DELETED:
case DB_TABLE_NOT_FOUND:
return(HA_ERR_NO_SUCH_TABLE);
+ case DB_TABLESPACE_NOT_FOUND:
+ return(HA_ERR_NO_SUCH_TABLE);
+
case DB_TOO_BIG_RECORD: {
/* If prefix is true then a 768-byte prefix is stored
locally for BLOB fields. Refer to dict_table_get_format() */
@@ -1365,7 +1467,7 @@ convert_error_code_to_mysql(
"or ROW_FORMAT=COMPRESSED ": "",
prefix ? DICT_MAX_FIXED_COL_LEN : 0);
return(HA_ERR_TO_BIG_ROW);
- }
+ }
case DB_TOO_BIG_INDEX_COL:
my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0),
@@ -1386,27 +1488,21 @@ convert_error_code_to_mysql(
return(HA_ERR_LOCK_TABLE_FULL);
- case DB_PRIMARY_KEY_IS_NULL:
- return(ER_PRIMARY_CANT_HAVE_NULL);
-
case DB_FTS_INVALID_DOCID:
return(HA_FTS_INVALID_DOCID);
case DB_TOO_MANY_CONCURRENT_TRXS:
- /* New error code HA_ERR_TOO_MANY_CONCURRENT_TRXS is only
- available in 5.1.38 and later, but the plugin should still
- work with previous versions of MySQL. */
-#ifdef HA_ERR_TOO_MANY_CONCURRENT_TRXS
return(HA_ERR_TOO_MANY_CONCURRENT_TRXS);
-#else /* HA_ERR_TOO_MANY_CONCURRENT_TRXS */
- return(HA_ERR_RECORD_FILE_FULL);
-#endif /* HA_ERR_TOO_MANY_CONCURRENT_TRXS */
case DB_UNSUPPORTED:
return(HA_ERR_UNSUPPORTED);
case DB_INDEX_CORRUPT:
return(HA_ERR_INDEX_CORRUPT);
case DB_UNDO_RECORD_TOO_BIG:
return(HA_ERR_UNDO_REC_TOO_BIG);
+ case DB_OUT_OF_MEMORY:
+ return(HA_ERR_OUT_OF_MEM);
+ case DB_TABLESPACE_EXISTS:
+ return(HA_ERR_TABLESPACE_EXISTS);
}
}
@@ -1417,18 +1513,30 @@ void
innobase_mysql_print_thd(
/*=====================*/
FILE* f, /*!< in: output stream */
- void* thd, /*!< in: pointer to a MySQL THD object */
+ THD* thd, /*!< in: MySQL THD object */
uint max_query_len) /*!< in: max query length to print, or 0 to
use the default max length */
{
char buffer[1024];
- fputs(thd_security_context((THD*) thd, buffer, sizeof buffer,
+ fputs(thd_security_context(thd, buffer, sizeof buffer,
max_query_len), f);
putc('\n', f);
}
/******************************************************************//**
+Get the error message format string.
+@return the format string or 0 if not found. */
+UNIV_INTERN
+const char*
+innobase_get_err_msg(
+/*=================*/
+ int error_code) /*!< in: MySQL error code */
+{
+ return(my_get_err_msg(error_code));
+}
+
+/******************************************************************//**
Get the variable length bounds of the given character set. */
UNIV_INTERN
void
@@ -1456,7 +1564,7 @@ innobase_get_cset_width(
/* Fix bug#46256: allow tables to be dropped if the
collation is not found, but issue a warning. */
- if ((global_system_variables.log_warnings)
+ if ((log_warnings)
&& (cset != 0)){
sql_print_warning(
@@ -1572,9 +1680,9 @@ UNIV_INTERN
struct charset_info_st*
innobase_get_charset(
/*=================*/
- void* mysql_thd) /*!< in: MySQL thread handle */
+ THD* mysql_thd) /*!< in: MySQL thread handle */
{
- return(thd_charset((THD*) mysql_thd));
+ return(thd_charset(mysql_thd));
}
/**********************************************************************//**
@@ -1584,12 +1692,12 @@ UNIV_INTERN
const char*
innobase_get_stmt(
/*==============*/
- void* mysql_thd, /*!< in: MySQL thread handle */
+ THD* thd, /*!< in: MySQL thread handle */
size_t* length) /*!< out: length of the SQL statement */
{
LEX_STRING* stmt;
- stmt = thd_query_string((THD*) mysql_thd);
+ stmt = thd_query_string(thd);
*length = stmt->length;
return(stmt->str);
}
@@ -1621,99 +1729,6 @@ innobase_get_lower_case_table_names(void)
return(lower_case_table_names);
}
-#if defined (__WIN__) && defined (MYSQL_DYNAMIC_PLUGIN)
-extern MYSQL_PLUGIN_IMPORT MY_TMPDIR mysql_tmpdir_list;
-/*******************************************************************//**
-Map an OS error to an errno value. The OS error number is stored in
-_doserrno and the mapped value is stored in errno) */
-void __cdecl
-_dosmaperr(
- unsigned long); /*!< in: OS error value */
-
-/*********************************************************************//**
-Creates a temporary file.
-@return temporary file descriptor, or < 0 on error */
-UNIV_INTERN
-int
-innobase_mysql_tmpfile(void)
-/*========================*/
-{
- int fd; /* handle of opened file */
- HANDLE osfh; /* OS handle of opened file */
- char* tmpdir; /* point to the directory
- where to create file */
- TCHAR path_buf[MAX_PATH - 14]; /* buffer for tmp file path.
- The length cannot be longer
- than MAX_PATH - 14, or
- GetTempFileName will fail. */
- char filename[MAX_PATH]; /* name of the tmpfile */
- DWORD fileaccess = GENERIC_READ /* OS file access */
- | GENERIC_WRITE
- | DELETE;
- DWORD fileshare = FILE_SHARE_READ /* OS file sharing mode */
- | FILE_SHARE_WRITE
- | FILE_SHARE_DELETE;
- DWORD filecreate = CREATE_ALWAYS; /* OS method of open/create */
- DWORD fileattrib = /* OS file attribute flags */
- FILE_ATTRIBUTE_NORMAL
- | FILE_FLAG_DELETE_ON_CLOSE
- | FILE_ATTRIBUTE_TEMPORARY
- | FILE_FLAG_SEQUENTIAL_SCAN;
-
- DBUG_ENTER("innobase_mysql_tmpfile");
-
- tmpdir = my_tmpdir(&mysql_tmpdir_list);
-
- /* The tmpdir parameter can not be NULL for GetTempFileName. */
- if (!tmpdir) {
- uint ret;
-
- /* Use GetTempPath to determine path for temporary files. */
- ret = GetTempPath(sizeof(path_buf), path_buf);
- if (ret > sizeof(path_buf) || (ret == 0)) {
-
- _dosmaperr(GetLastError()); /* map error */
- DBUG_RETURN(-1);
- }
-
- tmpdir = path_buf;
- }
-
- /* Use GetTempFileName to generate a unique filename. */
- if (!GetTempFileName(tmpdir, "ib", 0, filename)) {
-
- _dosmaperr(GetLastError()); /* map error */
- DBUG_RETURN(-1);
- }
-
- DBUG_PRINT("info", ("filename: %s", filename));
-
- /* Open/Create the file. */
- osfh = CreateFile(filename, fileaccess, fileshare, NULL,
- filecreate, fileattrib, NULL);
- if (osfh == INVALID_HANDLE_VALUE) {
-
- /* open/create file failed! */
- _dosmaperr(GetLastError()); /* map error */
- DBUG_RETURN(-1);
- }
-
- do {
- /* Associates a CRT file descriptor with the OS file handle. */
- fd = _open_osfhandle((intptr_t) osfh, 0);
- } while (fd == -1 && errno == EINTR);
-
- if (fd == -1) {
- /* Open failed, close the file handle. */
-
- _dosmaperr(GetLastError()); /* map error */
- CloseHandle(osfh); /* no need to check if
- CloseHandle fails */
- }
-
- DBUG_RETURN(fd);
-}
-#else
/*********************************************************************//**
Creates a temporary file.
@return temporary file descriptor, or < 0 on error */
@@ -1724,6 +1739,9 @@ innobase_mysql_tmpfile(void)
{
int fd2 = -1;
File fd = mysql_tmpfile("ib");
+
+ DBUG_EXECUTE_IF("innobase_tmpfile_creation_failure", return(-1););
+
if (fd >= 0) {
/* Copy the file descriptor, so that the additional resources
allocated by create_temp_file() can be freed by invoking
@@ -1767,7 +1785,6 @@ innobase_mysql_tmpfile(void)
}
return(fd2);
}
-#endif /* defined (__WIN__) && defined (MYSQL_DYNAMIC_PLUGIN) */
/*********************************************************************//**
Wrapper around MySQL's copy_and_convert function.
@@ -1845,11 +1862,11 @@ values we want to reserve for multi-value inserts e.g.,
INSERT INTO T VALUES(), (), ();
-innobase_next_autoinc() will be called with increment set to
-n * 3 where autoinc_lock_mode != TRADITIONAL because we want
-to reserve 3 values for the multi-value INSERT above.
+innobase_next_autoinc() will be called with increment set to 3 where
+autoinc_lock_mode != TRADITIONAL because we want to reserve 3 values for
+the multi-value INSERT above.
@return the next value */
-static
+UNIV_INTERN
ulonglong
innobase_next_autoinc(
/*==================*/
@@ -1886,6 +1903,7 @@ innobase_next_autoinc(
in reality a negative value.The visual studio compilers converts
large double values automatically into unsigned long long datatype
maximum value */
+
if (block >= max_value
|| offset > max_value
|| current >= max_value
@@ -2055,7 +2073,7 @@ trx_deregister_from_2pc(
trx_t* trx) /* in: transaction */
{
trx->is_registered = 0;
- trx->active_commit_ordered = 0;
+ trx->active_commit_ordered = 0;
}
/*********************************************************************//**
@@ -2082,6 +2100,78 @@ trx_is_started(
}
/*********************************************************************//**
+Copy table flags from MySQL's HA_CREATE_INFO into an InnoDB table object.
+Those flags are stored in .frm file and end up in the MySQL table object,
+but are frequently used inside InnoDB so we keep their copies into the
+InnoDB table object. */
+UNIV_INTERN
+void
+innobase_copy_frm_flags_from_create_info(
+/*=====================================*/
+ dict_table_t* innodb_table, /*!< in/out: InnoDB table */
+ HA_CREATE_INFO* create_info) /*!< in: create info */
+{
+ ibool ps_on;
+ ibool ps_off;
+
+ if (dict_table_is_temporary(innodb_table) || srv_read_only_mode) {
+ /* Temp tables do not use persistent stats. */
+ ps_on = FALSE;
+ ps_off = TRUE;
+ } else {
+ ps_on = create_info->table_options
+ & HA_OPTION_STATS_PERSISTENT;
+ ps_off = create_info->table_options
+ & HA_OPTION_NO_STATS_PERSISTENT;
+ }
+
+ dict_stats_set_persistent(innodb_table, ps_on, ps_off);
+
+ dict_stats_auto_recalc_set(
+ innodb_table,
+ create_info->stats_auto_recalc == HA_STATS_AUTO_RECALC_ON,
+ create_info->stats_auto_recalc == HA_STATS_AUTO_RECALC_OFF);
+
+ innodb_table->stats_sample_pages = create_info->stats_sample_pages;
+}
+
+/*********************************************************************//**
+Copy table flags from MySQL's TABLE_SHARE into an InnoDB table object.
+Those flags are stored in .frm file and end up in the MySQL table object,
+but are frequently used inside InnoDB so we keep their copies into the
+InnoDB table object. */
+UNIV_INTERN
+void
+innobase_copy_frm_flags_from_table_share(
+/*=====================================*/
+ dict_table_t* innodb_table, /*!< in/out: InnoDB table */
+ TABLE_SHARE* table_share) /*!< in: table share */
+{
+ ibool ps_on;
+ ibool ps_off;
+
+ if (dict_table_is_temporary(innodb_table) || srv_read_only_mode) {
+ /* Temp tables do not use persistent stats */
+ ps_on = FALSE;
+ ps_off = TRUE;
+ } else {
+ ps_on = table_share->db_create_options
+ & HA_OPTION_STATS_PERSISTENT;
+ ps_off = table_share->db_create_options
+ & HA_OPTION_NO_STATS_PERSISTENT;
+ }
+
+ dict_stats_set_persistent(innodb_table, ps_on, ps_off);
+
+ dict_stats_auto_recalc_set(
+ innodb_table,
+ table_share->stats_auto_recalc == HA_STATS_AUTO_RECALC_ON,
+ table_share->stats_auto_recalc == HA_STATS_AUTO_RECALC_OFF);
+
+ innodb_table->stats_sample_pages = table_share->stats_sample_pages;
+}
+
+/*********************************************************************//**
Construct ha_innobase handler. */
UNIV_INTERN
ha_innobase::ha_innobase(
@@ -2090,14 +2180,15 @@ ha_innobase::ha_innobase(
TABLE_SHARE* table_arg)
:handler(hton, table_arg),
int_table_flags(HA_REC_NOT_IN_SEQ |
- HA_NULL_IN_KEY | HA_CAN_VIRTUAL_COLUMNS |
+ HA_NULL_IN_KEY |
HA_CAN_INDEX_BLOBS |
HA_CAN_SQL_HANDLER |
HA_PRIMARY_KEY_REQUIRED_FOR_POSITION |
HA_PRIMARY_KEY_IN_READ_INDEX |
HA_BINLOG_ROW_CAPABLE |
HA_CAN_GEOMETRY | HA_PARTIAL_COLUMN_READ |
- HA_TABLE_SCAN_ON_INDEX | HA_CAN_FULLTEXT),
+ HA_TABLE_SCAN_ON_INDEX | HA_CAN_FULLTEXT |
+ HA_CAN_FULLTEXT_EXT | HA_CAN_EXPORT),
start_of_scan(0),
num_write_row(0)
{}
@@ -2122,6 +2213,9 @@ ha_innobase::update_thd(
{
trx_t* trx;
+ /* The table should have been opened in ha_innobase::open(). */
+ DBUG_ASSERT(prebuilt->table->n_ref_count > 0);
+
trx = check_trx_exists(thd);
if (prebuilt->trx != trx) {
@@ -2209,7 +2303,9 @@ invalidation to the transaction commit.
2) To store or retrieve a value from the query cache of an InnoDB table TBL,
any query must first ask InnoDB's permission. We must pass the thd as a
parameter because InnoDB will look at the trx id, if any, associated with
-that thd.
+that thd. Also the full_name which is used as key to search for the table
+object. The full_name is a string containing the normalized path to the
+table in the canonical format.
3) Use of the query cache for InnoDB tables is now allowed also when
AUTOCOMMIT==0 or we are inside BEGIN ... COMMIT. Thus transactions no longer
@@ -2244,11 +2340,9 @@ innobase_query_caching_of_table_permitted(
THD* thd, /*!< in: thd of the user who is trying to
store a result to the query cache or
retrieve it */
- char* full_name, /*!< in: concatenation of database name,
- the null character NUL, and the table
- name */
- uint full_name_len, /*!< in: length of the full name, i.e.
- len(dbname) + len(tablename) + 1 */
+ char* full_name, /*!< in: normalized path to the table */
+ uint full_name_len, /*!< in: length of the normalized path
+ to the table */
ulonglong *unused) /*!< unused for this engine */
{
ibool is_autocommit;
@@ -2308,16 +2402,7 @@ innobase_query_caching_of_table_permitted(
}
/* Normalize the table name to InnoDB format */
-
- memcpy(norm_name, full_name, full_name_len);
-
- norm_name[strlen(norm_name)] = '/'; /* InnoDB uses '/' as the
- separator between db and
- table */
- norm_name[full_name_len] = '\0';
-#ifdef __WIN__
- innobase_casedn_str(norm_name);
-#endif
+ normalize_table_name(norm_name, full_name);
innobase_register_trx(innodb_hton_ptr, thd, trx);
@@ -2355,7 +2440,7 @@ innobase_invalidate_query_cache(
/* Argument TRUE below means we are using transactions */
#ifdef HAVE_QUERY_CACHE
- mysql_query_cache_invalidate4((THD*) trx->mysql_thd,
+ mysql_query_cache_invalidate4(trx->mysql_thd,
full_name,
(uint32) full_name_len,
TRUE);
@@ -2374,7 +2459,7 @@ innobase_convert_identifier(
ulint buflen, /*!< in: length of buf, in bytes */
const char* id, /*!< in: identifier to convert */
ulint idlen, /*!< in: length of id, in bytes */
- void* thd, /*!< in: MySQL connection thread, or NULL */
+ THD* thd, /*!< in: MySQL connection thread, or NULL */
ibool file_id)/*!< in: TRUE=id is a table or database name;
FALSE=id is an UTF-8 string */
{
@@ -2397,7 +2482,7 @@ innobase_convert_identifier(
nz[idlen] = 0;
s = nz2;
- idlen = explain_filename((THD*) thd, nz, nz2, sizeof nz2,
+ idlen = explain_filename(thd, nz, nz2, sizeof nz2,
EXPLAIN_PARTITIONS_AS_COMMENT);
goto no_quote;
}
@@ -2406,7 +2491,7 @@ innobase_convert_identifier(
if (UNIV_UNLIKELY(!thd)) {
q = '"';
} else {
- q = get_quote_char_for_identifier((THD*) thd, s, (int) idlen);
+ q = get_quote_char_for_identifier(thd, s, (int) idlen);
}
if (q == EOF) {
@@ -2462,7 +2547,7 @@ innobase_convert_name(
ulint buflen, /*!< in: length of buf, in bytes */
const char* id, /*!< in: identifier to convert */
ulint idlen, /*!< in: length of id, in bytes */
- void* thd, /*!< in: MySQL connection thread, or NULL */
+ THD* thd, /*!< in: MySQL connection thread, or NULL */
ibool table_id)/*!< in: TRUE=id is a table or database name;
FALSE=id is an index name */
{
@@ -2504,14 +2589,13 @@ no_db_name:
}
return(s);
-
}
/*****************************************************************//**
A wrapper function of innobase_convert_name(), convert a table or
index name to the MySQL system_charset_info (UTF-8) and quote it if needed.
@return pointer to the end of buf */
-static inline
+UNIV_INTERN
void
innobase_format_name(
/*==================*/
@@ -2537,9 +2621,9 @@ UNIV_INTERN
ibool
trx_is_interrupted(
/*===============*/
- trx_t* trx) /*!< in: transaction */
+ const trx_t* trx) /*!< in: transaction */
{
- return(trx && trx->mysql_thd && thd_kill_level((THD*) trx->mysql_thd));
+ return(trx && trx->mysql_thd && thd_kill_level(trx->mysql_thd));
}
/**********************************************************************//**
@@ -2551,8 +2635,20 @@ trx_is_strict(
/*==========*/
trx_t* trx) /*!< in: transaction */
{
- return(trx && trx->mysql_thd
- && THDVAR((THD*) trx->mysql_thd, strict_mode));
+ return(trx && trx->mysql_thd && THDVAR(trx->mysql_thd, strict_mode));
+}
+
+/**********************************************************************//**
+Determines if the current MySQL thread is running in strict mode.
+If thd==NULL, THDVAR returns the global value of innodb-strict-mode.
+@return TRUE if strict */
+UNIV_INLINE
+ibool
+thd_is_strict(
+/*==========*/
+ THD* thd) /*!< in: MySQL thread descriptor */
+{
+ return(THDVAR(thd, strict_mode));
}
/**************************************************************//**
@@ -2568,6 +2664,7 @@ ha_innobase::reset_template(void)
prebuilt->keep_other_fields_on_keyread = 0;
prebuilt->read_just_key = 0;
+ prebuilt->in_fts_query = 0;
/* Reset index condition pushdown state. */
if (prebuilt->idx_cond) {
prebuilt->idx_cond = NULL;
@@ -2663,14 +2760,14 @@ innobase_init(
innobase_hton->savepoint_rollback = innobase_rollback_to_savepoint;
innobase_hton->savepoint_release = innobase_release_savepoint;
innobase_hton->prepare_ordered= NULL;
- innobase_hton->commit_ordered= innobase_commit_ordered;
+ innobase_hton->commit_ordered= innobase_commit_ordered;
innobase_hton->commit = innobase_commit;
innobase_hton->rollback = innobase_rollback;
innobase_hton->prepare = innobase_xa_prepare;
innobase_hton->recover = innobase_xa_recover;
innobase_hton->commit_by_xid = innobase_commit_by_xid;
innobase_hton->rollback_by_xid = innobase_rollback_by_xid;
- innobase_hton->commit_checkpoint_request=innobase_checkpoint_request;
+ innobase_hton->commit_checkpoint_request=innobase_checkpoint_request;
innobase_hton->create_cursor_read_view = innobase_create_cursor_view;
innobase_hton->set_cursor_read_view = innobase_set_cursor_view;
innobase_hton->close_cursor_read_view = innobase_close_cursor_view;
@@ -2687,9 +2784,8 @@ innobase_init(
innobase_hton->release_temporary_latches =
innobase_release_temporary_latches;
-
- innobase_hton->alter_table_flags = innobase_alter_table_flags;
- innobase_hton->kill_query = innobase_kill_query;
+ innobase_hton->kill_query = innobase_kill_query;
+ innobase_hton->data = &innodb_api_cb;
ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR);
@@ -2756,12 +2852,12 @@ innobase_init(
srv_data_home = (innobase_data_home_dir ? innobase_data_home_dir :
default_path);
- /* Set default InnoDB data file size to 10 MB and let it be
+ /* Set default InnoDB data file size to 12 MB and let it be
auto-extending. Thus users can use InnoDB in >= 4.0 without having
to specify any startup options. */
if (!innobase_data_file_path) {
- innobase_data_file_path = (char*) "ibdata1:10M:autoextend";
+ innobase_data_file_path = (char*) "ibdata1:12M:autoextend";
}
/* Since InnoDB edits the argument in the next call, we make another
@@ -2785,8 +2881,8 @@ mem_free_and_error:
/* The default dir for log files is the datadir of MySQL */
- if (!innobase_log_group_home_dir) {
- innobase_log_group_home_dir = default_path;
+ if (!srv_log_group_home_dir) {
+ srv_log_group_home_dir = default_path;
}
#ifdef UNIV_LOG_ARCHIVE
@@ -2799,12 +2895,12 @@ mem_free_and_error:
srv_arch_dir = innobase_log_arch_dir;
#endif /* UNIG_LOG_ARCHIVE */
- ret = (bool)
- srv_parse_log_group_home_dirs(innobase_log_group_home_dir);
+ srv_normalize_path_for_win(srv_log_group_home_dir);
- if (ret == FALSE || innobase_mirrored_log_groups != 1) {
- sql_print_error("syntax error in innodb_log_group_home_dir, or a "
- "wrong number of mirrored log groups");
+ if (strchr(srv_log_group_home_dir, ';')
+ || innobase_mirrored_log_groups != 1) {
+ sql_print_error("syntax error in innodb_log_group_home_dir, "
+ "or a wrong number of mirrored log groups");
goto mem_free_and_error;
}
@@ -2896,12 +2992,52 @@ innobase_change_buffering_inited_ok:
innobase_change_buffering = (char*)
innobase_change_buffering_values[ibuf_use];
+ /* Check that interdependent parameters have sane values. */
+ if (srv_max_buf_pool_modified_pct < srv_max_dirty_pages_pct_lwm) {
+ sql_print_warning("InnoDB: innodb_max_dirty_pages_pct_lwm"
+ " cannot be set higher than"
+ " innodb_max_dirty_pages_pct.\n"
+ "InnoDB: Setting"
+ " innodb_max_dirty_pages_pct_lwm to %lu\n",
+ srv_max_buf_pool_modified_pct);
+
+ srv_max_dirty_pages_pct_lwm = srv_max_buf_pool_modified_pct;
+ }
+
+ if (srv_max_io_capacity == SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT) {
+
+ if (srv_io_capacity >= SRV_MAX_IO_CAPACITY_LIMIT / 2) {
+ /* Avoid overflow. */
+ srv_max_io_capacity = SRV_MAX_IO_CAPACITY_LIMIT;
+ } else {
+ /* The user has not set the value. We should
+ set it based on innodb_io_capacity. */
+ srv_max_io_capacity =
+ ut_max(2 * srv_io_capacity, 2000);
+ }
+
+ } else if (srv_max_io_capacity < srv_io_capacity) {
+ sql_print_warning("InnoDB: innodb_io_capacity"
+ " cannot be set higher than"
+ " innodb_io_capacity_max.\n"
+ "InnoDB: Setting"
+ " innodb_io_capacity to %lu\n",
+ srv_max_io_capacity);
+
+ srv_io_capacity = srv_max_io_capacity;
+ }
+
+ if (!is_filename_allowed(srv_buf_dump_filename,
+ strlen(srv_buf_dump_filename), FALSE)) {
+ sql_print_error("InnoDB: innodb_buffer_pool_filename"
+ " cannot have colon (:) in the file name.");
+ goto mem_free_and_error;
+ }
+
/* --------------------------------------------------*/
srv_file_flush_method_str = innobase_file_flush_method;
- srv_n_log_groups = (ulint) innobase_mirrored_log_groups;
- srv_n_log_files = (ulint) innobase_log_files_in_group;
srv_log_file_size = (ib_uint64_t) innobase_log_file_size;
#ifdef UNIV_LOG_ARCHIVE
@@ -2927,6 +3063,18 @@ innobase_change_buffering_inited_ok:
srv_log_buffer_size = (ulint) innobase_log_buffer_size;
+ if (innobase_buffer_pool_instances == 0) {
+ innobase_buffer_pool_instances = 8;
+
+#if defined(__WIN__) && !defined(_WIN64)
+ if (innobase_buffer_pool_size > 1331 * 1024 * 1024) {
+ innobase_buffer_pool_instances
+ = ut_min(MAX_BUFFER_POOLS,
+ (long) (innobase_buffer_pool_size
+ / (128 * 1024 * 1024)));
+ }
+#endif /* defined(__WIN__) && !defined(_WIN64) */
+ }
srv_buf_pool_size = (ulint) innobase_buffer_pool_size;
srv_buf_pool_instances = (ulint) innobase_buffer_pool_instances;
@@ -2959,9 +3107,10 @@ innobase_change_buffering_inited_ok:
srv_n_read_io_threads = (ulint) innobase_read_io_threads;
srv_n_write_io_threads = (ulint) innobase_write_io_threads;
- srv_force_recovery = (ulint) innobase_force_recovery;
-
srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
+
+ page_compression_level = (ulint) innobase_compression_level;
+
if (!innobase_use_checksums) {
ut_print_timestamp(stderr);
fprintf(stderr,
@@ -2992,6 +3141,12 @@ innobase_change_buffering_inited_ok:
"level instead, see " REFMAN "set-transaction.html.\n");
}
+ if (innobase_open_files < 10) {
+ innobase_open_files = 300;
+ if (srv_file_per_table && table_cache_size > 300) {
+ innobase_open_files = table_cache_size;
+ }
+ }
srv_max_n_open_files = (ulint) innobase_open_files;
srv_innodb_status = (ibool) innobase_create_status_file;
@@ -3059,7 +3214,7 @@ innobase_change_buffering_inited_ok:
/* Since we in this module access directly the fields of a trx
struct, and due to different headers and flags it might happen that
- mutex_t has a different size in this module and in InnoDB
+ ib_mutex_t has a different size in this module and in InnoDB
modules, we check at run time that the size is the same in
these compilation modules. */
@@ -3174,28 +3329,13 @@ innobase_flush_logs(
DBUG_ENTER("innobase_flush_logs");
DBUG_ASSERT(hton == innodb_hton_ptr);
- log_buffer_flush_to_disk();
+ if (!srv_read_only_mode) {
+ log_buffer_flush_to_disk();
+ }
DBUG_RETURN(result);
}
-/****************************************************************//**
-Return alter table flags supported in an InnoDB database. */
-static
-uint
-innobase_alter_table_flags(
-/*=======================*/
- uint flags)
-{
- return(HA_INPLACE_ADD_INDEX_NO_READ_WRITE
- | HA_INPLACE_ADD_INDEX_NO_WRITE
- | HA_INPLACE_DROP_INDEX_NO_READ_WRITE
- | HA_INPLACE_ADD_UNIQUE_INDEX_NO_READ_WRITE
- | HA_INPLACE_ADD_UNIQUE_INDEX_NO_WRITE
- | HA_INPLACE_DROP_UNIQUE_INDEX_NO_READ_WRITE
- | HA_INPLACE_ADD_PK_INDEX_NO_READ_WRITE);
-}
-
/*****************************************************************//**
Commits a transaction in an InnoDB database. */
static
@@ -3410,9 +3550,6 @@ innobase_commit(
innobase_commit_ordered_2(trx, thd);
}
- /* We were instructed to commit the whole transaction, or
- this is an SQL statement end and autocommit is on */
-
/* We did the first part already in innobase_commit_ordered(),
Now finish by doing a write + flush of logs. */
trx_commit_complete_for_mysql(trx);
@@ -3462,7 +3599,7 @@ innobase_rollback(
transaction FALSE - rollback the current
statement only */
{
- int error = 0;
+ dberr_t error;
trx_t* trx;
DBUG_ENTER("innobase_rollback");
@@ -3511,7 +3648,7 @@ innobase_rollback_trx(
/*==================*/
trx_t* trx) /*!< in: transaction */
{
- int error = 0;
+ dberr_t error = DB_SUCCESS;
DBUG_ENTER("innobase_rollback_trx");
DBUG_PRINT("trans", ("aborting transaction"));
@@ -3610,6 +3747,7 @@ innobase_checkpoint_request(
Log code calls this whenever log has been written and/or flushed up
to a new position. We use this to notify upper layer of a new commit
checkpoint when necessary.*/
+extern "C" UNIV_INTERN
void
innobase_mysql_log_notify(
/*===============*/
@@ -3692,7 +3830,7 @@ innobase_rollback_to_savepoint(
void* savepoint) /*!< in: savepoint data */
{
ib_int64_t mysql_binlog_cache_pos;
- int error = 0;
+ dberr_t error;
trx_t* trx;
char name[64];
@@ -3713,7 +3851,7 @@ innobase_rollback_to_savepoint(
longlong2str((ulint) savepoint, name, 36);
- error = (int) trx_rollback_to_savepoint_for_mysql(
+ error = trx_rollback_to_savepoint_for_mysql(
trx, name, &mysql_binlog_cache_pos);
if (error == DB_SUCCESS && trx->fts_trx != NULL) {
@@ -3737,7 +3875,7 @@ innobase_release_savepoint(
savepoint should be released */
void* savepoint) /*!< in: savepoint data */
{
- int error = 0;
+ dberr_t error;
trx_t* trx;
char name[64];
@@ -3750,7 +3888,7 @@ innobase_release_savepoint(
longlong2str((ulint) savepoint, name, 36);
- error = (int) trx_release_savepoint_for_mysql(trx, name);
+ error = trx_release_savepoint_for_mysql(trx, name);
if (error == DB_SUCCESS && trx->fts_trx != NULL) {
fts_savepoint_release(trx, name);
@@ -3770,7 +3908,7 @@ innobase_savepoint(
THD* thd, /*!< in: handle to the MySQL thread */
void* savepoint) /*!< in: savepoint data */
{
- int error = 0;
+ dberr_t error;
trx_t* trx;
DBUG_ENTER("innobase_savepoint");
@@ -3797,7 +3935,7 @@ innobase_savepoint(
char name[64];
longlong2str((ulint) savepoint,name,36);
- error = (int) trx_savepoint_for_mysql(trx, name, (ib_int64_t)0);
+ error = trx_savepoint_for_mysql(trx, name, (ib_int64_t)0);
if (error == DB_SUCCESS && trx->fts_trx != NULL) {
fts_savepoint_take(trx, name);
@@ -3831,7 +3969,7 @@ innobase_close_connection(
"but transaction is active");
}
- if (trx_is_started(trx) && global_system_variables.log_warnings) {
+ if (trx_is_started(trx) && log_warnings) {
sql_print_warning(
"MySQL is closing a connection that has an active "
@@ -3848,6 +3986,27 @@ innobase_close_connection(
}
/*****************************************************************//**
+Frees a possible InnoDB trx object associated with the current THD.
+@return 0 or error number */
+UNIV_INTERN
+int
+innobase_close_thd(
+/*===============*/
+ THD* thd) /*!< in: handle to the MySQL thread of the user
+ whose resources should be free'd */
+{
+ trx_t* trx = thd_to_trx(thd);
+
+ if (!trx) {
+ return(0);
+ }
+
+ return(innobase_close_connection(innodb_hton_ptr, thd));
+}
+
+UNIV_INTERN void lock_cancel_waiting_and_release(lock_t* lock);
+
+/*****************************************************************//**
Cancel any pending lock request associated with the current THD. */
static
void
@@ -3862,10 +4021,17 @@ innobase_kill_query(
DBUG_ASSERT(hton == innodb_hton_ptr);
trx = thd_to_trx(thd);
- /* Cancel a pending lock request. */
- if (trx) {
- lock_trx_handle_wait(trx);
- }
+
+ if (trx)
+ {
+ /* Cancel a pending lock request. */
+ lock_mutex_enter();
+ trx_mutex_enter(trx);
+ if (trx->lock.wait_lock)
+ lock_cancel_waiting_and_release(trx->lock.wait_lock);
+ trx_mutex_exit(trx);
+ lock_mutex_exit();
+ }
DBUG_VOID_RETURN;
}
@@ -3981,9 +4147,9 @@ ha_innobase::index_flags(
uint,
bool) const
{
- ulong extra_flag= 0;
- if (key == table_share->primary_key)
- extra_flag= HA_CLUSTERED_INDEX;
+ ulong extra_flag= 0;
+ if (table && key == table->s->primary_key)
+ extra_flag= HA_CLUSTERED_INDEX;
return((table_share->key_info[key].algorithm == HA_KEY_ALG_FULLTEXT)
? 0
: (HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER
@@ -4065,19 +4231,10 @@ ha_innobase::primary_key_is_clustered()
return(true);
}
-/** Always normalize table name to lower case on Windows */
-#ifdef __WIN__
-#define normalize_table_name(norm_name, name) \
- normalize_table_name_low(norm_name, name, TRUE)
-#else
-#define normalize_table_name(norm_name, name) \
- normalize_table_name_low(norm_name, name, FALSE)
-#endif /* __WIN__ */
-
/*****************************************************************//**
Normalizes a table name string. A normalized name consists of the
-database name catenated to '/' and table name. An example:
-test/mytable. On Windows normalization puts both the database name and the
+database name catenated to '/' and table name. Example: test/mytable.
+On Windows normalization puts both the database name and the
table name always to lower case if "set_lower_case" is set to TRUE. */
static
void
@@ -4090,9 +4247,11 @@ normalize_table_name_low(
to lower case */
{
char* name_ptr;
+ ulint name_len;
char* db_ptr;
ulint db_len;
char* ptr;
+ ulint norm_len;
/* Scan name from the end */
@@ -4104,6 +4263,7 @@ normalize_table_name_low(
}
name_ptr = ptr + 1;
+ name_len = strlen(name_ptr);
/* skip any number of path separators */
while (ptr >= name && (*ptr == '\\' || *ptr == '/')) {
@@ -4122,11 +4282,15 @@ normalize_table_name_low(
db_ptr = ptr + 1;
+ norm_len = db_len + name_len + sizeof "/";
+ ut_a(norm_len < FN_REFLEN - 1);
+
memcpy(norm_name, db_ptr, db_len);
norm_name[db_len] = '/';
- memcpy(norm_name + db_len + 1, name_ptr, strlen(name_ptr) + 1);
+ /* Copy the name and null-byte. */
+ memcpy(norm_name + db_len + 1, name_ptr, name_len + 1);
if (set_lower_case) {
innobase_casedn_str(norm_name);
@@ -4141,7 +4305,7 @@ void
test_normalize_table_name_low()
/*===========================*/
{
- char norm_name[128];
+ char norm_name[FN_REFLEN];
const char* test_data[][2] = {
/* input, expected result */
{"./mysqltest/t1", "mysqltest/t1"},
@@ -4197,12 +4361,84 @@ test_normalize_table_name_low()
}
}
}
+
+/*********************************************************************
+Test ut_format_name(). */
+static
+void
+test_ut_format_name()
+/*=================*/
+{
+ char buf[NAME_LEN * 3];
+
+ struct {
+ const char* name;
+ ibool is_table;
+ ulint buf_size;
+ const char* expected;
+ } test_data[] = {
+ {"test/t1", TRUE, sizeof(buf), "\"test\".\"t1\""},
+ {"test/t1", TRUE, 12, "\"test\".\"t1\""},
+ {"test/t1", TRUE, 11, "\"test\".\"t1"},
+ {"test/t1", TRUE, 10, "\"test\".\"t"},
+ {"test/t1", TRUE, 9, "\"test\".\""},
+ {"test/t1", TRUE, 8, "\"test\"."},
+ {"test/t1", TRUE, 7, "\"test\""},
+ {"test/t1", TRUE, 6, "\"test"},
+ {"test/t1", TRUE, 5, "\"tes"},
+ {"test/t1", TRUE, 4, "\"te"},
+ {"test/t1", TRUE, 3, "\"t"},
+ {"test/t1", TRUE, 2, "\""},
+ {"test/t1", TRUE, 1, ""},
+ {"test/t1", TRUE, 0, "BUF_NOT_CHANGED"},
+ {"table", TRUE, sizeof(buf), "\"table\""},
+ {"ta'le", TRUE, sizeof(buf), "\"ta'le\""},
+ {"ta\"le", TRUE, sizeof(buf), "\"ta\"\"le\""},
+ {"ta`le", TRUE, sizeof(buf), "\"ta`le\""},
+ {"index", FALSE, sizeof(buf), "\"index\""},
+ {"ind/ex", FALSE, sizeof(buf), "\"ind/ex\""},
+ };
+
+ for (size_t i = 0; i < UT_ARR_SIZE(test_data); i++) {
+
+ memcpy(buf, "BUF_NOT_CHANGED", strlen("BUF_NOT_CHANGED") + 1);
+
+ char* ret;
+
+ ret = ut_format_name(test_data[i].name,
+ test_data[i].is_table,
+ buf,
+ test_data[i].buf_size);
+
+ ut_a(ret == buf);
+
+ if (strcmp(buf, test_data[i].expected) == 0) {
+ fprintf(stderr,
+ "ut_format_name(%s, %s, buf, %lu), "
+ "expected %s, OK\n",
+ test_data[i].name,
+ test_data[i].is_table ? "TRUE" : "FALSE",
+ test_data[i].buf_size,
+ test_data[i].expected);
+ } else {
+ fprintf(stderr,
+ "ut_format_name(%s, %s, buf, %lu), "
+ "expected %s, ERROR: got %s\n",
+ test_data[i].name,
+ test_data[i].is_table ? "TRUE" : "FALSE",
+ test_data[i].buf_size,
+ test_data[i].expected,
+ buf);
+ ut_error;
+ }
+ }
+}
#endif /* !DBUG_OFF */
/********************************************************************//**
Get the upper limit of the MySQL integral and floating-point type.
@return maximum allowed value for the field */
-static
+UNIV_INTERN
ulonglong
innobase_get_int_col_max_value(
/*===========================*/
@@ -4282,12 +4518,13 @@ innobase_match_index_columns(
DBUG_ENTER("innobase_match_index_columns");
/* Check whether user defined index column count matches */
- if (key_info->key_parts != index_info->n_user_defined_cols) {
+ if (key_info->user_defined_key_parts !=
+ index_info->n_user_defined_cols) {
DBUG_RETURN(FALSE);
}
key_part = key_info->key_part;
- key_end = key_part + key_info->key_parts;
+ key_end = key_part + key_info->user_defined_key_parts;
innodb_idx_fld = index_info->fields;
innodb_idx_fld_end = index_info->fields + index_info->n_fields;
@@ -4546,6 +4783,7 @@ ha_innobase::innobase_initialize_autoinc()
auto_inc = innobase_next_autoinc(
read_auto_inc, 1, 1, 0, col_max_value);
+
break;
}
case DB_RECORD_NOT_FOUND:
@@ -4595,12 +4833,12 @@ ha_innobase::open(
uint test_if_locked) /*!< in: not used */
{
dict_table_t* ib_table;
- char norm_name[1000];
+ char norm_name[FN_REFLEN];
THD* thd;
ulint retries = 0;
char* is_part = NULL;
ibool par_case_name_set = FALSE;
- char par_case_name[MAX_FULL_NAME_LEN + 1];
+ char par_case_name[FN_REFLEN];
DBUG_ENTER("ha_innobase::open");
@@ -4642,7 +4880,31 @@ ha_innobase::open(
retry:
/* Get pointer to a table object in InnoDB dictionary cache */
- ib_table = dict_table_open_on_name(norm_name, FALSE);
+ ib_table = dict_table_open_on_name(norm_name, FALSE, TRUE,
+ DICT_ERR_IGNORE_NONE);
+
+ if (ib_table
+ && ((!DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID)
+ && table->s->fields != dict_table_get_n_user_cols(ib_table))
+ || (DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID)
+ && (table->s->fields
+ != dict_table_get_n_user_cols(ib_table) - 1)))) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "table %s contains %lu user defined columns "
+ "in InnoDB, but %lu columns in MySQL. Please "
+ "check INFORMATION_SCHEMA.INNODB_SYS_COLUMNS and "
+ REFMAN "innodb-troubleshooting.html "
+ "for how to resolve it",
+ norm_name, (ulong) dict_table_get_n_user_cols(ib_table),
+ (ulong) table->s->fields);
+
+ /* Mark this table as corrupted, so the drop table
+ or force recovery can still use it, but not others. */
+ ib_table->corrupted = true;
+ dict_table_close(ib_table, FALSE, FALSE);
+ ib_table = NULL;
+ is_part = NULL;
+ }
if (NULL == ib_table) {
if (is_part && retries < 10) {
@@ -4656,13 +4918,13 @@ retry:
1) If boot against an installation from Windows
platform, then its partition table name could
- be all be in lower case in system tables. So we
- will need to check lower case name when load table.
+ be in lower case in system tables. So we will
+ need to check lower case name when load table.
- 2) If we boot an installation from other case
+ 2) If we boot an installation from other case
sensitive platform in Windows, we might need to
- check the existence of table name without lowering
- case them in the system table. */
+ check the existence of table name without lower
+ case in the system table. */
if (innobase_get_lower_case_table_names() == 1) {
if (!par_case_name_set) {
@@ -4670,9 +4932,7 @@ retry:
/* Check for the table using lower
case name, including the partition
separator "P" */
- memcpy(par_case_name, norm_name,
- strlen(norm_name));
- par_case_name[strlen(norm_name)] = 0;
+ strcpy(par_case_name, norm_name);
innobase_casedn_str(par_case_name);
#else
/* On Windows platfrom, check
@@ -4686,7 +4946,8 @@ retry:
}
ib_table = dict_table_open_on_name(
- par_case_name, FALSE);
+ par_case_name, FALSE, TRUE,
+ DICT_ERR_IGNORE_NONE);
}
if (!ib_table) {
@@ -4724,21 +4985,13 @@ retry:
retries);
}
- sql_print_error("Cannot find or open table %s from\n"
- "the internal data dictionary of InnoDB "
- "though the .frm file for the\n"
- "table exists. Maybe you have deleted and "
- "recreated InnoDB data\n"
- "files but have forgotten to delete the "
- "corresponding .frm files\n"
- "of InnoDB tables, or you have moved .frm "
- "files to another database?\n"
- "or, the table contains indexes that this "
- "version of the engine\n"
- "doesn't support.\n"
- "See " REFMAN "innodb-troubleshooting.html\n"
- "how you can resolve the problem.\n",
- norm_name);
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Cannot open table %s from the internal data "
+ "dictionary of InnoDB though the .frm file "
+ "for the table exists. See "
+ REFMAN "innodb-troubleshooting.html for how "
+ "you can resolve the problem.", norm_name);
+
free_share(share);
my_errno = ENOENT;
@@ -4747,21 +5000,47 @@ retry:
table_opened:
+ innobase_copy_frm_flags_from_table_share(ib_table, table->s);
+
+ dict_stats_init(ib_table);
+
MONITOR_INC(MONITOR_TABLE_OPEN);
- if (ib_table->ibd_file_missing && !thd_tablespace_op(thd)) {
- sql_print_error("MySQL is trying to open a table handle but "
- "the .ibd file for\ntable %s does not exist.\n"
- "Have you deleted the .ibd file from the "
- "database directory under\nthe MySQL datadir, "
- "or have you used DISCARD TABLESPACE?\n"
- "See " REFMAN "innodb-troubleshooting.html\n"
- "how you can resolve the problem.\n",
- norm_name);
+ bool no_tablespace;
+
+ if (dict_table_is_discarded(ib_table)) {
+
+ ib_senderrf(thd,
+ IB_LOG_LEVEL_WARN, ER_TABLESPACE_DISCARDED,
+ table->s->table_name.str);
+
+ /* Allow an open because a proper DISCARD should have set
+ all the flags and index root page numbers to FIL_NULL that
+ should prevent any DML from running but it should allow DDL
+ operations. */
+
+ no_tablespace = false;
+
+ } else if (ib_table->ibd_file_missing) {
+
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_WARN,
+ ER_TABLESPACE_MISSING, norm_name);
+
+ /* This means we have no idea what happened to the tablespace
+ file, best to play it safe. */
+
+ no_tablespace = true;
+ } else {
+ no_tablespace = false;
+ }
+
+ if (!thd_tablespace_op(thd) && no_tablespace) {
free_share(share);
my_errno = ENOENT;
- dict_table_close(ib_table, FALSE);
+ dict_table_close(ib_table, FALSE, FALSE);
+
DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
}
@@ -4909,7 +5188,9 @@ table_opened:
}
/* Only if the table has an AUTOINC column. */
- if (prebuilt->table != NULL && table->found_next_number_field != NULL) {
+ if (prebuilt->table != NULL
+ && !prebuilt->table->ibd_file_missing
+ && table->found_next_number_field != NULL) {
dict_table_autoinc_lock(prebuilt->table);
/* Since a table can already be "open" in InnoDB's internal
@@ -4930,6 +5211,31 @@ table_opened:
}
UNIV_INTERN
+handler*
+ha_innobase::clone(
+/*===============*/
+ const char* name, /*!< in: table name */
+ MEM_ROOT* mem_root) /*!< in: memory context */
+{
+ ha_innobase* new_handler;
+
+ DBUG_ENTER("ha_innobase::clone");
+
+ new_handler = static_cast<ha_innobase*>(handler::clone(name,
+ mem_root));
+ if (new_handler) {
+ DBUG_ASSERT(new_handler->prebuilt != NULL);
+ DBUG_ASSERT(new_handler->user_thd == user_thd);
+ DBUG_ASSERT(new_handler->prebuilt->trx == prebuilt->trx);
+
+ new_handler->prebuilt->select_lock_type
+ = prebuilt->select_lock_type;
+ }
+
+ DBUG_RETURN(new_handler);
+}
+
+UNIV_INTERN
uint
ha_innobase::max_supported_key_part_length() const
/*==============================================*/
@@ -4994,36 +5300,6 @@ get_field_offset(
return((uint) (field->ptr - table->record[0]));
}
-/**************************************************************//**
-Checks if a field in a record is SQL NULL. Uses the record format
-information in table to track the null bit in record.
-@return 1 if NULL, 0 otherwise */
-static inline
-uint
-field_in_record_is_null(
-/*====================*/
- TABLE* table, /*!< in: MySQL table object */
- Field* field, /*!< in: MySQL field object */
- char* record) /*!< in: a row in MySQL format */
-{
- int null_offset;
-
- if (!field->null_ptr) {
-
- return(0);
- }
-
- null_offset = (uint) ((char*) field->null_ptr
- - (char*) table->record[0]);
-
- if (record[null_offset] & field->null_bit) {
-
- return(1);
- }
-
- return(0);
-}
-
/*************************************************************//**
InnoDB uses this function to compare two data fields for which the data type
is such that we must use MySQL code to compare them. NOTE that the prototype
@@ -5483,6 +5759,7 @@ get_innobase_type_from_mysql_type(
case HA_KEYTYPE_END:
ut_error;
}
+
return(0);
}
@@ -5512,7 +5789,7 @@ innobase_read_from_2_little_endian(
/*===============================*/
const uchar* buf) /*!< in: from where to read */
{
- return (uint) ((ulint)(buf[0]) + 256 * ((ulint)(buf[1])));
+ return((uint) ((ulint)(buf[0]) + 256 * ((ulint)(buf[1]))));
}
/*******************************************************************//**
@@ -5530,7 +5807,8 @@ ha_innobase::store_key_val_for_row(
{
KEY* key_info = table->key_info + keynr;
KEY_PART_INFO* key_part = key_info->key_part;
- KEY_PART_INFO* end = key_part + key_info->key_parts;
+ KEY_PART_INFO* end =
+ key_part + key_info->user_defined_key_parts;
char* buff_start = buff;
enum_field_types mysql_type;
Field* field;
@@ -5906,10 +6184,9 @@ build_template_field(
templ->rec_field_no = dict_index_get_nth_col_pos(index, i);
}
- if (field->null_ptr) {
+ if (field->real_maybe_null()) {
templ->mysql_null_byte_offset =
- (ulint) ((char*) field->null_ptr
- - (char*) table->record[0]);
+ field->null_offset();
templ->mysql_null_bit_mask = (ulint) field->null_bit;
} else {
@@ -6011,6 +6288,10 @@ ha_innobase::build_template(
prebuilt->need_to_access_clustered = (index == clust_index);
+ /* Either prebuilt->index should be a secondary index, or it
+ should be the clustered index. */
+ ut_ad(dict_index_is_clust(index) == (index == clust_index));
+
/* Below we check column by column if we need to access
the clustered index. */
@@ -6227,11 +6508,13 @@ min value of the autoinc interval. Once that is fixed we can get rid of
the special lock handling.
@return DB_SUCCESS if all OK else error code */
UNIV_INTERN
-ulint
+dberr_t
ha_innobase::innobase_lock_autoinc(void)
/*====================================*/
{
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
+
+ ut_ad(!srv_read_only_mode);
switch (innobase_autoinc_lock_mode) {
case AUTOINC_NO_LOCKING:
@@ -6276,19 +6559,19 @@ ha_innobase::innobase_lock_autoinc(void)
ut_error;
}
- return(ulong(error));
+ return(error);
}
/********************************************************************//**
Reset the autoinc value in the table.
@return DB_SUCCESS if all went well else error code */
UNIV_INTERN
-ulint
+dberr_t
ha_innobase::innobase_reset_autoinc(
/*================================*/
ulonglong autoinc) /*!< in: value to store */
{
- ulint error;
+ dberr_t error;
error = innobase_lock_autoinc();
@@ -6299,7 +6582,7 @@ ha_innobase::innobase_reset_autoinc(
dict_table_autoinc_unlock(prebuilt->table);
}
- return(ulong(error));
+ return(error);
}
/********************************************************************//**
@@ -6307,12 +6590,12 @@ Store the autoinc value in the table. The autoinc value is only set if
it's greater than the existing autoinc value in the table.
@return DB_SUCCESS if all went well else error code */
UNIV_INTERN
-ulint
+dberr_t
ha_innobase::innobase_set_max_autoinc(
/*==================================*/
ulonglong auto_inc) /*!< in: value to store */
{
- ulint error;
+ dberr_t error;
error = innobase_lock_autoinc();
@@ -6323,7 +6606,7 @@ ha_innobase::innobase_set_max_autoinc(
dict_table_autoinc_unlock(prebuilt->table);
}
- return(ulong(error));
+ return(error);
}
/********************************************************************//**
@@ -6336,7 +6619,7 @@ ha_innobase::write_row(
/*===================*/
uchar* record) /*!< in: a row in MySQL format */
{
- ulint error = 0;
+ dberr_t error;
int error_result= 0;
ibool auto_inc_used= FALSE;
ulint sql_command;
@@ -6344,7 +6627,10 @@ ha_innobase::write_row(
DBUG_ENTER("ha_innobase::write_row");
- if (prebuilt->trx != trx) {
+ if (srv_read_only_mode) {
+ ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
+ DBUG_RETURN(HA_ERR_TABLE_READONLY);
+ } else if (prebuilt->trx != trx) {
sql_print_error("The transaction object for the table handle "
"is at %p, but for the current thread it is at "
"%p",
@@ -6362,6 +6648,8 @@ ha_innobase::write_row(
++trx->will_lock;
}
+ ha_statistic_increment(&SSV::ha_write_count);
+
sql_command = thd_sql_command(user_thd);
if ((sql_command == SQLCOM_ALTER_TABLE
@@ -6441,7 +6729,7 @@ no_commit:
innobase_get_auto_increment(). */
prebuilt->autoinc_error = DB_SUCCESS;
- if ((error = update_auto_increment())) {
+ if ((error_result = update_auto_increment())) {
/* We don't want to mask autoinc overflow errors. */
/* Handle the case where the AUTOINC sub-system
@@ -6452,15 +6740,11 @@ no_commit:
my_error(ER_AUTOINC_READ_FAILED, MYF(0));
goto func_exit;
} else if (prebuilt->autoinc_error != DB_SUCCESS) {
- error = (int) prebuilt->autoinc_error;
+ error = prebuilt->autoinc_error;
goto report_error;
}
- /* MySQL errors are passed straight back. except for
- HA_ERR_AUTO_INC_READ_FAILED. This can only happen
- for values out of range.
- */
- error_result = (int) error;
+ /* MySQL errors are passed straight back. */
goto func_exit;
}
@@ -6479,10 +6763,10 @@ no_commit:
innobase_srv_conc_enter_innodb(prebuilt->trx);
error = row_insert_for_mysql((byte*) record, prebuilt);
+ DEBUG_SYNC(user_thd, "ib_after_row_insert");
/* Handle duplicate key errors */
if (auto_inc_used) {
- ulint err;
ulonglong auto_inc;
ulonglong col_max_value;
@@ -6544,6 +6828,7 @@ set_max_autoinc:
ulonglong offset;
ulonglong increment;
+ dberr_t err;
offset = prebuilt->autoinc_offset;
increment = prebuilt->autoinc_increment;
@@ -6562,13 +6847,22 @@ set_max_autoinc:
}
}
break;
+ default:
+ break;
}
}
innobase_srv_conc_exit_innodb(prebuilt->trx);
report_error:
- error_result = convert_error_code_to_mysql((int) error,
+ if (error == DB_TABLESPACE_DELETED) {
+ ib_senderrf(
+ trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLESPACE_DISCARDED,
+ table->s->table_name.str);
+ }
+
+ error_result = convert_error_code_to_mysql(error,
prebuilt->table->flags,
user_thd);
@@ -6585,9 +6879,9 @@ func_exit:
/**********************************************************************//**
Checks which fields have changed in a row and stores information
of them to an update vector.
-@return error number or 0 */
+@return DB_SUCCESS or error code */
static
-int
+dberr_t
calc_row_difference(
/*================*/
upd_t* uvect, /*!< in/out: update vector */
@@ -6617,12 +6911,13 @@ calc_row_difference(
dfield_t dfield;
dict_index_t* clust_index;
uint i;
- ulint error = DB_SUCCESS;
ibool changes_fts_column = FALSE;
ibool changes_fts_doc_col = FALSE;
trx_t* trx = thd_to_trx(thd);
doc_id_t doc_id = FTS_NULL_DOC_ID;
+ ut_ad(!srv_read_only_mode);
+
n_fields = table->s->fields;
clust_index = dict_table_get_first_index(prebuilt->table);
@@ -6694,14 +6989,12 @@ calc_row_difference(
}
- if (field->null_ptr) {
- if (field_in_record_is_null(table, field,
- (char*) old_row)) {
+ if (field->real_maybe_null()) {
+ if (field->is_null_in_record(old_row)) {
o_len = UNIV_SQL_NULL;
}
- if (field_in_record_is_null(table, field,
- (char*) new_row)) {
+ if (field->is_null_in_record(new_row)) {
n_len = UNIV_SQL_NULL;
}
}
@@ -6838,13 +7131,7 @@ calc_row_difference(
fts_update_doc_id(
innodb_table, ufield, &trx->fts_next_doc_id);
- if (error == DB_SUCCESS) {
- ++n_changed;
- } else {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error (%lu) while updating "
- "doc id in calc_row_difference().\n", error);
- }
+ ++n_changed;
} else {
/* We have a Doc ID column, but none of FTS indexed
columns are touched, nor the Doc ID column, so set
@@ -6858,7 +7145,7 @@ calc_row_difference(
ut_a(buf <= (byte*) original_upd_buff + buff_len);
- return(error);
+ return(DB_SUCCESS);
}
/**********************************************************************//**
@@ -6877,14 +7164,17 @@ ha_innobase::update_row(
uchar* new_row) /*!< in: new row in MySQL format */
{
upd_t* uvect;
- int error = 0;
+ dberr_t error;
trx_t* trx = thd_to_trx(user_thd);
DBUG_ENTER("ha_innobase::update_row");
ut_a(prebuilt->trx == trx);
- if (!trx_is_started(trx)) {
+ if (srv_read_only_mode) {
+ ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
+ DBUG_RETURN(HA_ERR_TABLE_READONLY);
+ } else if (!trx_is_started(trx)) {
++trx->will_lock;
}
@@ -6905,6 +7195,8 @@ ha_innobase::update_row(
}
}
+ ha_statistic_increment(&SSV::ha_update_count);
+
if (prebuilt->upd_node) {
uvect = prebuilt->upd_node->update;
} else {
@@ -6972,18 +7264,18 @@ ha_innobase::update_row(
innobase_srv_conc_exit_innodb(trx);
func_exit:
- error = convert_error_code_to_mysql(error,
+ int err = convert_error_code_to_mysql(error,
prebuilt->table->flags, user_thd);
/* If success and no columns were updated. */
- if (error == 0 && uvect->n_fields == 0) {
+ if (err == 0 && uvect->n_fields == 0) {
/* This is the same as success, but instructs
MySQL that the row is not really updated and it
should not increase the count of updated rows.
This is fix for http://bugs.mysql.com/29157 */
- error = HA_ERR_RECORD_IS_THE_SAME;
- } else if (error == HA_FTS_INVALID_DOCID) {
+ err = HA_ERR_RECORD_IS_THE_SAME;
+ } else if (err == HA_FTS_INVALID_DOCID) {
my_error(HA_FTS_INVALID_DOCID, MYF(0));
}
@@ -6992,7 +7284,7 @@ func_exit:
innobase_active_small();
- DBUG_RETURN(error);
+ DBUG_RETURN(err);
}
/**********************************************************************//**
@@ -7004,17 +7296,22 @@ ha_innobase::delete_row(
/*====================*/
const uchar* record) /*!< in: a row in MySQL format */
{
- int error = 0;
+ dberr_t error;
trx_t* trx = thd_to_trx(user_thd);
DBUG_ENTER("ha_innobase::delete_row");
ut_a(prebuilt->trx == trx);
- if (!trx_is_started(trx)) {
+ if (srv_read_only_mode) {
+ ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
+ DBUG_RETURN(HA_ERR_TABLE_READONLY);
+ } else if (!trx_is_started(trx)) {
++trx->will_lock;
}
+ ha_statistic_increment(&SSV::ha_delete_count);
+
if (!prebuilt->upd_node) {
row_get_prebuilt_update_vector(prebuilt);
}
@@ -7029,15 +7326,13 @@ ha_innobase::delete_row(
innobase_srv_conc_exit_innodb(trx);
- error = convert_error_code_to_mysql(
- error, prebuilt->table->flags, user_thd);
-
/* Tell the InnoDB server that there might be work for
utility threads: */
innobase_active_small();
- DBUG_RETURN(error);
+ DBUG_RETURN(convert_error_code_to_mysql(
+ error, prebuilt->table->flags, user_thd));
}
/**********************************************************************//**
@@ -7270,21 +7565,19 @@ ha_innobase::index_read(
dict_index_t* index;
ulint match_mode = 0;
int error;
- ulint ret;
+ dberr_t ret;
DBUG_ENTER("index_read");
DEBUG_SYNC_C("ha_innobase_index_read_begin");
ut_a(prebuilt->trx == thd_to_trx(user_thd));
+ ut_ad(key_len != 0 || find_flag != HA_READ_KEY_EXACT);
+
+ ha_statistic_increment(&SSV::ha_read_key_count);
index = prebuilt->index;
if (UNIV_UNLIKELY(index == NULL) || dict_index_is_corrupted(index)) {
- DBUG_PRINT("error", ("index: %p index_corrupt: %d data_corrupt: %d",
- index,
- index ? test(index->type & DICT_CORRUPT) : 0,
- (index && index->table ?
- test(index->table->corrupted) : 0)));
prebuilt->index_usable = FALSE;
DBUG_RETURN(HA_ERR_CRASHED);
}
@@ -7357,6 +7650,7 @@ ha_innobase::index_read(
case DB_SUCCESS:
error = 0;
table->status = 0;
+ srv_stats.n_rows_read.add((size_t) prebuilt->trx->id, 1);
break;
case DB_RECORD_NOT_FOUND:
error = HA_ERR_KEY_NOT_FOUND;
@@ -7366,10 +7660,30 @@ ha_innobase::index_read(
error = HA_ERR_KEY_NOT_FOUND;
table->status = STATUS_NOT_FOUND;
break;
+ case DB_TABLESPACE_DELETED:
+
+ ib_senderrf(
+ prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLESPACE_DISCARDED,
+ table->s->table_name.str);
+
+ table->status = STATUS_NOT_FOUND;
+ error = HA_ERR_NO_SUCH_TABLE;
+ break;
+ case DB_TABLESPACE_NOT_FOUND:
+
+ ib_senderrf(
+ prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLESPACE_MISSING, MYF(0),
+ table->s->table_name.str);
+
+ table->status = STATUS_NOT_FOUND;
+ error = HA_ERR_NO_SUCH_TABLE;
+ break;
default:
- error = convert_error_code_to_mysql((int) ret,
- prebuilt->table->flags,
- user_thd);
+ error = convert_error_code_to_mysql(
+ ret, prebuilt->table->flags, user_thd);
+
table->status = STATUS_NOT_FOUND;
break;
}
@@ -7571,8 +7885,8 @@ ha_innobase::general_fetch(
uint match_mode) /*!< in: 0, ROW_SEL_EXACT, or
ROW_SEL_EXACT_PREFIX */
{
- ulint ret;
- int error = 0;
+ dberr_t ret;
+ int error;
DBUG_ENTER("general_fetch");
@@ -7589,6 +7903,7 @@ ha_innobase::general_fetch(
case DB_SUCCESS:
error = 0;
table->status = 0;
+ srv_stats.n_rows_read.add((size_t) prebuilt->trx->id, 1);
break;
case DB_RECORD_NOT_FOUND:
error = HA_ERR_END_OF_FILE;
@@ -7598,9 +7913,30 @@ ha_innobase::general_fetch(
error = HA_ERR_END_OF_FILE;
table->status = STATUS_NOT_FOUND;
break;
+ case DB_TABLESPACE_DELETED:
+
+ ib_senderrf(
+ prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLESPACE_DISCARDED,
+ table->s->table_name.str);
+
+ table->status = STATUS_NOT_FOUND;
+ error = HA_ERR_NO_SUCH_TABLE;
+ break;
+ case DB_TABLESPACE_NOT_FOUND:
+
+ ib_senderrf(
+ prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLESPACE_MISSING,
+ table->s->table_name.str);
+
+ table->status = STATUS_NOT_FOUND;
+ error = HA_ERR_NO_SUCH_TABLE;
+ break;
default:
error = convert_error_code_to_mysql(
- (int) ret, prebuilt->table->flags, user_thd);
+ ret, prebuilt->table->flags, user_thd);
+
table->status = STATUS_NOT_FOUND;
break;
}
@@ -7619,6 +7955,8 @@ ha_innobase::index_next(
uchar* buf) /*!< in/out: buffer for next row in MySQL
format */
{
+ ha_statistic_increment(&SSV::ha_read_next_count);
+
return(general_fetch(buf, ROW_SEL_NEXT, 0));
}
@@ -7633,6 +7971,8 @@ ha_innobase::index_next_same(
const uchar* key, /*!< in: key value */
uint keylen) /*!< in: key value length */
{
+ ha_statistic_increment(&SSV::ha_read_next_count);
+
return(general_fetch(buf, ROW_SEL_NEXT, last_match_mode));
}
@@ -7646,6 +7986,8 @@ ha_innobase::index_prev(
/*====================*/
uchar* buf) /*!< in/out: buffer for previous row in MySQL format */
{
+ ha_statistic_increment(&SSV::ha_read_prev_count);
+
return(general_fetch(buf, ROW_SEL_PREV, 0));
}
@@ -7662,6 +8004,7 @@ ha_innobase::index_first(
int error;
DBUG_ENTER("index_first");
+ ha_statistic_increment(&SSV::ha_read_first_count);
error = index_read(buf, NULL, 0, HA_READ_AFTER_KEY);
@@ -7687,6 +8030,7 @@ ha_innobase::index_last(
int error;
DBUG_ENTER("index_last");
+ ha_statistic_increment(&SSV::ha_read_last_count);
error = index_read(buf, NULL, 0, HA_READ_BEFORE_KEY);
@@ -7756,6 +8100,7 @@ ha_innobase::rnd_next(
int error;
DBUG_ENTER("rnd_next");
+ ha_statistic_increment(&SSV::ha_read_rnd_next_count);
if (start_of_scan) {
error = index_first(buf);
@@ -7789,6 +8134,8 @@ ha_innobase::rnd_pos(
DBUG_ENTER("rnd_pos");
DBUG_DUMP("key", pos, ref_length);
+ ha_statistic_increment(&SSV::ha_read_rnd_count);
+
ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
/* Note that we assume the length of the row reference is fixed
@@ -7813,8 +8160,6 @@ ha_innobase::ft_init()
{
DBUG_ENTER("ft_init");
- fprintf(stderr, "ft_init()\n");
-
trx_t* trx = check_trx_exists(ha_thd());
/* FTS queries are not treated as autocommit non-locking selects.
@@ -7853,15 +8198,15 @@ ha_innobase::ft_init_ext(
ulint buf_tmp_used;
uint num_errors;
- fprintf(stderr, "ft_init_ext()\n");
-
- fprintf(stderr, "keynr=%u, '%.*s'\n",
- keynr, (int) key->length(), (byte*) key->ptr());
+ if (fts_enable_diag_print) {
+ fprintf(stderr, "keynr=%u, '%.*s'\n",
+ keynr, (int) key->length(), (byte*) key->ptr());
- if (flags & FT_BOOL) {
- fprintf(stderr, "BOOL search\n");
- } else {
- fprintf(stderr, "NL search\n");
+ if (flags & FT_BOOL) {
+ fprintf(stderr, "BOOL search\n");
+ } else {
+ fprintf(stderr, "NL search\n");
+ }
}
/* FIXME: utf32 and utf16 are not compatible with some
@@ -7908,7 +8253,7 @@ ha_innobase::ft_init_ext(
if (!index || index->type != DICT_FTS) {
my_error(ER_TABLE_HAS_NO_FT, MYF(0));
- return NULL;
+ return(NULL);
}
if (!(table->fts->fts_status & ADDED_TABLE_SYNCED)) {
@@ -7919,25 +8264,69 @@ ha_innobase::ft_init_ext(
error = fts_query(trx, index, flags, query, query_len, &result);
- prebuilt->result = result;
-
// FIXME: Proper error handling and diagnostic
if (error != DB_SUCCESS) {
fprintf(stderr, "Error processing query\n");
} else {
- /* Must return an instance of a result even if it's empty */
- ut_a(prebuilt->result);
-
/* Allocate FTS handler, and instantiate it before return */
fts_hdl = (NEW_FT_INFO*) my_malloc(sizeof(NEW_FT_INFO),
MYF(0));
fts_hdl->please = (struct _ft_vft*)(&ft_vft_result);
+ fts_hdl->could_you = (struct _ft_vft_ext*)(&ft_vft_ext_result);
fts_hdl->ft_prebuilt = prebuilt;
fts_hdl->ft_result = result;
+
+ /* FIXME: Re-evaluate the condition when Bug 14469540
+ is resolved */
+ prebuilt->in_fts_query = true;
}
- return ((FT_INFO*) fts_hdl);
+ return((FT_INFO*) fts_hdl);
+}
+
+/*****************************************************************//**
+Set up search tuple for a query through FTS_DOC_ID_INDEX on
+supplied Doc ID. This is used by MySQL to retrieve the documents
+once the search result (Doc IDs) is available */
+static
+void
+innobase_fts_create_doc_id_key(
+/*===========================*/
+ dtuple_t* tuple, /* in/out: prebuilt->search_tuple */
+ const dict_index_t*
+ index, /* in: index (FTS_DOC_ID_INDEX) */
+ doc_id_t* doc_id) /* in/out: doc id to search, value
+ could be changed to storage format
+ used for search. */
+{
+ doc_id_t temp_doc_id;
+ dfield_t* dfield = dtuple_get_nth_field(tuple, 0);
+
+ ut_a(dict_index_get_n_unique(index) == 1);
+
+ dtuple_set_n_fields(tuple, index->n_fields);
+ dict_index_copy_types(tuple, index, index->n_fields);
+
+#ifdef UNIV_DEBUG
+ /* The unique Doc ID field should be an eight-bytes integer */
+ dict_field_t* field = dict_index_get_nth_field(index, 0);
+ ut_a(field->col->mtype == DATA_INT);
+ ut_ad(sizeof(*doc_id) == field->fixed_len);
+ ut_ad(innobase_strcasecmp(index->name, FTS_DOC_ID_INDEX_NAME) == 0);
+#endif /* UNIV_DEBUG */
+
+ /* Convert to storage byte order */
+ mach_write_to_8(reinterpret_cast<byte*>(&temp_doc_id), *doc_id);
+ *doc_id = temp_doc_id;
+ dfield_set_data(dfield, doc_id, sizeof(*doc_id));
+
+ dtuple_set_n_fields_cmp(tuple, 1);
+
+ for (ulint i = 1; i < index->n_fields; i++) {
+ dfield = dtuple_get_nth_field(tuple, i);
+ dfield_set_null(dfield);
+ }
}
/**********************************************************************//**
@@ -7984,6 +8373,14 @@ next_record:
if (result->current != NULL) {
dict_index_t* index;
dtuple_t* tuple = prebuilt->search_tuple;
+ doc_id_t search_doc_id;
+
+ /* If we only need information from result we can return
+ without fetching the table row */
+ if (ft_prebuilt->read_just_key) {
+ table->status= 0;
+ return(0);
+ }
index = dict_table_get_index_on_name(
prebuilt->table, FTS_DOC_ID_INDEX_NAME);
@@ -7997,48 +8394,74 @@ next_record:
fts_ranking_t* ranking = rbt_value(
fts_ranking_t, result->current);
- /* We pass a pointer to the doc_id because we need to
- convert it to storage byte order. */
- row_create_key(tuple, index, &ranking->doc_id);
+ search_doc_id = ranking->doc_id;
+
+ /* We pass a pointer of search_doc_id because it will be
+ converted to storage byte order used in the search
+ tuple. */
+ innobase_fts_create_doc_id_key(tuple, index, &search_doc_id);
innobase_srv_conc_enter_innodb(prebuilt->trx);
- ulint ret = row_search_for_mysql(
+ dberr_t ret = row_search_for_mysql(
(byte*) buf, PAGE_CUR_GE, prebuilt, ROW_SEL_EXACT, 0);
innobase_srv_conc_exit_innodb(prebuilt->trx);
-
- if (ret == DB_SUCCESS) {
+ switch (ret) {
+ case DB_SUCCESS:
error = 0;
table->status = 0;
-
- } else if (ret == DB_RECORD_NOT_FOUND) {
-
+ break;
+ case DB_RECORD_NOT_FOUND:
result->current = const_cast<ib_rbt_node_t*>(
rbt_next(result->rankings_by_rank,
result->current));
if (!result->current) {
- error = HA_ERR_KEY_NOT_FOUND;
+ /* exhaust the result set, should return
+ HA_ERR_END_OF_FILE just like
+ ha_innobase::general_fetch() and/or
+ ha_innobase::index_first() etc. */
+ error = HA_ERR_END_OF_FILE;
table->status = STATUS_NOT_FOUND;
} else {
goto next_record;
}
+ break;
+ case DB_END_OF_INDEX:
+ error = HA_ERR_END_OF_FILE;
+ table->status = STATUS_NOT_FOUND;
+ break;
+ case DB_TABLESPACE_DELETED:
- } else if (ret == DB_END_OF_INDEX) {
+ ib_senderrf(
+ prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLESPACE_DISCARDED,
+ table->s->table_name.str);
- error = HA_ERR_KEY_NOT_FOUND;
table->status = STATUS_NOT_FOUND;
- } else {
+ error = HA_ERR_NO_SUCH_TABLE;
+ break;
+ case DB_TABLESPACE_NOT_FOUND:
+ ib_senderrf(
+ prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLESPACE_MISSING,
+ table->s->table_name.str);
+
+ table->status = STATUS_NOT_FOUND;
+ error = HA_ERR_NO_SUCH_TABLE;
+ break;
+ default:
error = convert_error_code_to_mysql(
- (int) ret, 0, user_thd);
+ ret, 0, user_thd);
table->status = STATUS_NOT_FOUND;
+ break;
}
- return (error);
+ return(error);
}
return(HA_ERR_END_OF_FILE);
@@ -8052,11 +8475,6 @@ ha_innobase::ft_end()
{
fprintf(stderr, "ft_end()\n");
- if (prebuilt->result != NULL) {
- fts_query_free_result(prebuilt->result);
- prebuilt->result = NULL;
- }
-
rnd_end();
}
@@ -8110,23 +8528,21 @@ See http://bugs.mysql.com/32710 for expl. why we choose PROCESS. */
/*****************************************************************//**
Check whether there exist a column named as "FTS_DOC_ID", which is
reserved for InnoDB FTS Doc ID
-@return TRUE if there exist a "FTS_DOC_ID" column */
+@return true if there exists a "FTS_DOC_ID" column */
static
-ibool
+bool
create_table_check_doc_id_col(
/*==========================*/
trx_t* trx, /*!< in: InnoDB transaction handle */
- TABLE* form, /*!< in: information on table
+ const TABLE* form, /*!< in: information on table
columns and indexes */
ulint* doc_id_col) /*!< out: Doc ID column number if
- there exist a FTS_DOC_ID column, ULINT_UNDEFINED if column is of the
+ there exist a FTS_DOC_ID column,
+ ULINT_UNDEFINED if column is of the
wrong type/name/size */
{
- ibool find_doc_id = FALSE;
- ulint i;
-
- for (i = 0; i < form->s->fields; i++) {
- Field* field;
+ for (ulint i = 0; i < form->s->fields; i++) {
+ const Field* field;
ulint col_type;
ulint col_len;
ulint unsigned_type;
@@ -8141,21 +8557,19 @@ create_table_check_doc_id_col(
if (innobase_strcasecmp(field->field_name,
FTS_DOC_ID_COL_NAME) == 0) {
- find_doc_id = TRUE;
-
/* Note the name is case sensitive due to
our internal query parser */
if (col_type == DATA_INT
- && !field->null_ptr
+ && !field->real_maybe_null()
&& col_len == sizeof(doc_id_t)
&& (strcmp(field->field_name,
FTS_DOC_ID_COL_NAME) == 0)) {
*doc_id_col = i;
} else {
push_warning_printf(
- (THD*) trx->mysql_thd,
+ trx->mysql_thd,
Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
+ ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: FTS_DOC_ID column must be "
"of BIGINT NOT NULL type, and named "
"in all capitalized characters");
@@ -8164,38 +8578,39 @@ create_table_check_doc_id_col(
*doc_id_col = ULINT_UNDEFINED;
}
- break;
+ return(true);
}
}
- return(find_doc_id);
+ return(false);
}
/*****************************************************************//**
Creates a table definition to an InnoDB database. */
-static
+static __attribute__((nonnull, warn_unused_result))
int
create_table_def(
/*=============*/
trx_t* trx, /*!< in: InnoDB transaction handle */
- TABLE* form, /*!< in: information on table
+ const TABLE* form, /*!< in: information on table
columns and indexes */
const char* table_name, /*!< in: table name */
- const char* path_of_temp_table,/*!< in: if this is a table explicitly
+ const char* temp_path, /*!< in: if this is a table explicitly
created by the user with the
TEMPORARY keyword, then this
parameter is the dir path where the
table should be placed if we create
an .ibd file for it (no .ibd extension
- in the path, though); otherwise this
- is NULL */
+ in the path, though). Otherwise this
+ is a zero-length string */
+ const char* remote_path, /*!< in: Remote path or zero-length string */
ulint flags, /*!< in: table flags */
ulint flags2) /*!< in: table flags2 */
{
- Field* field;
+ THD* thd = trx->mysql_thd;
dict_table_t* table;
ulint n_cols;
- int error;
+ dberr_t err;
ulint col_type;
ulint col_len;
ulint nulls_allowed;
@@ -8206,17 +8621,18 @@ create_table_def(
ulint i;
ulint doc_id_col = 0;
ibool has_doc_id_col = FALSE;
+ mem_heap_t* heap;
DBUG_ENTER("create_table_def");
DBUG_PRINT("enter", ("table_name: %s", table_name));
- ut_a(trx->mysql_thd != NULL);
+ DBUG_ASSERT(thd != NULL);
/* MySQL does the name length check. But we do additional check
on the name length here */
if (strlen(table_name) > MAX_FULL_NAME_LEN) {
push_warning_printf(
- (THD*) trx->mysql_thd, Sql_condition::WARN_LEVEL_WARN,
+ thd, Sql_condition::WARN_LEVEL_WARN,
ER_TABLE_NAME,
"InnoDB: Table Name or Database Name is too long");
@@ -8228,7 +8644,7 @@ create_table_def(
if (strcmp(strchr(table_name, '/') + 1,
"innodb_table_monitor") == 0) {
push_warning(
- (THD*) trx->mysql_thd, Sql_condition::WARN_LEVEL_WARN,
+ thd, Sql_condition::WARN_LEVEL_WARN,
HA_ERR_WRONG_COMMAND,
DEPRECATED_MSG_INNODB_TABLE_MONITOR);
}
@@ -8242,7 +8658,7 @@ create_table_def(
if (doc_id_col == ULINT_UNDEFINED) {
trx_commit_for_mysql(trx);
- error = DB_ERROR;
+ err = DB_ERROR;
goto error_ret;
} else {
has_doc_id_col = TRUE;
@@ -8270,42 +8686,41 @@ create_table_def(
flags, flags2);
}
- if (path_of_temp_table) {
+ if (flags2 & DICT_TF2_TEMPORARY) {
+ ut_a(strlen(temp_path));
table->dir_path_of_temp_table =
- mem_heap_strdup(table->heap, path_of_temp_table);
+ mem_heap_strdup(table->heap, temp_path);
+ }
+
+ if (DICT_TF_HAS_DATA_DIR(flags)) {
+ ut_a(strlen(remote_path));
+ table->data_dir_path = mem_heap_strdup(table->heap, remote_path);
+ } else {
+ table->data_dir_path = NULL;
}
+ heap = mem_heap_create(1000);
for (i = 0; i < n_cols; i++) {
- field = form->field[i];
+ Field* field = form->field[i];
col_type = get_innobase_type_from_mysql_type(&unsigned_type,
field);
if (!col_type) {
push_warning_printf(
- (THD*) trx->mysql_thd,
- Sql_condition::WARN_LEVEL_WARN,
+ thd, Sql_condition::WARN_LEVEL_WARN,
ER_CANT_CREATE_TABLE,
"Error creating table '%s' with "
"column '%s'. Please check its "
"column type and try to re-create "
"the table with an appropriate "
"column type.",
- table->name, (char*) field->field_name);
+ table->name, field->field_name);
goto err_col;
}
- if (field->null_ptr) {
- nulls_allowed = 0;
- } else {
- nulls_allowed = DATA_NOT_NULL;
- }
-
- if (field->binary()) {
- binary_type = DATA_BINARY_TYPE;
- } else {
- binary_type = 0;
- }
+ nulls_allowed = field->real_maybe_null() ? 0 : DATA_NOT_NULL;
+ binary_type = field->binary() ? DATA_BINARY_TYPE : 0;
charset_no = 0;
@@ -8317,13 +8732,13 @@ create_table_def(
/* in data0type.h we assume that the
number fits in one byte in prtype */
push_warning_printf(
- (THD*) trx->mysql_thd,
- Sql_condition::WARN_LEVEL_WARN,
+ thd, Sql_condition::WARN_LEVEL_WARN,
ER_CANT_CREATE_TABLE,
"In InnoDB, charset-collation codes"
" must be below 256."
" Unsupported code %lu.",
(ulong) charset_no);
+ mem_heap_free(heap);
DBUG_RETURN(ER_CANT_CREATE_TABLE);
}
}
@@ -8355,14 +8770,15 @@ create_table_def(
field->field_name);
err_col:
dict_mem_table_free(table);
+ mem_heap_free(heap);
trx_commit_for_mysql(trx);
- error = DB_ERROR;
+ err = DB_ERROR;
goto error_ret;
}
- dict_mem_table_add_col(table, table->heap,
- (char*) field->field_name,
+ dict_mem_table_add_col(table, heap,
+ field->field_name,
col_type,
dtype_form_prtype(
(ulint) field->type()
@@ -8374,25 +8790,33 @@ err_col:
/* Add the FTS doc_id hidden column. */
if (flags2 & DICT_TF2_FTS && !has_doc_id_col) {
- fts_add_doc_id_column(table);
+ fts_add_doc_id_column(table, heap);
}
- error = row_create_table_for_mysql(table, trx);
+ err = row_create_table_for_mysql(table, trx, false);
- if (error == DB_DUPLICATE_KEY) {
- char buf[100];
+ mem_heap_free(heap);
+
+ if (err == DB_DUPLICATE_KEY || err == DB_TABLESPACE_EXISTS) {
+ char display_name[FN_REFLEN];
char* buf_end = innobase_convert_identifier(
- buf, sizeof buf - 1, table_name, strlen(table_name),
- trx->mysql_thd, TRUE);
+ display_name, sizeof(display_name) - 1,
+ table_name, strlen(table_name),
+ thd, TRUE);
*buf_end = '\0';
- my_error(ER_TABLE_EXISTS_ERROR, MYF(0), buf);
+
+ my_error(err == DB_DUPLICATE_KEY
+ ? ER_TABLE_EXISTS_ERROR
+ : ER_TABLESPACE_EXISTS, MYF(0), display_name);
}
-error_ret:
- error = convert_error_code_to_mysql(error, flags, NULL);
+ if (err == DB_SUCCESS && (flags2 & DICT_TF2_FTS)) {
+ fts_optimize_add_table(table);
+ }
- DBUG_RETURN(error);
+error_ret:
+ DBUG_RETURN(convert_error_code_to_mysql(err, flags, thd));
}
/*****************************************************************//**
@@ -8402,108 +8826,113 @@ int
create_index(
/*=========*/
trx_t* trx, /*!< in: InnoDB transaction handle */
- TABLE* form, /*!< in: information on table
+ const TABLE* form, /*!< in: information on table
columns and indexes */
ulint flags, /*!< in: InnoDB table flags */
const char* table_name, /*!< in: table name */
uint key_num) /*!< in: index number */
{
- Field* field;
dict_index_t* index;
int error;
- ulint n_fields;
- KEY* key;
- KEY_PART_INFO* key_part;
+ const KEY* key;
ulint ind_type;
- ulint col_type;
- ulint prefix_len = 0;
- ulint is_unsigned;
- ulint i;
- ulint j;
- ulint* field_lengths = NULL;
+ ulint* field_lengths;
DBUG_ENTER("create_index");
key = form->key_info + key_num;
- n_fields = key->key_parts;
-
/* Assert that "GEN_CLUST_INDEX" cannot be used as non-primary index */
ut_a(innobase_strcasecmp(key->name, innobase_index_reserve_name) != 0);
- ind_type = 0;
-
if (key->flags & HA_FULLTEXT) {
- ind_type = DICT_FTS;
- } else {
- if (key_num == form->s->primary_key) {
- ind_type = ind_type | DICT_CLUSTERED;
+ index = dict_mem_index_create(table_name, key->name, 0,
+ DICT_FTS,
+ key->user_defined_key_parts);
+
+ for (ulint i = 0; i < key->user_defined_key_parts; i++) {
+ KEY_PART_INFO* key_part = key->key_part + i;
+ dict_mem_index_add_field(
+ index, key_part->field->field_name, 0);
}
- if (key->flags & HA_NOSAME ) {
- ind_type = ind_type | DICT_UNIQUE;
- }
- }
+ DBUG_RETURN(convert_error_code_to_mysql(
+ row_create_index_for_mysql(
+ index, trx, NULL),
+ flags, NULL));
- /* We pass 0 as the space id, and determine at a lower level the space
- id where to store the table */
+ }
- index = dict_mem_index_create(table_name, key->name, 0,
- ind_type, n_fields);
+ ind_type = 0;
- if (ind_type != DICT_FTS) {
- field_lengths = (ulint*) my_malloc(
- sizeof(ulint) * n_fields, MYF(MY_FAE));
+ if (key_num == form->s->primary_key) {
+ ind_type |= DICT_CLUSTERED;
+ }
- ut_ad(!(index->type & DICT_FTS));
+ if (key->flags & HA_NOSAME) {
+ ind_type |= DICT_UNIQUE;
}
- for (i = 0; i < n_fields; i++) {
- key_part = key->key_part + i;
+ field_lengths = (ulint*) my_malloc(
+ key->user_defined_key_parts * sizeof *
+ field_lengths, MYF(MY_FAE));
- if (ind_type != DICT_FTS) {
+ /* We pass 0 as the space id, and determine at a lower level the space
+ id where to store the table */
- /* (The flag HA_PART_KEY_SEG denotes in MySQL a
- column prefix field in an index: we only store a
- specified number of first bytes of the column to
- the index field.) The flag does not seem to be
- properly set by MySQL. Let us fall back on testing
- the length of the key part versus the column. */
+ index = dict_mem_index_create(table_name, key->name, 0,
+ ind_type, key->user_defined_key_parts);
- field = NULL;
+ for (ulint i = 0; i < key->user_defined_key_parts; i++) {
+ KEY_PART_INFO* key_part = key->key_part + i;
+ ulint prefix_len;
+ ulint col_type;
+ ulint is_unsigned;
- for (j = 0; j < form->s->fields; j++) {
- field = form->field[j];
+ /* (The flag HA_PART_KEY_SEG denotes in MySQL a
+ column prefix field in an index: we only store a
+ specified number of first bytes of the column to
+ the index field.) The flag does not seem to be
+ properly set by MySQL. Let us fall back on testing
+ the length of the key part versus the column. */
- if (0 == innobase_strcasecmp(
- field->field_name,
- key_part->field->field_name)) {
- /* Found the corresponding column */
+ Field* field = NULL;
- break;
- }
- }
+ for (ulint j = 0; j < form->s->fields; j++) {
- ut_a(j < form->s->fields);
+ field = form->field[j];
- col_type = get_innobase_type_from_mysql_type(
- &is_unsigned, key_part->field);
+ if (0 == innobase_strcasecmp(
+ field->field_name,
+ key_part->field->field_name)) {
+ /* Found the corresponding column */
- if (DATA_BLOB == col_type
- || (key_part->length < field->pack_length()
- && field->type() != MYSQL_TYPE_VARCHAR)
- || (field->type() == MYSQL_TYPE_VARCHAR
- && key_part->length < field->pack_length()
- - ((Field_varstring*) field)->length_bytes)) {
+ goto found;
+ }
+ }
+ ut_error;
+found:
+ col_type = get_innobase_type_from_mysql_type(
+ &is_unsigned, key_part->field);
+
+ if (DATA_BLOB == col_type
+ || (key_part->length < field->pack_length()
+ && field->type() != MYSQL_TYPE_VARCHAR)
+ || (field->type() == MYSQL_TYPE_VARCHAR
+ && key_part->length < field->pack_length()
+ - ((Field_varstring*) field)->length_bytes)) {
+
+ switch (col_type) {
+ default:
prefix_len = key_part->length;
-
- if (col_type == DATA_INT
- || col_type == DATA_FLOAT
- || col_type == DATA_DOUBLE
- || col_type == DATA_DECIMAL) {
- sql_print_error(
+ break;
+ case DATA_INT:
+ case DATA_FLOAT:
+ case DATA_DOUBLE:
+ case DATA_DECIMAL:
+ sql_print_error(
"MySQL is trying to create a column "
"prefix index field, on an "
"inappropriate data type. Table "
@@ -8511,17 +8940,16 @@ create_index(
table_name,
key_part->field->field_name);
- prefix_len = 0;
- }
- } else {
prefix_len = 0;
}
-
- field_lengths[i] = key_part->length;
+ } else {
+ prefix_len = 0;
}
- dict_mem_index_add_field(index,
- (char*) key_part->field->field_name, prefix_len);
+ field_lengths[i] = key_part->length;
+
+ dict_mem_index_add_field(
+ index, key_part->field->field_name, prefix_len);
}
ut_ad(key->flags & HA_FULLTEXT || !(index->type & DICT_FTS));
@@ -8529,9 +8957,10 @@ create_index(
/* Even though we've defined max_supported_key_part_length, we
still do our own checking using field_lengths to be absolutely
sure we don't create too long indexes. */
- error = row_create_index_for_mysql(index, trx, field_lengths);
- error = convert_error_code_to_mysql(error, flags, NULL);
+ error = convert_error_code_to_mysql(
+ row_create_index_for_mysql(index, trx, field_lengths),
+ flags, NULL);
my_free(field_lengths);
@@ -8550,7 +8979,7 @@ create_clustered_index_when_no_primary(
const char* table_name) /*!< in: table name */
{
dict_index_t* index;
- int error;
+ dberr_t error;
/* We pass 0 as the space id, and determine at a lower level the space
id where to store the table */
@@ -8560,9 +8989,7 @@ create_clustered_index_when_no_primary(
error = row_create_index_for_mysql(index, trx, NULL);
- error = convert_error_code_to_mysql(error, flags, NULL);
-
- return(error);
+ return(convert_error_code_to_mysql(error, flags, NULL));
}
/*****************************************************************//**
@@ -8599,11 +9026,11 @@ get_row_format_name(
if (!use_tablespace) { \
push_warning_printf( \
thd, Sql_condition::WARN_LEVEL_WARN, \
- HA_WRONG_CREATE_OPTION, \
+ ER_ILLEGAL_HA_CREATE_OPTION, \
"InnoDB: ROW_FORMAT=%s requires" \
" innodb_file_per_table.", \
get_row_format_name(row_format)); \
- ret = FALSE; \
+ ret = "ROW_FORMAT"; \
}
/** If file-format is Antelope, issue warning and set ret false */
@@ -8611,11 +9038,11 @@ get_row_format_name(
if (srv_file_format < UNIV_FORMAT_B) { \
push_warning_printf( \
thd, Sql_condition::WARN_LEVEL_WARN, \
- HA_WRONG_CREATE_OPTION, \
+ ER_ILLEGAL_HA_CREATE_OPTION, \
"InnoDB: ROW_FORMAT=%s requires" \
" innodb_file_format > Antelope.", \
get_row_format_name(row_format)); \
- ret = FALSE; \
+ ret = "ROW_FORMAT"; \
}
@@ -8624,11 +9051,11 @@ Validates the create options. We may build on this function
in future. For now, it checks two specifiers:
KEY_BLOCK_SIZE and ROW_FORMAT
If innodb_strict_mode is not set then this function is a no-op
-@return TRUE if valid. */
-static
-ibool
-create_options_are_valid(
-/*=====================*/
+@return NULL if valid, string if not. */
+UNIV_INTERN
+const char*
+create_options_are_invalid(
+/*=======================*/
THD* thd, /*!< in: connection thread. */
TABLE* form, /*!< in: information on table
columns and indexes */
@@ -8636,14 +9063,14 @@ create_options_are_valid(
bool use_tablespace) /*!< in: srv_file_per_table */
{
ibool kbs_specified = FALSE;
- ibool ret = TRUE;
+ const char* ret = NULL;
enum row_type row_format = form->s->row_type;
ut_ad(thd != NULL);
/* If innodb_strict_mode is not set don't do any validation. */
if (!(THDVAR(thd, strict_mode))) {
- return(TRUE);
+ return(NULL);
}
ut_ad(form != NULL);
@@ -8663,18 +9090,18 @@ create_options_are_valid(
if (!use_tablespace) {
push_warning(
thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
+ ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: KEY_BLOCK_SIZE requires"
" innodb_file_per_table.");
- ret = FALSE;
+ ret = "KEY_BLOCK_SIZE";
}
if (srv_file_format < UNIV_FORMAT_B) {
push_warning(
thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
+ ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: KEY_BLOCK_SIZE requires"
" innodb_file_format > Antelope.");
- ret = FALSE;
+ ret = "KEY_BLOCK_SIZE";
}
/* The maximum KEY_BLOCK_SIZE (KBS) is 16. But if
@@ -8686,22 +9113,22 @@ create_options_are_valid(
if (create_info->key_block_size > kbs_max) {
push_warning_printf(
thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
+ ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: KEY_BLOCK_SIZE=%ld"
" cannot be larger than %ld.",
create_info->key_block_size,
kbs_max);
- ret = FALSE;
+ ret = "KEY_BLOCK_SIZE";
}
break;
default:
push_warning_printf(
thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
+ ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: invalid KEY_BLOCK_SIZE = %lu."
" Valid values are [1, 2, 4, 8, 16]",
create_info->key_block_size);
- ret = FALSE;
+ ret = "KEY_BLOCK_SIZE";
break;
}
}
@@ -8722,11 +9149,11 @@ create_options_are_valid(
if (kbs_specified) {
push_warning_printf(
thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
+ ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: cannot specify ROW_FORMAT = %s"
" with KEY_BLOCK_SIZE.",
get_row_format_name(row_format));
- ret = FALSE;
+ ret = "KEY_BLOCK_SIZE";
}
break;
case ROW_TYPE_DEFAULT:
@@ -8736,12 +9163,42 @@ create_options_are_valid(
case ROW_TYPE_NOT_USED:
push_warning(
thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION, \
+ ER_ILLEGAL_HA_CREATE_OPTION, \
"InnoDB: invalid ROW_FORMAT specifier.");
- ret = FALSE;
+ ret = "ROW_TYPE";
break;
}
+ /* Use DATA DIRECTORY only with file-per-table. */
+ if (create_info->data_file_name && !use_tablespace) {
+ push_warning(
+ thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: DATA DIRECTORY requires"
+ " innodb_file_per_table.");
+ ret = "DATA DIRECTORY";
+ }
+
+ /* Do not use DATA DIRECTORY with TEMPORARY TABLE. */
+ if (create_info->data_file_name
+ && create_info->options & HA_LEX_CREATE_TMP_TABLE) {
+ push_warning(
+ thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: DATA DIRECTORY cannot be used"
+ " for TEMPORARY tables.");
+ ret = "DATA DIRECTORY";
+ }
+
+ /* Do not allow INDEX_DIRECTORY */
+ if (create_info->index_file_name) {
+ push_warning_printf(
+ thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: INDEX DIRECTORY is not supported");
+ ret = "INDEX DIRECTORY";
+ }
+
return(ret);
}
@@ -8757,11 +9214,18 @@ ha_innobase::update_create_info(
ha_innobase::info(HA_STATUS_AUTO);
create_info->auto_increment_value = stats.auto_increment_value;
}
+
+ /* Update the DATA DIRECTORY name from SYS_DATAFILES. */
+ dict_get_and_save_data_dir_path(prebuilt->table, false);
+
+ if (prebuilt->table->data_dir_path) {
+ create_info->data_file_name = prebuilt->table->data_dir_path;
+ }
}
/*****************************************************************//**
Initialize the table FTS stopword list
-@TRUE if succeed */
+@return TRUE if success */
UNIV_INTERN
ibool
innobase_fts_load_stopword(
@@ -8770,68 +9234,38 @@ innobase_fts_load_stopword(
trx_t* trx, /*!< in: transaction */
THD* thd) /*!< in: current thread */
{
- return (fts_load_stopword(table, trx,
- fts_server_stopword_table,
- THDVAR(thd, ft_user_stopword_table),
- THDVAR(thd, ft_enable_stopword), FALSE));
+ return(fts_load_stopword(table, trx,
+ fts_server_stopword_table,
+ THDVAR(thd, ft_user_stopword_table),
+ THDVAR(thd, ft_enable_stopword), FALSE));
}
+
/*****************************************************************//**
-Creates a new table to an InnoDB database.
-@return error number */
+Parses the table name into normal name and either temp path or remote path
+if needed.
+@return 0 if successful, otherwise, error number */
UNIV_INTERN
int
-ha_innobase::create(
-/*================*/
- const char* name, /*!< in: table name */
- TABLE* form, /*!< in: information on table
- columns and indexes */
- HA_CREATE_INFO* create_info) /*!< in: more information of the
+ha_innobase::parse_table_name(
+/*==========================*/
+ const char* name, /*!< in/out: table name provided*/
+ HA_CREATE_INFO* create_info, /*!< in: more information of the
created table, contains also the
create statement string */
+ ulint flags, /*!< in: flags*/
+ ulint flags2, /*!< in: flags2*/
+ char* norm_name, /*!< out: normalized table name */
+ char* temp_path, /*!< out: absolute path of table */
+ char* remote_path) /*!< out: remote path of table */
{
- int error;
- trx_t* parent_trx;
- trx_t* trx;
- int primary_key_no;
- uint i;
- char name2[FN_REFLEN];
- char norm_name[FN_REFLEN];
THD* thd = ha_thd();
- ib_int64_t auto_inc_value;
- ulint fts_indexes = 0;
- ibool zip_allowed = TRUE;
- enum row_type row_format;
- rec_format_t innodb_row_format = REC_FORMAT_COMPACT;
-
- /* Cache the global variable "srv_file_per_table" to a local
- variable before using it. Note that "srv_file_per_table"
- is not under dict_sys mutex protection, and could be changed
- while creating the table. So we read the current value here
- and make all further decisions based on this. */
- bool use_tablespace = srv_file_per_table;
-
- /* Zip Shift Size - log2 - 9 of compressed page size,
- zero for uncompressed */
- ulint zip_ssize = 0;
- ulint flags = 0;
- ulint flags2 = 0;
- dict_table_t* innobase_table = NULL;
-
- /* Cache the value of innodb_file_format, in case it is
- modified by another thread while the table is being created. */
- const ulint file_format_allowed = srv_file_format;
- const char* stmt;
- size_t stmt_len;
-
- DBUG_ENTER("ha_innobase::create");
-
- DBUG_ASSERT(thd != NULL);
- DBUG_ASSERT(create_info != NULL);
+ bool use_tablespace = flags2 & DICT_TF2_USE_TABLESPACE;
+ DBUG_ENTER("ha_innobase::parse_table_name");
#ifdef __WIN__
/* Names passed in from server are in two formats:
1. <database_name>/<table_name>: for normal table creation
- 2. full path: for temp table creation, or sym link
+ 2. full path: for temp table creation, or DATA DIRECTORY.
When srv_file_per_table is on and mysqld_embedded is off,
check for full path pattern, i.e.
@@ -8842,7 +9276,7 @@ ha_innobase::create(
if (use_tablespace
&& !mysqld_embedded
- && (!create_info->options & HA_LEX_CREATE_TMP_TABLE)) {
+ && !(create_info->options & HA_LEX_CREATE_TMP_TABLE)) {
if ((name[1] == ':')
|| (name[0] == '\\' && name[1] == '\\')) {
@@ -8852,26 +9286,113 @@ ha_innobase::create(
}
#endif
- if (form->s->fields > 1000) {
- /* The limit probably should be REC_MAX_N_FIELDS - 3 = 1020,
- but we play safe here */
+ normalize_table_name(norm_name, name);
+ temp_path[0] = '\0';
+ remote_path[0] = '\0';
- DBUG_RETURN(HA_ERR_TO_BIG_ROW);
+ /* A full path is used for TEMPORARY TABLE and DATA DIRECTORY.
+ In the case of;
+ CREATE TEMPORARY TABLE ... DATA DIRECTORY={path} ... ;
+ We ignore the DATA DIRECTORY. */
+ if (create_info->options & HA_LEX_CREATE_TMP_TABLE) {
+ strncpy(temp_path, name, FN_REFLEN - 1);
}
+ if (create_info->data_file_name) {
+ bool ignore = false;
+
+ /* Use DATA DIRECTORY only with file-per-table. */
+ if (!use_tablespace) {
+ push_warning(
+ thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: DATA DIRECTORY requires"
+ " innodb_file_per_table.");
+ ignore = true;
+ }
+
+ /* Do not use DATA DIRECTORY with TEMPORARY TABLE. */
+ if (create_info->options & HA_LEX_CREATE_TMP_TABLE) {
+ push_warning(
+ thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_ILLEGAL_HA_CREATE_OPTION,
+ "InnoDB: DATA DIRECTORY cannot be"
+ " used for TEMPORARY tables.");
+ ignore = true;
+ }
+
+ if (ignore) {
+ push_warning_printf(
+ thd, Sql_condition::WARN_LEVEL_WARN,
+ WARN_OPTION_IGNORED,
+ ER_DEFAULT(WARN_OPTION_IGNORED),
+ "DATA DIRECTORY");
+ } else {
+ strncpy(remote_path, create_info->data_file_name,
+ FN_REFLEN - 1);
+ }
+ }
+
+ if (create_info->index_file_name) {
+ push_warning_printf(
+ thd, Sql_condition::WARN_LEVEL_WARN,
+ WARN_OPTION_IGNORED,
+ ER_DEFAULT(WARN_OPTION_IGNORED),
+ "INDEX DIRECTORY");
+ }
+
+ DBUG_RETURN(0);
+}
+
+/*****************************************************************//**
+Determines InnoDB table flags.
+@retval true if successful, false if error */
+UNIV_INTERN
+bool
+innobase_table_flags(
+/*=================*/
+ const TABLE* form, /*!< in: table */
+ const HA_CREATE_INFO* create_info, /*!< in: information
+ on table columns and indexes */
+ THD* thd, /*!< in: connection */
+ bool use_tablespace, /*!< in: whether to create
+ outside system tablespace */
+ ulint* flags, /*!< out: DICT_TF flags */
+ ulint* flags2) /*!< out: DICT_TF2 flags */
+{
+ DBUG_ENTER("innobase_table_flags");
+
+ const char* fts_doc_id_index_bad = NULL;
+ bool zip_allowed = true;
+ ulint zip_ssize = 0;
+ enum row_type row_format;
+ rec_format_t innodb_row_format = REC_FORMAT_COMPACT;
+ bool use_data_dir;
+
+ /* Cache the value of innodb_file_format, in case it is
+ modified by another thread while the table is being created. */
+ const ulint file_format_allowed = srv_file_format;
+
+ *flags = 0;
+ *flags2 = 0;
+
/* Check if there are any FTS indexes defined on this table. */
- for (i = 0; i < form->s->keys; i++) {
- KEY* key = form->key_info + i;
+ for (uint i = 0; i < form->s->keys; i++) {
+ const KEY* key = &form->key_info[i];
if (key->flags & HA_FULLTEXT) {
- ++fts_indexes;
+ *flags2 |= DICT_TF2_FTS;
/* We don't support FTS indexes in temporary
tables. */
if (create_info->options & HA_LEX_CREATE_TMP_TABLE) {
my_error(ER_INNODB_NO_FT_TEMP_TABLE, MYF(0));
- DBUG_RETURN(-1);
+ DBUG_RETURN(false);
+ }
+
+ if (fts_doc_id_index_bad) {
+ goto index_bad;
}
}
@@ -8884,41 +9405,15 @@ ha_innobase::create(
|| strcmp(key->name, FTS_DOC_ID_INDEX_NAME)
|| strcmp(key->key_part[0].field->field_name,
FTS_DOC_ID_COL_NAME)) {
- push_warning_printf(thd,
- Sql_condition::WARN_LEVEL_WARN,
- ER_WRONG_NAME_FOR_INDEX,
- " InnoDB: Index name %s is reserved"
- " for the unique index on"
- " FTS_DOC_ID column for FTS"
- " document ID indexing"
- " on table %s. Please check"
- " the index definition to"
- " make sure it is of correct"
- " type\n",
- FTS_DOC_ID_INDEX_NAME,
- name);
- my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
- FTS_DOC_ID_INDEX_NAME);
- DBUG_RETURN(-1);
+ fts_doc_id_index_bad = key->name;
}
- }
-
- strcpy(name2, name);
-
- normalize_table_name(norm_name, name2);
- /* Create the table definition in InnoDB */
-
- flags = 0;
-
- if (fts_indexes > 0) {
- flags2 = DICT_TF2_FTS;
- }
-
- /* Validate create options if innodb_strict_mode is set. */
- if (!create_options_are_valid(
- thd, form, create_info, use_tablespace)) {
- DBUG_RETURN(HA_WRONG_CREATE_OPTION);
+ if (fts_doc_id_index_bad && (*flags2 & DICT_TF2_FTS)) {
+index_bad:
+ my_error(ER_INNODB_FT_WRONG_DOCID_INDEX, MYF(0),
+ fts_doc_id_index_bad);
+ DBUG_RETURN(false);
+ }
}
if (create_info->key_block_size) {
@@ -8942,7 +9437,7 @@ ha_innobase::create(
if (!use_tablespace) {
push_warning(
thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
+ ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: KEY_BLOCK_SIZE requires"
" innodb_file_per_table.");
zip_allowed = FALSE;
@@ -8951,7 +9446,7 @@ ha_innobase::create(
if (file_format_allowed < UNIV_FORMAT_B) {
push_warning(
thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
+ ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: KEY_BLOCK_SIZE requires"
" innodb_file_format > Antelope.");
zip_allowed = FALSE;
@@ -8962,7 +9457,7 @@ ha_innobase::create(
PAGE_ZIP_SSIZE_MAX)) {
push_warning_printf(
thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
+ ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: ignoring KEY_BLOCK_SIZE=%lu.",
create_info->key_block_size);
}
@@ -8984,7 +9479,7 @@ ha_innobase::create(
with ALTER TABLE anyway. */
push_warning_printf(
thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
+ ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: ignoring KEY_BLOCK_SIZE=%lu"
" unless ROW_FORMAT=COMPRESSED.",
create_info->key_block_size);
@@ -9012,14 +9507,14 @@ ha_innobase::create(
if (!use_tablespace) {
push_warning_printf(
thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
+ ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: ROW_FORMAT=%s requires"
" innodb_file_per_table.",
get_row_format_name(row_format));
} else if (file_format_allowed == UNIV_FORMAT_A) {
push_warning_printf(
thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
+ ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: ROW_FORMAT=%s requires"
" innodb_file_format > Antelope.",
get_row_format_name(row_format));
@@ -9036,7 +9531,7 @@ ha_innobase::create(
case ROW_TYPE_PAGE:
push_warning(
thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
+ ER_ILLEGAL_HA_CREATE_OPTION,
"InnoDB: assuming ROW_FORMAT=COMPACT.");
case ROW_TYPE_DEFAULT:
/* If we fell through, set row format to Compact. */
@@ -9049,12 +9544,100 @@ ha_innobase::create(
if (!zip_allowed) {
zip_ssize = 0;
}
- dict_tf_set(&flags, innodb_row_format, zip_ssize);
+
+ use_data_dir = use_tablespace
+ && ((create_info->data_file_name != NULL)
+ && !(create_info->options & HA_LEX_CREATE_TMP_TABLE));
+
+ dict_tf_set(flags, innodb_row_format, zip_ssize, use_data_dir);
+
+ if (create_info->options & HA_LEX_CREATE_TMP_TABLE) {
+ *flags2 |= DICT_TF2_TEMPORARY;
+ }
+
+ if (use_tablespace) {
+ *flags2 |= DICT_TF2_USE_TABLESPACE;
+ }
+
+ DBUG_RETURN(true);
+}
+
+/*****************************************************************//**
+Creates a new table to an InnoDB database.
+@return error number */
+UNIV_INTERN
+int
+ha_innobase::create(
+/*================*/
+ const char* name, /*!< in: table name */
+ TABLE* form, /*!< in: information on table
+ columns and indexes */
+ HA_CREATE_INFO* create_info) /*!< in: more information of the
+ created table, contains also the
+ create statement string */
+{
+ int error;
+ trx_t* parent_trx;
+ trx_t* trx;
+ int primary_key_no;
+ uint i;
+ char norm_name[FN_REFLEN]; /* {database}/{tablename} */
+ char temp_path[FN_REFLEN]; /* absolute path of temp frm */
+ char remote_path[FN_REFLEN]; /* absolute path of table */
+ THD* thd = ha_thd();
+ ib_int64_t auto_inc_value;
+
+ /* Cache the global variable "srv_file_per_table" to a local
+ variable before using it. Note that "srv_file_per_table"
+ is not under dict_sys mutex protection, and could be changed
+ while creating the table. So we read the current value here
+ and make all further decisions based on this. */
+ bool use_tablespace = srv_file_per_table;
+
+ /* Zip Shift Size - log2 - 9 of compressed page size,
+ zero for uncompressed */
+ ulint flags;
+ ulint flags2;
+ dict_table_t* innobase_table = NULL;
+
+ const char* stmt;
+ size_t stmt_len;
+
+ DBUG_ENTER("ha_innobase::create");
+
+ DBUG_ASSERT(thd != NULL);
+ DBUG_ASSERT(create_info != NULL);
+
+ if (form->s->fields > REC_MAX_N_USER_FIELDS) {
+ DBUG_RETURN(HA_ERR_TOO_MANY_FIELDS);
+ } else if (srv_read_only_mode) {
+ DBUG_RETURN(HA_ERR_TABLE_READONLY);
+ }
+
+ /* Create the table definition in InnoDB */
+
+ /* Validate create options if innodb_strict_mode is set. */
+ if (create_options_are_invalid(
+ thd, form, create_info, use_tablespace)) {
+ DBUG_RETURN(HA_WRONG_CREATE_OPTION);
+ }
+
+ if (!innobase_table_flags(form, create_info,
+ thd, use_tablespace,
+ &flags, &flags2)) {
+ DBUG_RETURN(-1);
+ }
+
+ error = parse_table_name(name, create_info, flags, flags2,
+ norm_name, temp_path, remote_path);
+ if (error) {
+ DBUG_RETURN(error);
+ }
/* Look for a primary key */
primary_key_no = (form->s->primary_key != MAX_KEY ?
- (int) form->s->primary_key :
- -1);
+ (int) form->s->primary_key :
+ -1);
/* Our function innobase_get_mysql_key_number_for_index assumes
the primary key is always number 0, if it exists */
@@ -9071,14 +9654,6 @@ ha_innobase::create(
DBUG_RETURN(HA_ERR_GENERIC);
}
- if (create_info->options & HA_LEX_CREATE_TMP_TABLE) {
- flags2 |= DICT_TF2_TEMPORARY;
- }
-
- if (use_tablespace) {
- flags2 |= DICT_TF2_USE_TABLESPACE;
- }
-
/* Get the transaction associated with the current thd, or create one
if not yet created */
@@ -9097,10 +9672,8 @@ ha_innobase::create(
row_mysql_lock_data_dictionary(trx);
- error = create_table_def(trx, form, norm_name,
- create_info->options & HA_LEX_CREATE_TMP_TABLE ? name2 : NULL,
- flags, flags2);
-
+ error = create_table_def(trx, form, norm_name, temp_path,
+ remote_path, flags, flags2);
if (error) {
goto cleanup;
}
@@ -9130,20 +9703,20 @@ ha_innobase::create(
/* Create the ancillary tables that are common to all FTS indexes on
this table. */
- if (fts_indexes > 0) {
- ulint ret = 0;
+ if (flags2 & DICT_TF2_FTS) {
+ enum fts_doc_id_index_enum ret;
- innobase_table = dict_table_open_on_name_no_stats(
- norm_name, TRUE, DICT_ERR_IGNORE_NONE);
+ innobase_table = dict_table_open_on_name(
+ norm_name, TRUE, FALSE, DICT_ERR_IGNORE_NONE);
ut_a(innobase_table);
- /* Check whether there alreadys exist FTS_DOC_ID_INDEX */
+ /* Check whether there already exists FTS_DOC_ID_INDEX */
ret = innobase_fts_check_doc_id_index_in_def(
form->s->keys, form->s->key_info);
- /* Raise error if FTS_DOC_ID_INDEX is of wrong format */
- if (ret == FTS_INCORRECT_DOC_ID_INDEX) {
+ switch (ret) {
+ case FTS_INCORRECT_DOC_ID_INDEX:
push_warning_printf(thd,
Sql_condition::WARN_LEVEL_WARN,
ER_WRONG_NAME_FOR_INDEX,
@@ -9162,20 +9735,23 @@ ha_innobase::create(
fts_free(innobase_table);
}
- dict_table_close(innobase_table, TRUE);
+ dict_table_close(innobase_table, TRUE, FALSE);
my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
FTS_DOC_ID_INDEX_NAME);
error = -1;
goto cleanup;
+ case FTS_EXIST_DOC_ID_INDEX:
+ case FTS_NOT_EXIST_DOC_ID_INDEX:
+ break;
}
- error = fts_create_common_tables(
+ dberr_t err = fts_create_common_tables(
trx, innobase_table, norm_name,
(ret == FTS_EXIST_DOC_ID_INDEX));
- error = convert_error_code_to_mysql(error, 0, NULL);
+ error = convert_error_code_to_mysql(err, 0, NULL);
- dict_table_close(innobase_table, TRUE);
+ dict_table_close(innobase_table, TRUE, FALSE);
if (error) {
goto cleanup;
@@ -9196,11 +9772,11 @@ ha_innobase::create(
stmt = innobase_get_stmt(thd, &stmt_len);
if (stmt) {
- error = row_table_add_foreign_constraints(
+ dberr_t err = row_table_add_foreign_constraints(
trx, stmt, stmt_len, norm_name,
create_info->options & HA_LEX_CREATE_TMP_TABLE);
- switch (error) {
+ switch (err) {
case DB_PARENT_NO_INDEX:
push_warning_printf(
@@ -9221,9 +9797,11 @@ ha_innobase::create(
" table where referencing columns appear"
" as the first columns.\n", norm_name);
break;
+ default:
+ break;
}
- error = convert_error_code_to_mysql(error, flags, NULL);
+ error = convert_error_code_to_mysql(err, flags, NULL);
if (error) {
goto cleanup;
@@ -9231,7 +9809,7 @@ ha_innobase::create(
}
/* Cache all the FTS indexes on this table in the FTS specific
structure. They are used for FTS indexed column update handling. */
- if (fts_indexes > 0) {
+ if (flags2 & DICT_TF2_FTS) {
fts_t* fts = innobase_table->fts;
ut_a(fts != NULL);
@@ -9249,10 +9827,15 @@ ha_innobase::create(
log_buffer_flush_to_disk();
- innobase_table = dict_table_open_on_name(norm_name, FALSE);
+ innobase_table = dict_table_open_on_name(
+ norm_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
DBUG_ASSERT(innobase_table != 0);
+ innobase_copy_frm_flags_from_create_info(innobase_table, create_info);
+
+ dict_stats_update(innobase_table, DICT_STATS_EMPTY_TABLE);
+
if (innobase_table) {
/* We update the highest file format in the system table
space, if this table has higher file format setting. */
@@ -9263,9 +9846,9 @@ ha_innobase::create(
}
/* Load server stopword into FTS cache */
- if (fts_indexes > 0) {
+ if (flags2 & DICT_TF2_FTS) {
if (!innobase_fts_load_stopword(innobase_table, NULL, thd)) {
- dict_table_close(innobase_table, FALSE);
+ dict_table_close(innobase_table, FALSE, FALSE);
srv_active_wake_master_thread();
trx_free_for_mysql(trx);
DBUG_RETURN(-1);
@@ -9302,7 +9885,7 @@ ha_innobase::create(
dict_table_autoinc_unlock(innobase_table);
}
- dict_table_close(innobase_table, FALSE);
+ dict_table_close(innobase_table, FALSE, FALSE);
/* Tell the InnoDB server that there might be work for
utility threads: */
@@ -9314,7 +9897,7 @@ ha_innobase::create(
DBUG_RETURN(0);
cleanup:
- innobase_commit_low(trx);
+ trx_rollback_for_mysql(trx);
row_mysql_unlock_data_dictionary(trx);
@@ -9332,9 +9915,8 @@ ha_innobase::discard_or_import_tablespace(
/*======================================*/
my_bool discard) /*!< in: TRUE if discard, else import */
{
+ dberr_t err;
dict_table_t* dict_table;
- trx_t* trx;
- int err;
DBUG_ENTER("ha_innobase::discard_or_import_tablespace");
@@ -9342,18 +9924,85 @@ ha_innobase::discard_or_import_tablespace(
ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N);
ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
+ if (srv_read_only_mode) {
+ DBUG_RETURN(HA_ERR_TABLE_READONLY);
+ }
+
dict_table = prebuilt->table;
- trx = prebuilt->trx;
- if (discard) {
- err = row_discard_tablespace_for_mysql(dict_table->name, trx);
+ if (dict_table->space == TRX_SYS_SPACE) {
+
+ ib_senderrf(
+ prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLE_IN_SYSTEM_TABLESPACE,
+ table->s->table_name.str);
+
+ DBUG_RETURN(HA_ERR_TABLE_NEEDS_UPGRADE);
+ }
+
+ trx_start_if_not_started(prebuilt->trx);
+
+ /* In case MySQL calls this in the middle of a SELECT query, release
+ possible adaptive hash latch to avoid deadlocks of threads. */
+ trx_search_latch_release_if_reserved(prebuilt->trx);
+
+ /* Obtain an exclusive lock on the table. */
+ err = row_mysql_lock_table(
+ prebuilt->trx, dict_table, LOCK_X,
+ discard ? "setting table lock for DISCARD TABLESPACE"
+ : "setting table lock for IMPORT TABLESPACE");
+
+ if (err != DB_SUCCESS) {
+ /* unable to lock the table: do nothing */
+ } else if (discard) {
+
+ /* Discarding an already discarded tablespace should be an
+ idempotent operation. Also, if the .ibd file is missing the
+ user may want to set the DISCARD flag in order to IMPORT
+ a new tablespace. */
+
+ if (dict_table->ibd_file_missing) {
+ ib_senderrf(
+ prebuilt->trx->mysql_thd,
+ IB_LOG_LEVEL_WARN, ER_TABLESPACE_MISSING,
+ table->s->table_name.str);
+ }
+
+ err = row_discard_tablespace_for_mysql(
+ dict_table->name, prebuilt->trx);
+
+ } else if (!dict_table->ibd_file_missing) {
+ /* Commit the transaction in order to
+ release the table lock. */
+ trx_commit_for_mysql(prebuilt->trx);
+
+ ib_senderrf(
+ prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLESPACE_EXISTS, table->s->table_name.str);
+
+ DBUG_RETURN(HA_ERR_TABLE_EXIST);
} else {
- err = row_import_tablespace_for_mysql(dict_table->name, trx);
+ err = row_import_for_mysql(dict_table, prebuilt);
+
+ if (err == DB_SUCCESS) {
+
+ if (table->found_next_number_field) {
+ dict_table_autoinc_lock(dict_table);
+ innobase_initialize_autoinc();
+ dict_table_autoinc_unlock(dict_table);
+ }
+
+ info(HA_STATUS_TIME
+ | HA_STATUS_CONST
+ | HA_STATUS_VARIABLE
+ | HA_STATUS_AUTO);
+ }
}
- err = convert_error_code_to_mysql(err, dict_table->flags, NULL);
+ /* Commit the transaction in order to release the table lock. */
+ trx_commit_for_mysql(prebuilt->trx);
- DBUG_RETURN(err);
+ DBUG_RETURN(convert_error_code_to_mysql(err, dict_table->flags, NULL));
}
/*****************************************************************//**
@@ -9364,10 +10013,15 @@ int
ha_innobase::truncate()
/*===================*/
{
+ dberr_t err;
int error;
DBUG_ENTER("ha_innobase::truncate");
+ if (srv_read_only_mode) {
+ DBUG_RETURN(HA_ERR_TABLE_READONLY);
+ }
+
/* Get the transaction associated with the current thd, or create one
if not yet created, and update prebuilt->trx */
@@ -9378,11 +10032,28 @@ ha_innobase::truncate()
}
/* Truncate the table in InnoDB */
- error = row_truncate_table_for_mysql(prebuilt->table, prebuilt->trx);
+ err = row_truncate_table_for_mysql(prebuilt->table, prebuilt->trx);
- error = convert_error_code_to_mysql(error, prebuilt->table->flags,
- NULL);
+ switch (err) {
+ case DB_TABLESPACE_DELETED:
+ case DB_TABLESPACE_NOT_FOUND:
+ ib_senderrf(
+ prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ (err == DB_TABLESPACE_DELETED ?
+ ER_TABLESPACE_DISCARDED : ER_TABLESPACE_MISSING),
+ table->s->table_name.str);
+ table->status = STATUS_NOT_FOUND;
+ error = HA_ERR_NO_SUCH_TABLE;
+ break;
+
+ default:
+ error = convert_error_code_to_mysql(
+ err, prebuilt->table->flags,
+ prebuilt->trx->mysql_thd);
+ table->status = STATUS_NOT_FOUND;
+ break;
+ }
DBUG_RETURN(error);
}
@@ -9400,12 +10071,11 @@ ha_innobase::delete_table(
const char* name) /*!< in: table name */
{
ulint name_len;
- int error;
+ dberr_t err;
trx_t* parent_trx;
trx_t* trx;
- THD *thd = ha_thd();
- char norm_name[1000];
- char errstr[1024];
+ THD* thd = ha_thd();
+ char norm_name[FN_REFLEN];
DBUG_ENTER("ha_innobase::delete_table");
@@ -9413,29 +10083,21 @@ ha_innobase::delete_table(
"test_normalize_table_name_low",
test_normalize_table_name_low();
);
+ DBUG_EXECUTE_IF(
+ "test_ut_format_name",
+ test_ut_format_name();
+ );
/* Strangely, MySQL passes the table name without the '.frm'
extension, in contrast to ::create */
normalize_table_name(norm_name, name);
- if (IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(norm_name, thd)) {
+ if (srv_read_only_mode) {
+ DBUG_RETURN(HA_ERR_TABLE_READONLY);
+ } else if (IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(norm_name, thd)) {
DBUG_RETURN(HA_ERR_GENERIC);
}
- /* Remove stats for this table and all of its indexes from the
- persistent storage if it exists and if there are stats for this
- table in there. This function creates its own trx and commits
- it. */
- error = dict_stats_delete_table_stats(norm_name,
- errstr, sizeof(errstr));
- if (error != DB_SUCCESS) {
- push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_LOCK_WAIT_TIMEOUT, errstr);
- }
-
- /* Get the transaction associated with the current thd, or create one
- if not yet created */
-
parent_trx = check_trx_exists(thd);
/* In case MySQL calls this in the middle of a SELECT query, release
@@ -9456,14 +10118,14 @@ ha_innobase::delete_table(
/* We are doing a DDL operation. */
++trx->will_lock;
+ trx->ddl = true;
/* Drop the table in InnoDB */
- error = row_drop_table_for_mysql(norm_name, trx,
- thd_sql_command(thd)
- == SQLCOM_DROP_DB);
+ err = row_drop_table_for_mysql(
+ norm_name, trx, thd_sql_command(thd) == SQLCOM_DROP_DB);
- if (error == DB_TABLE_NOT_FOUND
+ if (err == DB_TABLE_NOT_FOUND
&& innobase_get_lower_case_table_names() == 1) {
char* is_part = NULL;
#ifdef __WIN__
@@ -9473,25 +10135,25 @@ ha_innobase::delete_table(
#endif /* __WIN__ */
if (is_part) {
- char par_case_name[MAX_FULL_NAME_LEN + 1];
+ char par_case_name[FN_REFLEN];
#ifndef __WIN__
/* Check for the table using lower
case name, including the partition
separator "P" */
- memcpy(par_case_name, norm_name, strlen(norm_name));
- par_case_name[strlen(norm_name)] = 0;
+ strcpy(par_case_name, norm_name);
innobase_casedn_str(par_case_name);
#else
/* On Windows platfrom, check
whether there exists table name in
system table whose name is
not being normalized to lower case */
- normalize_table_name_low(par_case_name, name, FALSE);
+ normalize_table_name_low(
+ par_case_name, name, FALSE);
#endif
- error = row_drop_table_for_mysql(par_case_name, trx,
- thd_sql_command(thd)
- == SQLCOM_DROP_DB);
+ err = row_drop_table_for_mysql(
+ par_case_name, trx,
+ thd_sql_command(thd) == SQLCOM_DROP_DB);
}
}
@@ -9510,9 +10172,7 @@ ha_innobase::delete_table(
trx_free_for_mysql(trx);
- error = convert_error_code_to_mysql(error, 0, NULL);
-
- DBUG_RETURN(error);
+ DBUG_RETURN(convert_error_code_to_mysql(err, 0, NULL));
}
/*****************************************************************//**
@@ -9538,6 +10198,10 @@ innobase_drop_database(
DBUG_ASSERT(hton == innodb_hton_ptr);
+ if (srv_read_only_mode) {
+ return;
+ }
+
/* In the Windows plugin, thd = current_thd is always NULL */
if (thd) {
trx_t* parent_trx = check_trx_exists(thd);
@@ -9593,36 +10257,36 @@ innobase_drop_database(
innobase_commit_low(trx);
trx_free_for_mysql(trx);
}
+
/*********************************************************************//**
Renames an InnoDB table.
-@return 0 or error code */
-static
-int
+@return DB_SUCCESS or error code */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
innobase_rename_table(
/*==================*/
trx_t* trx, /*!< in: transaction */
const char* from, /*!< in: old name of the table */
- const char* to, /*!< in: new name of the table */
- ibool lock_and_commit)
- /*!< in: TRUE=lock data dictionary and commit */
+ const char* to) /*!< in: new name of the table */
{
- int error;
- char* norm_to;
- char* norm_from;
+ dberr_t error;
+ char norm_to[FN_REFLEN];
+ char norm_from[FN_REFLEN];
- // Magic number 64 arbitrary
- norm_to = (char*) my_malloc(strlen(to) + 64, MYF(0));
- norm_from = (char*) my_malloc(strlen(from) + 64, MYF(0));
+ DBUG_ENTER("innobase_rename_table");
+ DBUG_ASSERT(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
+
+ ut_ad(!srv_read_only_mode);
normalize_table_name(norm_to, to);
normalize_table_name(norm_from, from);
+ DEBUG_SYNC_C("innodb_rename_table_ready");
+
/* Serialize data dictionary operations with dictionary mutex:
no deadlocks can occur then in these operations */
- if (lock_and_commit) {
- row_mysql_lock_data_dictionary(trx);
- }
+ row_mysql_lock_data_dictionary(trx);
/* Transaction must be flagged as a locking transaction or it hasn't
been started yet. */
@@ -9630,7 +10294,7 @@ innobase_rename_table(
ut_a(trx->will_lock > 0);
error = row_rename_table_for_mysql(
- norm_from, norm_to, trx, lock_and_commit);
+ norm_from, norm_to, trx, TRUE);
if (error != DB_SUCCESS) {
if (error == DB_TABLE_NOT_FOUND
@@ -9643,39 +10307,36 @@ innobase_rename_table(
#endif /* __WIN__ */
if (is_part) {
- char par_case_name[MAX_FULL_NAME_LEN + 1];
-
+ char par_case_name[FN_REFLEN];
#ifndef __WIN__
/* Check for the table using lower
case name, including the partition
separator "P" */
- memcpy(par_case_name, norm_from,
- strlen(norm_from));
- par_case_name[strlen(norm_from)] = 0;
+ strcpy(par_case_name, norm_from);
innobase_casedn_str(par_case_name);
#else
/* On Windows platfrom, check
whether there exists table name in
system table whose name is
not being normalized to lower case */
- normalize_table_name_low(par_case_name,
- from, FALSE);
+ normalize_table_name_low(
+ par_case_name, from, FALSE);
#endif
error = row_rename_table_for_mysql(
- par_case_name, norm_to, trx,
- lock_and_commit);
-
+ par_case_name, norm_to, trx, TRUE);
}
}
if (error != DB_SUCCESS) {
- FILE* ef = dict_foreign_err_file;
-
- fputs("InnoDB: Renaming table ", ef);
- ut_print_name(ef, trx, TRUE, norm_from);
- fputs(" to ", ef);
- ut_print_name(ef, trx, TRUE, norm_to);
- fputs(" failed!\n", ef);
+ if (!srv_read_only_mode) {
+ FILE* ef = dict_foreign_err_file;
+
+ fputs("InnoDB: Renaming table ", ef);
+ ut_print_name(ef, trx, TRUE, norm_from);
+ fputs(" to ", ef);
+ ut_print_name(ef, trx, TRUE, norm_to);
+ fputs(" failed!\n", ef);
+ }
} else {
#ifndef __WIN__
sql_print_warning("Rename partition table %s "
@@ -9696,20 +10357,15 @@ innobase_rename_table(
}
}
- if (lock_and_commit) {
- row_mysql_unlock_data_dictionary(trx);
-
- /* Flush the log to reduce probability that the .frm
- files and the InnoDB data dictionary get out-of-sync
- if the user runs with innodb_flush_log_at_trx_commit = 0 */
+ row_mysql_unlock_data_dictionary(trx);
- log_buffer_flush_to_disk();
- }
+ /* Flush the log to reduce probability that the .frm
+ files and the InnoDB data dictionary get out-of-sync
+ if the user runs with innodb_flush_log_at_trx_commit = 0 */
- my_free(norm_to);
- my_free(norm_from);
+ log_buffer_flush_to_disk();
- return(error);
+ DBUG_RETURN(error);
}
/*********************************************************************//**
@@ -9723,12 +10379,17 @@ ha_innobase::rename_table(
const char* to) /*!< in: new name of the table */
{
trx_t* trx;
- int error;
+ dberr_t error;
trx_t* parent_trx;
THD* thd = ha_thd();
DBUG_ENTER("ha_innobase::rename_table");
+ if (srv_read_only_mode) {
+ ib_senderrf(thd, IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
+ DBUG_RETURN(HA_ERR_TABLE_READONLY);
+ }
+
/* Get the transaction associated with the current thd, or create one
if not yet created */
@@ -9741,15 +10402,11 @@ ha_innobase::rename_table(
trx = innobase_trx_allocate(thd);
- /* Either the transaction is already flagged as a locking transaction
- or it hasn't been started yet. */
-
- ut_a(!trx_is_started(trx) || trx->will_lock > 0);
-
/* We are doing a DDL operation. */
++trx->will_lock;
+ trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
- error = innobase_rename_table(trx, from, to, TRUE);
+ error = innobase_rename_table(trx, from, to);
DEBUG_SYNC(thd, "after_innobase_rename_table");
@@ -9761,6 +10418,27 @@ ha_innobase::rename_table(
innobase_commit_low(trx);
trx_free_for_mysql(trx);
+ if (error == DB_SUCCESS) {
+ char norm_from[MAX_FULL_NAME_LEN];
+ char norm_to[MAX_FULL_NAME_LEN];
+ char errstr[512];
+ dberr_t ret;
+
+ normalize_table_name(norm_from, from);
+ normalize_table_name(norm_to, to);
+
+ ret = dict_stats_rename_table(norm_from, norm_to,
+ errstr, sizeof(errstr));
+
+ if (ret != DB_SUCCESS) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr, " InnoDB: %s\n", errstr);
+
+ push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_LOCK_WAIT_TIMEOUT, errstr);
+ }
+ }
+
/* Add a special case to handle the Duplicated Key error
and return DB_ERROR instead.
This is to avoid a possible SIGSEGV error from mysql error
@@ -9773,15 +10451,13 @@ ha_innobase::rename_table(
the dup key error here is due to an existing table whose name
is the one we are trying to rename to) and return the generic
error code. */
- if (error == (int) DB_DUPLICATE_KEY) {
+ if (error == DB_DUPLICATE_KEY) {
my_error(ER_TABLE_EXISTS_ERROR, MYF(0), to);
error = DB_ERROR;
}
- error = convert_error_code_to_mysql(error, 0, NULL);
-
- DBUG_RETURN(error);
+ DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
}
/*********************************************************************//**
@@ -9840,7 +10516,7 @@ ha_innobase::records_in_range(
goto func_exit;
}
- key_parts= key->key_parts;
+ key_parts= key->ext_key_parts;
if ((min_key && min_key->keypart_map>=(key_part_map) (1<<key_parts)) ||
(max_key && max_key->keypart_map>=(key_part_map) (1<<key_parts)))
key_parts= key->ext_key_parts;
@@ -9848,11 +10524,11 @@ ha_innobase::records_in_range(
heap = mem_heap_create(2 * (key_parts * sizeof(dfield_t)
+ sizeof(dtuple_t)));
- range_start = dtuple_create(heap, key_parts);
- dict_index_copy_types(range_start, index, key_parts);
+ range_start = dtuple_create(heap, key_parts);
+ dict_index_copy_types(range_start, index, key_parts);
- range_end = dtuple_create(heap, key_parts);
- dict_index_copy_types(range_end, index, key_parts);
+ range_end = dtuple_create(heap, key_parts);
+ dict_index_copy_types(range_end, index, key_parts);
row_sel_convert_mysql_key_to_innobase(
range_start,
@@ -9921,10 +10597,10 @@ ha_rows
ha_innobase::estimate_rows_upper_bound()
/*====================================*/
{
- dict_index_t* index;
- ulonglong estimate;
- ulonglong local_data_file_length;
- ulint stat_n_leaf_pages;
+ const dict_index_t* index;
+ ulonglong estimate;
+ ulonglong local_data_file_length;
+ ulint stat_n_leaf_pages;
DBUG_ENTER("estimate_rows_upper_bound");
@@ -9934,8 +10610,7 @@ ha_innobase::estimate_rows_upper_bound()
update_thd(ha_thd());
- prebuilt->trx->op_info = (char*)
- "calculating upper bound for table rows";
+ prebuilt->trx->op_info = "calculating upper bound for table rows";
/* In case MySQL calls this in the middle of a SELECT query, release
possible adaptive hash latch to avoid deadlocks of threads */
@@ -9951,16 +10626,15 @@ ha_innobase::estimate_rows_upper_bound()
local_data_file_length =
((ulonglong) stat_n_leaf_pages) * UNIV_PAGE_SIZE;
-
/* Calculate a minimum length for a clustered index record and from
that an upper bound for the number of rows. Since we only calculate
new statistics in row0mysql.cc when a table has grown by a threshold
factor, we must add a safety factor 2 in front of the formula below. */
- estimate = 2 * local_data_file_length /
- dict_index_calc_min_rec_len(index);
+ estimate = 2 * local_data_file_length
+ / dict_index_calc_min_rec_len(index);
- prebuilt->trx->op_info = (char*)"";
+ prebuilt->trx->op_info = "";
DBUG_RETURN((ha_rows) estimate);
}
@@ -9980,7 +10654,32 @@ ha_innobase::scan_time()
as a random disk read, that is, we do not divide the following
by 10, which would be physically realistic. */
- return((double) (prebuilt->table->stat_clustered_index_size));
+ /* The locking below is disabled for performance reasons. Without
+ it we could end up returning uninitialized value to the caller,
+ which in the worst case could make some query plan go bogus or
+ issue a Valgrind warning. */
+#if 0
+ /* avoid potential lock order violation with dict_table_stats_lock()
+ below */
+ update_thd(ha_thd());
+ trx_search_latch_release_if_reserved(prebuilt->trx);
+#endif
+
+ ulint stat_clustered_index_size;
+
+#if 0
+ dict_table_stats_lock(prebuilt->table, RW_S_LATCH);
+#endif
+
+ ut_a(prebuilt->table->stat_initialized);
+
+ stat_clustered_index_size = prebuilt->table->stat_clustered_index_size;
+
+#if 0
+ dict_table_stats_unlock(prebuilt->table, RW_S_LATCH);
+#endif
+
+ return((double) stat_clustered_index_size);
}
/******************************************************************//**
@@ -10016,6 +10715,16 @@ ha_innobase::read_time(
return(ranges + (double) rows / (double) total_rows * time_for_scan);
}
+/******************************************************************//**
+Return the size of the InnoDB memory buffer. */
+UNIV_INTERN
+longlong
+ha_innobase::get_memory_buffer_size() const
+/*=======================================*/
+{
+ return(innobase_buffer_pool_size);
+}
+
/*********************************************************************//**
Calculates the key number used inside MySQL for an Innobase index. We will
first check the "index translation table" for a match of the index to get
@@ -10041,9 +10750,6 @@ innobase_get_mysql_key_number_for_index(
unsigned int i;
ut_a(index);
- /*
- ut_ad(strcmp(index->table->name, ib_table->name) == 0);
- */
/* If index does not belong to the table object of share structure
(ib_table comes from the share structure) search the index->table
@@ -10074,12 +10780,9 @@ innobase_get_mysql_key_number_for_index(
}
}
- /* If index_count in translation table is set to 0, it
- is possible we are in the process of rebuilding table,
- do not spit error in this case */
- if (share->idx_trans_tbl.index_count) {
- /* Print an error message if we cannot find the index
- ** in the "index translation table". */
+ /* Print an error message if we cannot find the index
+ in the "index translation table". */
+ if (*index->name != TEMP_INDEX_PREFIX) {
sql_print_error("Cannot find index %s in InnoDB index "
"translation table.", index->name);
}
@@ -10103,10 +10806,16 @@ innobase_get_mysql_key_number_for_index(
ind != NULL;
ind = dict_table_get_next_index(ind)) {
if (index == ind) {
- sql_print_error("Find index %s in InnoDB index list "
+ /* Temp index is internal to InnoDB, that is
+ not present in the MySQL index list, so no
+ need to print such mismatch warning. */
+ if (*(index->name) != TEMP_INDEX_PREFIX) {
+ sql_print_warning(
+ "Find index %s in InnoDB index list "
"but not its MySQL index number "
"It could be an InnoDB internal index.",
index->name);
+ }
return(-1);
}
}
@@ -10130,45 +10839,49 @@ innodb_rec_per_key(
ha_rows records) /*!< in: estimated total records */
{
ha_rows rec_per_key;
+ ib_uint64_t n_diff;
+
+ ut_a(index->table->stat_initialized);
ut_ad(i < dict_index_get_n_unique(index));
- /* Note the stat_n_diff_key_vals[] stores the diff value with
- n-prefix indexing, so it is always stat_n_diff_key_vals[i + 1] */
- if (index->stat_n_diff_key_vals[i + 1] == 0) {
+ n_diff = index->stat_n_diff_key_vals[i];
+
+ if (n_diff == 0) {
rec_per_key = records;
} else if (srv_innodb_stats_method == SRV_STATS_NULLS_IGNORED) {
- ib_uint64_t num_null;
+ ib_uint64_t n_null;
+ ib_uint64_t n_non_null;
+
+ n_non_null = index->stat_n_non_null_key_vals[i];
/* In theory, index->stat_n_non_null_key_vals[i]
should always be less than the number of records.
Since this is statistics value, the value could
have slight discrepancy. But we will make sure
the number of null values is not a negative number. */
- if (records < index->stat_n_non_null_key_vals[i]) {
- num_null = 0;
+ if (records < n_non_null) {
+ n_null = 0;
} else {
- num_null = records - index->stat_n_non_null_key_vals[i];
+ n_null = records - n_non_null;
}
/* If the number of NULL values is the same as or
large than that of the distinct values, we could
consider that the table consists mostly of NULL value.
Set rec_per_key to 1. */
- if (index->stat_n_diff_key_vals[i + 1] <= num_null) {
+ if (n_diff <= n_null) {
rec_per_key = 1;
} else {
/* Need to exclude rows with NULL values from
rec_per_key calculation */
- rec_per_key = (ha_rows)(
- (records - num_null)
- / (index->stat_n_diff_key_vals[i + 1]
- - num_null));
+ rec_per_key = (ha_rows)
+ ((records - n_null) / (n_diff - n_null));
}
} else {
- rec_per_key = (ha_rows)
- (records / index->stat_n_diff_key_vals[i + 1]);
+ DEBUG_SYNC_C("after_checking_for_0");
+ rec_per_key = (ha_rows) (records / n_diff);
}
return(rec_per_key);
@@ -10182,17 +10895,12 @@ UNIV_INTERN
int
ha_innobase::info_low(
/*==================*/
- uint flag, /*!< in: what information MySQL
- requests */
- dict_stats_upd_option_t stats_upd_option)
- /*!< in: whether to (re) calc
- the stats or to fetch them from
- the persistent storage */
+ uint flag, /*!< in: what information is requested */
+ bool is_analyze)
{
dict_table_t* ib_table;
- dict_index_t* index;
ha_rows rec_per_key;
- ib_int64_t n_rows;
+ ib_uint64_t n_rows;
char path[FN_REFLEN];
os_file_stat_t stat_info;
@@ -10216,37 +10924,52 @@ ha_innobase::info_low(
trx_search_latch_release_if_reserved(prebuilt->trx);
ib_table = prebuilt->table;
+ DBUG_ASSERT(ib_table->n_ref_count > 0);
if (flag & HA_STATUS_TIME) {
- if (stats_upd_option != DICT_STATS_FETCH
- || innobase_stats_on_metadata) {
- /* In sql_show we call with this flag: update
- then statistics so that they are up-to-date */
- enum db_err ret;
+ if (is_analyze || innobase_stats_on_metadata) {
+
+ dict_stats_upd_option_t opt;
+ dberr_t ret;
prebuilt->trx->op_info = "updating table statistics";
+ if (dict_stats_is_persistent_enabled(ib_table)) {
+
+ ut_ad(!srv_read_only_mode);
+
+ if (is_analyze) {
+ opt = DICT_STATS_RECALC_PERSISTENT;
+ } else {
+ /* This is e.g. 'SHOW INDEXES', fetch
+ the persistent stats from disk. */
+ opt = DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY;
+ }
+ } else {
+ opt = DICT_STATS_RECALC_TRANSIENT;
+ }
+
ut_ad(!mutex_own(&dict_sys->mutex));
- ret = dict_stats_update(ib_table, stats_upd_option,
- FALSE);
+ ret = dict_stats_update(ib_table, opt);
if (ret != DB_SUCCESS) {
prebuilt->trx->op_info = "";
DBUG_RETURN(HA_ERR_GENERIC);
}
- prebuilt->trx->op_info = "returning various info to MySQL";
+ prebuilt->trx->op_info =
+ "returning various info to MySQL";
}
my_snprintf(path, sizeof(path), "%s/%s%s",
- mysql_data_home, ib_table->name, reg_ext);
+ mysql_data_home, ib_table->name, reg_ext);
unpack_filename(path,path);
/* Note that we do not know the access time of the table,
nor the CHECK TABLE time, nor the UPDATE or INSERT time. */
- if (os_file_get_status(path,&stat_info)) {
+ if (os_file_get_status(path, &stat_info, false) == DB_SUCCESS) {
stats.create_time = (ulong) stat_info.ctime;
}
}
@@ -10254,13 +10977,28 @@ ha_innobase::info_low(
if (flag & HA_STATUS_VARIABLE) {
ulint page_size;
+ ulint stat_clustered_index_size;
+ ulint stat_sum_of_other_index_sizes;
+
+ if (!(flag & HA_STATUS_NO_LOCK)) {
+ dict_table_stats_lock(ib_table, RW_S_LATCH);
+ }
+
+ ut_a(ib_table->stat_initialized);
n_rows = ib_table->stat_n_rows;
- /* Because we do not protect stat_n_rows by any mutex in a
- delete, it is theoretically possible that the value can be
- smaller than zero! TODO: fix this race.
+ stat_clustered_index_size
+ = ib_table->stat_clustered_index_size;
+
+ stat_sum_of_other_index_sizes
+ = ib_table->stat_sum_of_other_index_sizes;
+
+ if (!(flag & HA_STATUS_NO_LOCK)) {
+ dict_table_stats_unlock(ib_table, RW_S_LATCH);
+ }
+ /*
The MySQL optimizer seems to assume in a left join that n_rows
is an accurate estimate if it is zero. Of course, it is not,
since we do not have any locks on the rows yet at this phase.
@@ -10270,10 +11008,6 @@ ha_innobase::info_low(
set. That way SHOW TABLE STATUS will show the best estimate,
while the optimizer never sees the table empty. */
- if (n_rows < 0) {
- n_rows = 0;
- }
-
if (n_rows == 0 && !(flag & HA_STATUS_TIME)) {
n_rows++;
}
@@ -10303,10 +11037,10 @@ ha_innobase::info_low(
stats.records = (ha_rows) n_rows;
stats.deleted = 0;
stats.data_file_length
- = ((ulonglong) ib_table->stat_clustered_index_size)
+ = ((ulonglong) stat_clustered_index_size)
* page_size;
- stats.index_file_length =
- ((ulonglong) ib_table->stat_sum_of_other_index_sizes)
+ stats.index_file_length
+ = ((ulonglong) stat_sum_of_other_index_sizes)
* page_size;
/* Since fsp_get_available_space_in_free_extents() is
@@ -10346,8 +11080,8 @@ ha_innobase::info_low(
"space for table %s but its "
"tablespace has been discarded or "
"the .ibd file is missing. Setting "
- "the free space to zero. "
- "(Errcode: %M)",
+ "the free space to zero. "
+ "(errno: %M)",
ib_table->name, errno);
stats.delete_length = 0;
@@ -10357,7 +11091,7 @@ ha_innobase::info_low(
}
stats.check_time = 0;
- stats.mrr_length_per_rec = ref_length + sizeof(void*);
+ stats.mrr_length_per_rec= ref_length + 8; // 8 = max(sizeof(void *));
if (stats.records == 0) {
stats.mean_rec_length = 0;
@@ -10373,12 +11107,40 @@ ha_innobase::info_low(
matches up. If prebuilt->clust_index_was_generated
holds, InnoDB defines GEN_CLUST_INDEX internally */
ulint num_innodb_index = UT_LIST_GET_LEN(ib_table->indexes)
- - prebuilt->clust_index_was_generated;
+ - prebuilt->clust_index_was_generated;
+ if (table->s->keys < num_innodb_index) {
+ /* If there are too many indexes defined
+ inside InnoDB, ignore those that are being
+ created, because MySQL will only consider
+ the fully built indexes here. */
+
+ for (const dict_index_t* index
+ = UT_LIST_GET_FIRST(ib_table->indexes);
+ index != NULL;
+ index = UT_LIST_GET_NEXT(indexes, index)) {
+
+ /* First, online index creation is
+ completed inside InnoDB, and then
+ MySQL attempts to upgrade the
+ meta-data lock so that it can rebuild
+ the .frm file. If we get here in that
+ time frame, dict_index_is_online_ddl()
+ would not hold and the index would
+ still not be included in TABLE_SHARE. */
+ if (*index->name == TEMP_INDEX_PREFIX) {
+ num_innodb_index--;
+ }
+ }
+
+ if (table->s->keys < num_innodb_index
+ && innobase_fts_check_doc_id_index(
+ ib_table, NULL, NULL)
+ == FTS_EXIST_DOC_ID_INDEX) {
+ num_innodb_index--;
+ }
+ }
- if (table->s->keys != num_innodb_index
- && (innobase_fts_check_doc_id_index(ib_table, NULL)
- == FTS_EXIST_DOC_ID_INDEX
- && table->s->keys != (num_innodb_index - 1))) {
+ if (table->s->keys != num_innodb_index) {
sql_print_error("InnoDB: Table %s contains %lu "
"indexes inside InnoDB, which "
"is different from the number of "
@@ -10387,6 +11149,12 @@ ha_innobase::info_low(
table->s->keys);
}
+ if (!(flag & HA_STATUS_NO_LOCK)) {
+ dict_table_stats_lock(ib_table, RW_S_LATCH);
+ }
+
+ ut_a(ib_table->stat_initialized);
+
for (i = 0; i < table->s->keys; i++) {
ulong j;
rec_per_key = 1;
@@ -10395,7 +11163,7 @@ ha_innobase::info_low(
The identity of index (match up index name with
that of table->key_info[i]) is already verified in
innobase_get_index(). */
- index = innobase_get_index(i);
+ dict_index_t* index = innobase_get_index(i);
if (index == NULL) {
sql_print_error("Table %s contains fewer "
@@ -10410,7 +11178,7 @@ ha_innobase::info_low(
break;
}
- for (j = 0; j < table->key_info[i].key_parts; j++) {
+ for (j = 0; j < table->key_info[i].ext_key_parts; j++) {
if (table->key_info[i].flags & HA_FULLTEXT) {
/* The whole concept has no validity
@@ -10459,13 +11227,15 @@ ha_innobase::info_low(
key_part_map ext_key_part_map=
key_info->ext_key_part_map;
- if (key_info->key_parts != key_info->ext_key_parts) {
+ if (key_info->user_defined_key_parts !=
+ key_info->ext_key_parts)
+ {
KEY *pk_key_info= key_info+
table->s->primary_key;
- uint k = key_info->key_parts;
+ uint k = key_info->user_defined_key_parts;
ha_rows k_rec_per_key = rec_per_key;
- uint pk_parts = pk_key_info->key_parts;
+ uint pk_parts = pk_key_info->user_defined_key_parts;
index= innobase_get_index(
table->s->primary_key);
@@ -10500,6 +11270,10 @@ ha_innobase::info_low(
}
}
}
+
+ if (!(flag & HA_STATUS_NO_LOCK)) {
+ dict_table_stats_unlock(ib_table, RW_S_LATCH);
+ }
}
if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
@@ -10522,7 +11296,7 @@ ha_innobase::info_low(
errkey = (unsigned int) (
(prebuilt->trx->error_key_num
== ULINT_UNDEFINED)
- ? -1
+ ? ~0
: prebuilt->trx->error_key_num);
}
}
@@ -10545,9 +11319,9 @@ UNIV_INTERN
int
ha_innobase::info(
/*==============*/
- uint flag) /*!< in: what information MySQL requests */
+ uint flag) /*!< in: what information is requested */
{
- return(info_low(flag, DICT_STATS_FETCH));
+ return(this->info_low(flag, false /* not ANALYZE */));
}
/**********************************************************************//**
@@ -10561,19 +11335,13 @@ ha_innobase::analyze(
THD* thd, /*!< in: connection thread handle */
HA_CHECK_OPT* check_opt) /*!< in: currently ignored */
{
- dict_stats_upd_option_t upd_option;
- int ret;
+ int ret;
- if (THDVAR(thd, analyze_is_persistent)) {
- upd_option = DICT_STATS_RECALC_PERSISTENT;
- } else {
- upd_option = DICT_STATS_RECALC_TRANSIENT;
- }
-
- /* Simply call ::info_low() with all the flags
+ /* Simply call this->info_low() with all the flags
and request recalculation of the statistics */
- ret = info_low(HA_STATUS_TIME | HA_STATUS_CONST | HA_STATUS_VARIABLE,
- upd_option);
+ ret = this->info_low(
+ HA_STATUS_TIME | HA_STATUS_CONST | HA_STATUS_VARIABLE,
+ true /* this is ANALYZE */);
if (ret != 0) {
return(HA_ADMIN_FAILED);
@@ -10646,19 +11414,23 @@ ha_innobase::check(
build_template(true);
}
- if (prebuilt->table->ibd_file_missing) {
- sql_print_error("InnoDB: Error:\n"
- "InnoDB: MySQL is trying to use a table handle"
- " but the .ibd file for\n"
- "InnoDB: table %s does not exist.\n"
- "InnoDB: Have you deleted the .ibd file"
- " from the database directory under\n"
- "InnoDB: the MySQL datadir, or have you"
- " used DISCARD TABLESPACE?\n"
- "InnoDB: Please refer to\n"
- "InnoDB: " REFMAN "innodb-troubleshooting.html\n"
- "InnoDB: how you can resolve the problem.\n",
- prebuilt->table->name);
+ if (dict_table_is_discarded(prebuilt->table)) {
+
+ ib_senderrf(
+ thd,
+ IB_LOG_LEVEL_ERROR,
+ ER_TABLESPACE_DISCARDED,
+ table->s->table_name.str);
+
+ DBUG_RETURN(HA_ADMIN_CORRUPT);
+
+ } else if (prebuilt->table->ibd_file_missing) {
+
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLESPACE_MISSING,
+ table->s->table_name.str);
+
DBUG_RETURN(HA_ADMIN_CORRUPT);
}
@@ -10684,27 +11456,23 @@ ha_innobase::check(
/* Enlarge the fatal lock wait timeout during CHECK TABLE. */
os_increment_counter_by_amount(
server_mutex,
- srv_fatal_semaphore_wait_threshold, 7200/*2 hours*/);
+ srv_fatal_semaphore_wait_threshold,
+ SRV_SEMAPHORE_WAIT_EXTENSION);
for (index = dict_table_get_first_index(prebuilt->table);
index != NULL;
index = dict_table_get_next_index(index)) {
char index_name[MAX_FULL_NAME_LEN + 1];
-#if 0
- fputs("Validating index ", stderr);
- ut_print_name(stderr, trx, FALSE, index->name);
- putc('\n', stderr);
-#endif
- /* If this is an index being created, break */
+ /* If this is an index being created or dropped, break */
if (*index->name == TEMP_INDEX_PREFIX) {
break;
- } else if (!btr_validate_index(index, prebuilt->trx)) {
+ } else if (!btr_validate_index(index, prebuilt->trx)) {
is_ok = FALSE;
innobase_format_name(
index_name, sizeof index_name,
- prebuilt->index->name, TRUE);
+ index->name, TRUE);
push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
ER_NOT_KEYFILE,
@@ -10768,9 +11536,8 @@ ha_innobase::check(
" index %s is corrupted.",
index_name);
is_ok = FALSE;
- row_mysql_lock_data_dictionary(prebuilt->trx);
- dict_set_corrupted(index);
- row_mysql_unlock_data_dictionary(prebuilt->trx);
+ dict_set_corrupted(
+ index, prebuilt->trx, "CHECK TABLE");
}
if (thd_kill_level(user_thd)) {
@@ -10805,9 +11572,8 @@ ha_innobase::check(
index = dict_table_get_first_index(prebuilt->table);
if (!dict_index_is_corrupted(index)) {
- mutex_enter(&dict_sys->mutex);
- dict_set_corrupted(index);
- mutex_exit(&dict_sys->mutex);
+ dict_set_corrupted(
+ index, prebuilt->trx, "CHECK TABLE");
}
prebuilt->table->corrupted = TRUE;
}
@@ -10828,7 +11594,8 @@ ha_innobase::check(
/* Restore the fatal lock wait timeout after CHECK TABLE. */
os_decrement_counter_by_amount(
server_mutex,
- srv_fatal_semaphore_wait_threshold, 7200/*2 hours*/);
+ srv_fatal_semaphore_wait_threshold,
+ SRV_SEMAPHORE_WAIT_EXTENSION);
prebuilt->trx->op_info = "";
if (thd_kill_level(user_thd)) {
@@ -10873,40 +11640,47 @@ ha_innobase::update_table_comment(
/* output the data to a temporary file */
- mutex_enter(&srv_dict_tmpfile_mutex);
- rewind(srv_dict_tmpfile);
+ if (!srv_read_only_mode) {
- fprintf(srv_dict_tmpfile, "InnoDB free: %llu kB",
- fsp_get_available_space_in_free_extents(
- prebuilt->table->space));
+ mutex_enter(&srv_dict_tmpfile_mutex);
- dict_print_info_on_foreign_keys(FALSE, srv_dict_tmpfile,
- prebuilt->trx, prebuilt->table);
- flen = ftell(srv_dict_tmpfile);
- if (flen < 0) {
- flen = 0;
- } else if (length + flen + 3 > 64000) {
- flen = 64000 - 3 - length;
- }
+ rewind(srv_dict_tmpfile);
- /* allocate buffer for the full string, and
- read the contents of the temporary file */
+ fprintf(srv_dict_tmpfile, "InnoDB free: %llu kB",
+ fsp_get_available_space_in_free_extents(
+ prebuilt->table->space));
- str = (char*) my_malloc(length + flen + 3, MYF(0));
+ dict_print_info_on_foreign_keys(
+ FALSE, srv_dict_tmpfile, prebuilt->trx,
+ prebuilt->table);
- if (str) {
- char* pos = str + length;
- if (length) {
- memcpy(str, comment, length);
- *pos++ = ';';
- *pos++ = ' ';
+ flen = ftell(srv_dict_tmpfile);
+
+ if (flen < 0) {
+ flen = 0;
+ } else if (length + flen + 3 > 64000) {
+ flen = 64000 - 3 - length;
}
- rewind(srv_dict_tmpfile);
- flen = (uint) fread(pos, 1, flen, srv_dict_tmpfile);
- pos[flen] = 0;
- }
- mutex_exit(&srv_dict_tmpfile_mutex);
+ /* allocate buffer for the full string, and
+ read the contents of the temporary file */
+
+ str = (char*) my_malloc(length + flen + 3, MYF(0));
+
+ if (str) {
+ char* pos = str + length;
+ if (length) {
+ memcpy(str, comment, length);
+ *pos++ = ';';
+ *pos++ = ' ';
+ }
+ rewind(srv_dict_tmpfile);
+ flen = (uint) fread(pos, 1, flen, srv_dict_tmpfile);
+ pos[flen] = 0;
+ }
+
+ mutex_exit(&srv_dict_tmpfile_mutex);
+ }
prebuilt->trx->op_info = (char*)"";
@@ -10923,8 +11697,8 @@ char*
ha_innobase::get_foreign_key_create_info(void)
/*==========================================*/
{
- char* str = 0;
long flen;
+ char* str = 0;
ut_a(prebuilt != NULL);
@@ -10942,31 +11716,36 @@ ha_innobase::get_foreign_key_create_info(void)
trx_search_latch_release_if_reserved(prebuilt->trx);
- mutex_enter(&srv_dict_tmpfile_mutex);
- rewind(srv_dict_tmpfile);
+ if (!srv_read_only_mode) {
+ mutex_enter(&srv_dict_tmpfile_mutex);
+ rewind(srv_dict_tmpfile);
- /* output the data to a temporary file */
- dict_print_info_on_foreign_keys(TRUE, srv_dict_tmpfile,
- prebuilt->trx, prebuilt->table);
- prebuilt->trx->op_info = (char*)"";
+ /* Output the data to a temporary file */
+ dict_print_info_on_foreign_keys(
+ TRUE, srv_dict_tmpfile, prebuilt->trx,
+ prebuilt->table);
- flen = ftell(srv_dict_tmpfile);
- if (flen < 0) {
- flen = 0;
- }
+ prebuilt->trx->op_info = (char*)"";
- /* allocate buffer for the string, and
- read the contents of the temporary file */
+ flen = ftell(srv_dict_tmpfile);
- str = (char*) my_malloc(flen + 1, MYF(0));
+ if (flen < 0) {
+ flen = 0;
+ }
- if (str) {
- rewind(srv_dict_tmpfile);
- flen = (uint) fread(str, 1, flen, srv_dict_tmpfile);
- str[flen] = 0;
- }
+ /* Allocate buffer for the string, and
+ read the contents of the temporary file */
+
+ str = (char*) my_malloc(flen + 1, MYF(0));
+
+ if (str) {
+ rewind(srv_dict_tmpfile);
+ flen = (uint) fread(str, 1, flen, srv_dict_tmpfile);
+ str[flen] = 0;
+ }
- mutex_exit(&srv_dict_tmpfile_mutex);
+ mutex_exit(&srv_dict_tmpfile_mutex);
+ }
return(str);
}
@@ -11180,17 +11959,16 @@ ha_innobase::can_switch_engines(void)
bool can_switch;
DBUG_ENTER("ha_innobase::can_switch_engines");
-
- ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
+ update_thd();
prebuilt->trx->op_info =
"determining if there are foreign key constraints";
- row_mysql_lock_data_dictionary(prebuilt->trx);
+ row_mysql_freeze_data_dictionary(prebuilt->trx);
can_switch = !UT_LIST_GET_FIRST(prebuilt->table->referenced_list)
&& !UT_LIST_GET_FIRST(prebuilt->table->foreign_list);
- row_mysql_unlock_data_dictionary(prebuilt->trx);
+ row_mysql_unfreeze_data_dictionary(prebuilt->trx);
prebuilt->trx->op_info = "";
DBUG_RETURN(can_switch);
@@ -11239,50 +12017,52 @@ ha_innobase::extra(
enum ha_extra_function operation)
/*!< in: HA_EXTRA_FLUSH or some other flag */
{
+ check_trx_exists(ha_thd());
+
/* Warning: since it is not sure that MySQL calls external_lock
before calling this function, the trx field in prebuilt can be
obsolete! */
switch (operation) {
- case HA_EXTRA_FLUSH:
- if (prebuilt->blob_heap) {
- row_mysql_prebuilt_free_blob_heap(prebuilt);
- }
- break;
- case HA_EXTRA_RESET_STATE:
- reset_template();
- thd_to_trx(ha_thd())->duplicates = 0;
- break;
- case HA_EXTRA_NO_KEYREAD:
- prebuilt->read_just_key = 0;
- break;
- case HA_EXTRA_KEYREAD:
- prebuilt->read_just_key = 1;
- break;
- case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
- prebuilt->keep_other_fields_on_keyread = 1;
- break;
+ case HA_EXTRA_FLUSH:
+ if (prebuilt->blob_heap) {
+ row_mysql_prebuilt_free_blob_heap(prebuilt);
+ }
+ break;
+ case HA_EXTRA_RESET_STATE:
+ reset_template();
+ thd_to_trx(ha_thd())->duplicates = 0;
+ break;
+ case HA_EXTRA_NO_KEYREAD:
+ prebuilt->read_just_key = 0;
+ break;
+ case HA_EXTRA_KEYREAD:
+ prebuilt->read_just_key = 1;
+ break;
+ case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
+ prebuilt->keep_other_fields_on_keyread = 1;
+ break;
- /* IMPORTANT: prebuilt->trx can be obsolete in
- this method, because it is not sure that MySQL
- calls external_lock before this method with the
- parameters below. We must not invoke update_thd()
- either, because the calling threads may change.
- CAREFUL HERE, OR MEMORY CORRUPTION MAY OCCUR! */
- case HA_EXTRA_INSERT_WITH_UPDATE:
- thd_to_trx(ha_thd())->duplicates |= TRX_DUP_IGNORE;
- break;
- case HA_EXTRA_NO_IGNORE_DUP_KEY:
- thd_to_trx(ha_thd())->duplicates &= ~TRX_DUP_IGNORE;
- break;
- case HA_EXTRA_WRITE_CAN_REPLACE:
- thd_to_trx(ha_thd())->duplicates |= TRX_DUP_REPLACE;
- break;
- case HA_EXTRA_WRITE_CANNOT_REPLACE:
- thd_to_trx(ha_thd())->duplicates &= ~TRX_DUP_REPLACE;
- break;
- default:/* Do nothing */
- ;
+ /* IMPORTANT: prebuilt->trx can be obsolete in
+ this method, because it is not sure that MySQL
+ calls external_lock before this method with the
+ parameters below. We must not invoke update_thd()
+ either, because the calling threads may change.
+ CAREFUL HERE, OR MEMORY CORRUPTION MAY OCCUR! */
+ case HA_EXTRA_INSERT_WITH_UPDATE:
+ thd_to_trx(ha_thd())->duplicates |= TRX_DUP_IGNORE;
+ break;
+ case HA_EXTRA_NO_IGNORE_DUP_KEY:
+ thd_to_trx(ha_thd())->duplicates &= ~TRX_DUP_IGNORE;
+ break;
+ case HA_EXTRA_WRITE_CAN_REPLACE:
+ thd_to_trx(ha_thd())->duplicates |= TRX_DUP_REPLACE;
+ break;
+ case HA_EXTRA_WRITE_CANNOT_REPLACE:
+ thd_to_trx(ha_thd())->duplicates &= ~TRX_DUP_REPLACE;
+ break;
+ default:/* Do nothing */
+ ;
}
return(0);
@@ -11391,14 +12171,6 @@ ha_innobase::start_stmt(
++trx->will_lock;
}
- if (prebuilt->result) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Warning: FTS result set not NULL\n");
-
- fts_query_free_result(prebuilt->result);
- prebuilt->result = NULL;
- }
-
return(0);
}
@@ -11471,6 +12243,24 @@ ha_innobase::external_lock(
}
}
+ /* Check for UPDATEs in read-only mode. */
+ if (srv_read_only_mode
+ && (thd_sql_command(thd) == SQLCOM_UPDATE
+ || thd_sql_command(thd) == SQLCOM_INSERT
+ || thd_sql_command(thd) == SQLCOM_REPLACE
+ || thd_sql_command(thd) == SQLCOM_DROP_TABLE
+ || thd_sql_command(thd) == SQLCOM_ALTER_TABLE
+ || thd_sql_command(thd) == SQLCOM_OPTIMIZE
+ || thd_sql_command(thd) == SQLCOM_CREATE_TABLE
+ || thd_sql_command(thd) == SQLCOM_CREATE_INDEX
+ || thd_sql_command(thd) == SQLCOM_DROP_INDEX
+ || thd_sql_command(thd) == SQLCOM_DELETE)) {
+
+ ib_senderrf(thd, IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
+
+ DBUG_RETURN(HA_ERR_TABLE_READONLY);
+ }
+
trx = prebuilt->trx;
prebuilt->sql_stat_start = TRUE;
@@ -11478,6 +12268,41 @@ ha_innobase::external_lock(
reset_template();
+ switch (prebuilt->table->quiesce) {
+ case QUIESCE_START:
+ /* Check for FLUSH TABLE t WITH READ LOCK; */
+ if (!srv_read_only_mode
+ && thd_sql_command(thd) == SQLCOM_FLUSH
+ && lock_type == F_RDLCK) {
+
+ row_quiesce_table_start(prebuilt->table, trx);
+
+ /* Use the transaction instance to track UNLOCK
+ TABLES. It can be done via START TRANSACTION; too
+ implicitly. */
+
+ ++trx->flush_tables;
+ }
+ break;
+
+ case QUIESCE_COMPLETE:
+ /* Check for UNLOCK TABLES; implicit or explicit
+ or trx interruption. */
+ if (trx->flush_tables > 0
+ && (lock_type == F_UNLCK || trx_is_interrupted(trx))) {
+
+ row_quiesce_table_complete(prebuilt->table, trx);
+
+ ut_a(trx->flush_tables > 0);
+ --trx->flush_tables;
+ }
+
+ break;
+
+ case QUIESCE_NONE:
+ break;
+ }
+
if (lock_type == F_WRLCK) {
/* If this is a SELECT, then it is in UPDATE TABLE ...
@@ -11528,13 +12353,13 @@ ha_innobase::external_lock(
&& thd_test_options(thd, OPTION_NOT_AUTOCOMMIT)
&& thd_in_lock_tables(thd)) {
- ulint error = row_lock_table_for_mysql(
+ dberr_t error = row_lock_table_for_mysql(
prebuilt, NULL, 0);
if (error != DB_SUCCESS) {
- error = convert_error_code_to_mysql(
- (int) error, 0, thd);
- DBUG_RETURN((int) error);
+ DBUG_RETURN(
+ convert_error_code_to_mysql(
+ error, 0, thd));
}
}
@@ -11624,19 +12449,23 @@ ha_innobase::transactional_table_lock(
update_thd(thd);
- if (prebuilt->table->ibd_file_missing && !thd_tablespace_op(thd)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: MySQL is trying to use a table handle"
- " but the .ibd file for\n"
- "InnoDB: table %s does not exist.\n"
- "InnoDB: Have you deleted the .ibd file"
- " from the database directory under\n"
- "InnoDB: the MySQL datadir?"
- "InnoDB: See " REFMAN
- "innodb-troubleshooting.html\n"
- "InnoDB: how you can resolve the problem.\n",
- prebuilt->table->name);
+ if (!thd_tablespace_op(thd)) {
+
+ if (dict_table_is_discarded(prebuilt->table)) {
+
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLESPACE_DISCARDED,
+ table->s->table_name.str);
+
+ } else if (prebuilt->table->ibd_file_missing) {
+
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLESPACE_MISSING,
+ table->s->table_name.str);
+ }
+
DBUG_RETURN(HA_ERR_CRASHED);
}
@@ -11654,11 +12483,12 @@ ha_innobase::transactional_table_lock(
prebuilt->select_lock_type = LOCK_S;
prebuilt->stored_select_lock_type = LOCK_S;
} else {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB error:\n"
-"MySQL is trying to set transactional table lock with corrupted lock type\n"
-"to table %s, lock type %d does not exist.\n",
- prebuilt->table->name, lock_type);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "MySQL is trying to set transactional table lock "
+ "with corrupted lock type to table %s, lock type "
+ "%d does not exist.",
+ table->s->table_name.str, lock_type);
+
DBUG_RETURN(HA_ERR_CRASHED);
}
@@ -11667,14 +12497,14 @@ ha_innobase::transactional_table_lock(
innobase_register_trx(ht, thd, trx);
if (THDVAR(thd, table_locks) && thd_in_lock_tables(thd)) {
- ulint error = DB_SUCCESS;
+ dberr_t error;
error = row_lock_table_for_mysql(prebuilt, NULL, 0);
if (error != DB_SUCCESS) {
- error = convert_error_code_to_mysql(
- (int) error, prebuilt->table->flags, thd);
- DBUG_RETURN((int) error);
+ DBUG_RETURN(
+ convert_error_code_to_mysql(
+ error, prebuilt->table->flags, thd));
}
if (thd_test_options(
@@ -11725,6 +12555,13 @@ innodb_show_status(
DBUG_ENTER("innodb_show_status");
DBUG_ASSERT(hton == innodb_hton_ptr);
+ /* We don't create the temp files or associated
+ mutexes in read-only-mode */
+
+ if (srv_read_only_mode) {
+ DBUG_RETURN(0);
+ }
+
trx = check_trx_exists(thd);
trx_search_latch_release_if_reserved(trx);
@@ -11814,11 +12651,11 @@ innodb_mutex_show_status(
{
char buf1[IO_SIZE];
char buf2[IO_SIZE];
- mutex_t* mutex;
+ ib_mutex_t* mutex;
rw_lock_t* lock;
ulint block_mutex_oswait_count = 0;
ulint block_lock_oswait_count = 0;
- mutex_t* block_mutex = NULL;
+ ib_mutex_t* block_mutex = NULL;
rw_lock_t* block_lock = NULL;
#ifdef UNIV_DEBUG
ulint rw_lock_count= 0;
@@ -11850,41 +12687,7 @@ innodb_mutex_show_status(
block_mutex_oswait_count += mutex->count_os_wait;
continue;
}
-#ifdef UNIV_DEBUG
- if (mutex->mutex_type != 1) {
- if (mutex->count_using > 0) {
- buf1len= my_snprintf(buf1, sizeof(buf1),
- "%s:%s",
- mutex->cmutex_name,
- innobase_basename(mutex->cfile_name));
- buf2len= my_snprintf(buf2, sizeof(buf2),
- "count=%lu, spin_waits=%lu,"
- " spin_rounds=%lu, "
- "os_waits=%lu, os_yields=%lu,"
- " os_wait_times=%lu",
- mutex->count_using,
- mutex->count_spin_loop,
- mutex->count_spin_rounds,
- mutex->count_os_wait,
- mutex->count_os_yield,
- (ulong) (mutex->lspent_time/1000));
-
- if (stat_print(thd, innobase_hton_name,
- hton_name_len, buf1, buf1len,
- buf2, buf2len)) {
- mutex_exit(&mutex_list_mutex);
- DBUG_RETURN(1);
- }
- }
- } else {
- rw_lock_count += mutex->count_using;
- rw_lock_count_spin_loop += mutex->count_spin_loop;
- rw_lock_count_spin_rounds += mutex->count_spin_rounds;
- rw_lock_count_os_wait += mutex->count_os_wait;
- rw_lock_count_os_yield += mutex->count_os_yield;
- rw_lock_wait_time += mutex->lspent_time;
- }
-#else /* UNIV_DEBUG */
+
buf1len= (uint) my_snprintf(buf1, sizeof(buf1), "%s:%lu",
innobase_basename(mutex->cfile_name),
(ulong) mutex->cline);
@@ -11897,7 +12700,6 @@ innodb_mutex_show_status(
mutex_exit(&mutex_list_mutex);
DBUG_RETURN(1);
}
-#endif /* UNIV_DEBUG */
}
if (block_mutex) {
@@ -12170,12 +12972,52 @@ ha_innobase::store_lock(
const bool in_lock_tables = thd_in_lock_tables(thd);
const uint sql_command = thd_sql_command(thd);
- if (sql_command == SQLCOM_DROP_TABLE) {
+ if (srv_read_only_mode
+ && (sql_command == SQLCOM_UPDATE
+ || sql_command == SQLCOM_INSERT
+ || sql_command == SQLCOM_REPLACE
+ || sql_command == SQLCOM_DROP_TABLE
+ || sql_command == SQLCOM_ALTER_TABLE
+ || sql_command == SQLCOM_OPTIMIZE
+ || sql_command == SQLCOM_CREATE_TABLE
+ || sql_command == SQLCOM_CREATE_INDEX
+ || sql_command == SQLCOM_DROP_INDEX
+ || sql_command == SQLCOM_DELETE)) {
+
+ ib_senderrf(trx->mysql_thd,
+ IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
+
+ } else if (sql_command == SQLCOM_FLUSH
+ && lock_type == TL_READ_NO_INSERT) {
+
+ /* Check for FLUSH TABLES ... WITH READ LOCK */
+
+ /* Note: This call can fail, but there is no way to return
+ the error to the caller. We simply ignore it for now here
+ and push the error code to the caller where the error is
+ detected in the function. */
+
+ dberr_t err = row_quiesce_set_state(
+ prebuilt->table, QUIESCE_START, trx);
+
+ ut_a(err == DB_SUCCESS || err == DB_UNSUPPORTED);
+
+ if (trx->isolation_level == TRX_ISO_SERIALIZABLE) {
+ prebuilt->select_lock_type = LOCK_S;
+ prebuilt->stored_select_lock_type = LOCK_S;
+ } else {
+ prebuilt->select_lock_type = LOCK_NONE;
+ prebuilt->stored_select_lock_type = LOCK_NONE;
+ }
+
+ /* Check for DROP TABLE */
+ } else if (sql_command == SQLCOM_DROP_TABLE) {
/* MySQL calls this function in DROP TABLE though this table
handle may belong to another thd that is running a query. Let
us in that case skip any changes to the prebuilt struct. */
+ /* Check for LOCK TABLE t1,...,tn WITH SHARED LOCKS */
} else if ((lock_type == TL_READ && in_lock_tables)
|| (lock_type == TL_READ_HIGH_PRIORITY && in_lock_tables)
|| lock_type == TL_READ_WITH_SHARED_LOCKS
@@ -12201,18 +13043,18 @@ ha_innobase::store_lock(
unexpected if an obsolete consistent read view would be
used. */
- ulint isolation_level;
-
- isolation_level = trx->isolation_level;
+ /* Use consistent read for checksum table */
- if ((srv_locks_unsafe_for_binlog
- || isolation_level <= TRX_ISO_READ_COMMITTED)
- && isolation_level != TRX_ISO_SERIALIZABLE
- && (lock_type == TL_READ || lock_type == TL_READ_NO_INSERT)
- && (sql_command == SQLCOM_INSERT_SELECT
- || sql_command == SQLCOM_REPLACE_SELECT
- || sql_command == SQLCOM_UPDATE
- || sql_command == SQLCOM_CREATE_TABLE)) {
+ if (sql_command == SQLCOM_CHECKSUM
+ || ((srv_locks_unsafe_for_binlog
+ || trx->isolation_level <= TRX_ISO_READ_COMMITTED)
+ && trx->isolation_level != TRX_ISO_SERIALIZABLE
+ && (lock_type == TL_READ
+ || lock_type == TL_READ_NO_INSERT)
+ && (sql_command == SQLCOM_INSERT_SELECT
+ || sql_command == SQLCOM_REPLACE_SELECT
+ || sql_command == SQLCOM_UPDATE
+ || sql_command == SQLCOM_CREATE_TABLE))) {
/* If we either have innobase_locks_unsafe_for_binlog
option set or this session is using READ COMMITTED
@@ -12226,11 +13068,6 @@ ha_innobase::store_lock(
prebuilt->select_lock_type = LOCK_NONE;
prebuilt->stored_select_lock_type = LOCK_NONE;
- } else if (sql_command == SQLCOM_CHECKSUM) {
- /* Use consistent read for checksum table */
-
- prebuilt->select_lock_type = LOCK_NONE;
- prebuilt->stored_select_lock_type = LOCK_NONE;
} else {
prebuilt->select_lock_type = LOCK_S;
prebuilt->stored_select_lock_type = LOCK_S;
@@ -12330,7 +13167,7 @@ the AUTOINC value. If SUCCESS then the table AUTOINC mutex will be locked
on return and all relevant locks acquired.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
ha_innobase::innobase_get_autoinc(
/*==============================*/
ulonglong* value) /*!< out: autoinc value */
@@ -12387,12 +13224,7 @@ ha_innobase::innobase_peek_autoinc(void)
}
/*********************************************************************//**
-This function initializes the auto-inc counter if it has not been
-initialized yet. This function does not change the value of the auto-inc
-counter if it already has been initialized. Returns the value of the
-auto-inc counter in *first_value, and ULONGLONG_MAX in *nb_reserved_values (as
-we have a table-level lock). offset, increment, nb_desired_values are ignored.
-*first_value is set to -1 if error (deadlock or lock wait timeout) */
+Returns the value of the auto-inc counter in *first_value and ~0 on failure. */
UNIV_INTERN
void
ha_innobase::get_auto_increment(
@@ -12407,7 +13239,7 @@ ha_innobase::get_auto_increment(
values */
{
trx_t* trx;
- ulint error;
+ dberr_t error;
ulonglong autoinc = 0;
/* Prepare prebuilt->trx in the table handle */
@@ -12521,18 +13353,15 @@ ha_innobase::reset_auto_increment(
{
DBUG_ENTER("ha_innobase::reset_auto_increment");
- int error;
+ dberr_t error;
update_thd(ha_thd());
error = row_lock_table_autoinc_for_mysql(prebuilt);
if (error != DB_SUCCESS) {
- error = convert_error_code_to_mysql(error,
- prebuilt->table->flags,
- user_thd);
-
- DBUG_RETURN(error);
+ DBUG_RETURN(convert_error_code_to_mysql(
+ error, prebuilt->table->flags, user_thd));
}
/* The next value can never be 0. */
@@ -12601,7 +13430,7 @@ ha_innobase::get_foreign_dup_key(
/* else */
/* copy table name (and convert from filename-safe encoding to
- system_charset_info, e.g. "foo_@0J@00b6" -> "foo_ö") */
+ system_charset_info) */
char* p;
p = strchr(err_index->table->name, '/');
/* strip ".../" prefix if any */
@@ -12654,7 +13483,7 @@ ha_innobase::cmp_ref(
key_part = table->key_info[table->s->primary_key].key_part;
key_part_end = key_part
- + table->key_info[table->s->primary_key].key_parts;
+ + table->key_info[table->s->primary_key].user_defined_key_parts;
for (; key_part != key_part_end; ++key_part) {
field = key_part->field;
@@ -12699,11 +13528,10 @@ my_bool
ha_innobase::register_query_cache_table(
/*====================================*/
THD* thd, /*!< in: user thread handle */
- char* table_key, /*!< in: concatenation of database name,
- the null character NUL,
- and the table name */
- uint key_length, /*!< in: length of the full name, i.e.
- len(dbname) + len(tablename) + 1 */
+ char* table_key, /*!< in: normalized path to the
+ table */
+ uint key_length, /*!< in: length of the normalized
+ path to the table */
qc_engine_callback*
call_back, /*!< out: pointer to function for
checking if query caching
@@ -12825,8 +13653,8 @@ innobase_xa_prepare(
false - the current SQL statement
ended */
{
- int error = 0;
- trx_t* trx = check_trx_exists(thd);
+ int error = 0;
+ trx_t* trx = check_trx_exists(thd);
DBUG_ASSERT(hton == innodb_hton_ptr);
@@ -13019,124 +13847,6 @@ innobase_set_cursor_view(
}
/*******************************************************************//**
-If col_name is not NULL, check whether the named column is being
-renamed in the table. If col_name is not provided, check
-whether any one of columns in the table is being renamed.
-@return true if the column is being renamed */
-static
-bool
-check_column_being_renamed(
-/*=======================*/
- const TABLE* table, /*!< in: MySQL table */
- const char* col_name) /*!< in: name of the column */
-{
- uint k;
- Field* field;
-
- for (k = 0; k < table->s->fields; k++) {
- field = table->field[k];
-
- if (field->flags & FIELD_IS_RENAMED) {
-
- /* If col_name is not provided, return
- if the field is marked as being renamed. */
- if (!col_name) {
- return(true);
- }
-
- /* If col_name is provided, return only
- if names match */
- if (innobase_strcasecmp(field->field_name,
- col_name) == 0) {
- return(true);
- }
- }
- }
-
- return(false);
-}
-
-/*******************************************************************//**
-Check whether any of the given columns is being renamed in the table.
-@return true if any of col_names is being renamed in table */
-static
-bool
-column_is_being_renamed(
-/*====================*/
- TABLE* table, /*!< in: MySQL table */
- uint n_cols, /*!< in: number of columns */
- const char** col_names) /*!< in: names of the columns */
-{
- uint j;
-
- for (j = 0; j < n_cols; j++) {
- if (check_column_being_renamed(table, col_names[j])) {
- return(true);
- }
- }
-
- return(false);
-}
-
-/*******************************************************************//**
-Check whether a column in table "table" is being renamed and if this column
-is part of a foreign key, either part of another table, referencing this
-table or part of this table, referencing another table.
-@return true if a column that participates in a foreign key definition
-is being renamed */
-static
-bool
-foreign_key_column_is_being_renamed(
-/*================================*/
- row_prebuilt_t* prebuilt, /* in: InnoDB prebuilt struct */
- TABLE* table) /* in: MySQL table */
-{
- dict_foreign_t* foreign;
-
- /* check whether there are foreign keys at all */
- if (UT_LIST_GET_LEN(prebuilt->table->foreign_list) == 0
- && UT_LIST_GET_LEN(prebuilt->table->referenced_list) == 0) {
- /* no foreign keys involved with prebuilt->table */
-
- return(false);
- }
-
- row_mysql_lock_data_dictionary(prebuilt->trx);
-
- /* Check whether any column in the foreign key constraints which refer
- to this table is being renamed. */
- for (foreign = UT_LIST_GET_FIRST(prebuilt->table->referenced_list);
- foreign != NULL;
- foreign = UT_LIST_GET_NEXT(referenced_list, foreign)) {
-
- if (column_is_being_renamed(table, foreign->n_fields,
- foreign->referenced_col_names)) {
-
- row_mysql_unlock_data_dictionary(prebuilt->trx);
- return(true);
- }
- }
-
- /* Check whether any column in the foreign key constraints in the
- table is being renamed. */
- for (foreign = UT_LIST_GET_FIRST(prebuilt->table->foreign_list);
- foreign != NULL;
- foreign = UT_LIST_GET_NEXT(foreign_list, foreign)) {
-
- if (column_is_being_renamed(table, foreign->n_fields,
- foreign->foreign_col_names)) {
-
- row_mysql_unlock_data_dictionary(prebuilt->trx);
- return(true);
- }
- }
-
- row_mysql_unlock_data_dictionary(prebuilt->trx);
-
- return(false);
-}
-
-/*******************************************************************//**
*/
UNIV_INTERN
bool
@@ -13145,6 +13855,8 @@ ha_innobase::check_if_incompatible_data(
HA_CREATE_INFO* info,
uint table_changes)
{
+ innobase_copy_frm_flags_from_create_info(prebuilt->table, info);
+
if (table_changes != IS_EQUAL_YES) {
return(COMPATIBLE_DATA_NO);
@@ -13157,25 +13869,8 @@ ha_innobase::check_if_incompatible_data(
return(COMPATIBLE_DATA_NO);
}
- /* For column rename operation, MySQL does not supply enough
- information (new column name etc.) for InnoDB to make appropriate
- system metadata change. To avoid system metadata inconsistency,
- currently we can just request a table rebuild/copy by returning
- COMPATIBLE_DATA_NO */
- if (check_column_being_renamed(table, NULL)) {
- return(COMPATIBLE_DATA_NO);
- }
-
- /* Check if a column participating in a foreign key is being renamed.
- There is no mechanism for updating InnoDB foreign key definitions. */
- if (foreign_key_column_is_being_renamed(prebuilt, table)) {
-
- return(COMPATIBLE_DATA_NO);
- }
-
/* Check that row format didn't change */
if ((info->used_fields & HA_CREATE_USED_ROW_FORMAT)
- && info->row_type != ROW_TYPE_DEFAULT
&& info->row_type != get_row_type()) {
return(COMPATIBLE_DATA_NO);
@@ -13189,6 +13884,135 @@ ha_innobase::check_if_incompatible_data(
return(COMPATIBLE_DATA_YES);
}
+/****************************************************************//**
+Update the system variable innodb_io_capacity_max using the "saved"
+value. This function is registered as a callback with MySQL. */
+static
+void
+innodb_io_capacity_max_update(
+/*===========================*/
+ THD* thd, /*!< in: thread handle */
+ struct st_mysql_sys_var* var, /*!< in: pointer to
+ system variable */
+ void* var_ptr,/*!< out: where the
+ formal string goes */
+ const void* save) /*!< in: immediate result
+ from check function */
+{
+ ulong in_val = *static_cast<const ulong*>(save);
+ if (in_val < srv_io_capacity) {
+ in_val = srv_io_capacity;
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WRONG_ARGUMENTS,
+ "innodb_io_capacity_max cannot be"
+ " set lower than innodb_io_capacity.");
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WRONG_ARGUMENTS,
+ "Setting innodb_io_capacity_max to %lu",
+ srv_io_capacity);
+ }
+
+ srv_max_io_capacity = in_val;
+}
+
+/****************************************************************//**
+Update the system variable innodb_io_capacity using the "saved"
+value. This function is registered as a callback with MySQL. */
+static
+void
+innodb_io_capacity_update(
+/*======================*/
+ THD* thd, /*!< in: thread handle */
+ struct st_mysql_sys_var* var, /*!< in: pointer to
+ system variable */
+ void* var_ptr,/*!< out: where the
+ formal string goes */
+ const void* save) /*!< in: immediate result
+ from check function */
+{
+ ulong in_val = *static_cast<const ulong*>(save);
+ if (in_val > srv_max_io_capacity) {
+ in_val = srv_max_io_capacity;
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WRONG_ARGUMENTS,
+ "innodb_io_capacity cannot be set"
+ " higher than innodb_io_capacity_max.");
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WRONG_ARGUMENTS,
+ "Setting innodb_io_capacity to %lu",
+ srv_max_io_capacity);
+ }
+
+ srv_io_capacity = in_val;
+}
+
+/****************************************************************//**
+Update the system variable innodb_max_dirty_pages_pct using the "saved"
+value. This function is registered as a callback with MySQL. */
+static
+void
+innodb_max_dirty_pages_pct_update(
+/*==============================*/
+ THD* thd, /*!< in: thread handle */
+ struct st_mysql_sys_var* var, /*!< in: pointer to
+ system variable */
+ void* var_ptr,/*!< out: where the
+ formal string goes */
+ const void* save) /*!< in: immediate result
+ from check function */
+{
+ ulong in_val = *static_cast<const ulong*>(save);
+ if (in_val < srv_max_dirty_pages_pct_lwm) {
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WRONG_ARGUMENTS,
+ "innodb_max_dirty_pages_pct cannot be"
+ " set lower than"
+ " innodb_max_dirty_pages_pct_lwm.");
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WRONG_ARGUMENTS,
+					    "Lowering"
+					    " innodb_max_dirty_pages_pct_lwm to %lu",
+					    in_val);
+
+ srv_max_dirty_pages_pct_lwm = in_val;
+ }
+
+ srv_max_buf_pool_modified_pct = in_val;
+}
+
+/****************************************************************//**
+Update the system variable innodb_max_dirty_pages_pct_lwm using the
+"saved" value. This function is registered as a callback with MySQL. */
+static
+void
+innodb_max_dirty_pages_pct_lwm_update(
+/*==================================*/
+ THD* thd, /*!< in: thread handle */
+ struct st_mysql_sys_var* var, /*!< in: pointer to
+ system variable */
+ void* var_ptr,/*!< out: where the
+ formal string goes */
+ const void* save) /*!< in: immediate result
+ from check function */
+{
+ ulong in_val = *static_cast<const ulong*>(save);
+ if (in_val > srv_max_buf_pool_modified_pct) {
+ in_val = srv_max_buf_pool_modified_pct;
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WRONG_ARGUMENTS,
+ "innodb_max_dirty_pages_pct_lwm"
+ " cannot be set higher than"
+ " innodb_max_dirty_pages_pct.");
+ push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+ ER_WRONG_ARGUMENTS,
+					    "Setting innodb_max_dirty_pages_pct_lwm"
+					    " to %lu",
+					    in_val);
+ }
+
+ srv_max_dirty_pages_pct_lwm = in_val;
+}
+
/************************************************************//**
Validate the file format name and return its corresponding id.
@return valid file format id */
@@ -13554,8 +14378,8 @@ innodb_internal_table_validate(
return(0);
}
- user_table = dict_table_open_on_name_no_stats(
- table_name, FALSE, DICT_ERR_IGNORE_NONE);
+ user_table = dict_table_open_on_name(
+ table_name, FALSE, TRUE, DICT_ERR_IGNORE_NONE);
if (user_table) {
if (dict_table_has_fts_index(user_table)) {
@@ -13563,7 +14387,7 @@ innodb_internal_table_validate(
ret = 0;
}
- dict_table_close(user_table, FALSE);
+ dict_table_close(user_table, FALSE, TRUE);
}
return(ret);
@@ -13608,13 +14432,12 @@ innodb_internal_table_update(
}
/****************************************************************//**
-Update the session variable innodb_session_stopword_table
-with the "saved" stopword table name value. This function
-is registered as a callback with MySQL. */
+Update the system variable innodb_adaptive_hash_index using the "saved"
+value. This function is registered as a callback with MySQL. */
static
void
-innodb_session_stopword_update(
-/*===========================*/
+innodb_adaptive_hash_index_update(
+/*==============================*/
THD* thd, /*!< in: thread handle */
struct st_mysql_sys_var* var, /*!< in: pointer to
system variable */
@@ -13623,32 +14446,20 @@ innodb_session_stopword_update(
const void* save) /*!< in: immediate result
from check function */
{
- const char* stopword_table_name;
- char* old;
-
- ut_a(save != NULL);
- ut_a(var_ptr != NULL);
-
- stopword_table_name = *static_cast<const char*const*>(save);
- old = *(char**) var_ptr;
-
- if (stopword_table_name) {
- *(char**) var_ptr = my_strdup(stopword_table_name, MYF(0));
+ if (*(my_bool*) save) {
+ btr_search_enable();
} else {
- *(char**) var_ptr = NULL;
- }
-
- if (old) {
- my_free(old);
+ btr_search_disable();
}
}
+
/****************************************************************//**
-Update the system variable innodb_adaptive_hash_index using the "saved"
+Update the system variable innodb_cmp_per_index using the "saved"
value. This function is registered as a callback with MySQL. */
static
void
-innodb_adaptive_hash_index_update(
-/*==============================*/
+innodb_cmp_per_index_update(
+/*========================*/
THD* thd, /*!< in: thread handle */
struct st_mysql_sys_var* var, /*!< in: pointer to
system variable */
@@ -13657,11 +14468,13 @@ innodb_adaptive_hash_index_update(
const void* save) /*!< in: immediate result
from check function */
{
- if (*(my_bool*) save) {
- btr_search_enable();
- } else {
- btr_search_disable();
+ /* Reset the stats whenever we enable the table
+ INFORMATION_SCHEMA.innodb_cmp_per_index. */
+ if (!srv_cmp_per_index_enabled && *(my_bool*) save) {
+ page_zip_reset_stat_per_index();
}
+
+ srv_cmp_per_index_enabled = !!(*(my_bool*) save);
}
/****************************************************************//**
@@ -14233,6 +15046,53 @@ exit:
return;
}
+#ifdef __WIN__
+/*************************************************************//**
+Validate if passed-in "value" is a valid value for
+innodb_buffer_pool_filename. On Windows, file names with colon (:)
+are not allowed.
+
+@return 0 for valid name */
+static
+int
+innodb_srv_buf_dump_filename_validate(
+/*==================================*/
+ THD* thd, /*!< in: thread handle */
+ struct st_mysql_sys_var* var, /*!< in: pointer to system
+ variable */
+ void* save, /*!< out: immediate result
+ for update function */
+ struct st_mysql_value* value) /*!< in: incoming string */
+{
+ const char* buf_name;
+ char buff[OS_FILE_MAX_PATH];
+ int len= sizeof(buff);
+
+ ut_a(save != NULL);
+ ut_a(value != NULL);
+
+ buf_name = value->val_str(value, buff, &len);
+
+ if (buf_name) {
+ if (is_filename_allowed(buf_name, len, FALSE)){
+ *static_cast<const char**>(save) = buf_name;
+ return(0);
+ } else {
+ push_warning_printf(thd,
+ Sql_condition::WARN_LEVEL_WARN,
+ ER_WRONG_ARGUMENTS,
+ "InnoDB: innodb_buffer_pool_filename "
+ "cannot have colon (:) in the file name.");
+
+ }
+ }
+
+ return(1);
+}
+#else /* __WIN__ */
+# define innodb_srv_buf_dump_filename_validate NULL
+#endif /* __WIN__ */
+
/****************************************************************//**
Update the system variable innodb_monitor_enable and enable
specified monitor counter.
@@ -14310,6 +15170,29 @@ innodb_reset_all_monitor_update(
}
/****************************************************************//**
+Update the system variable innodb_compression_level using the "saved"
+value. This function is registered as a callback with MySQL. */
+static
+void
+innodb_compression_level_update(
+/*============================*/
+ THD* thd, /*!< in: thread handle */
+ struct st_mysql_sys_var* var, /*!< in: pointer to
+ system variable */
+ void* var_ptr,/*!< out: where the
+ formal string goes */
+ const void* save) /*!< in: immediate result
+ from check function */
+{
+ /* We have this call back just to avoid confusion between
+ ulong and ulint datatypes. */
+ innobase_compression_level =
+ (*static_cast<const ulong*>(save));
+ page_compression_level =
+ (static_cast<const ulint>(innobase_compression_level));
+}
+
+/****************************************************************//**
Parse and enable InnoDB monitor counters during server startup.
User can list the monitor counters/groups to be enable by specifying
"loose-innodb_monitor_enable=monitor_name1;monitor_name2..."
@@ -14427,6 +15310,12 @@ innobase_fts_retrieve_ranking(
ft_prebuilt = ((NEW_FT_INFO*) fts_hdl)->ft_prebuilt;
+ if (ft_prebuilt->read_just_key) {
+ fts_ranking_t* ranking =
+ rbt_value(fts_ranking_t, result->current);
+ return(ranking->rank);
+ }
+
/* Retrieve the ranking value for doc_id with value of
prebuilt->fts_doc_id */
return(fts_retrieve_ranking(result, ft_prebuilt->fts_doc_id));
@@ -14441,20 +15330,16 @@ innobase_fts_close_ranking(
FT_INFO * fts_hdl)
{
fts_result_t* result;
- row_prebuilt_t* ft_prebuilt;
- ft_prebuilt = ((NEW_FT_INFO*) fts_hdl)->ft_prebuilt;
+ ((NEW_FT_INFO*) fts_hdl)->ft_prebuilt->in_fts_query = false;
result = ((NEW_FT_INFO*) fts_hdl)->ft_result;
fts_query_free_result(result);
- if (result == ft_prebuilt->result) {
- ft_prebuilt->result = NULL;
- }
-
my_free((uchar*) fts_hdl);
+
return;
}
@@ -14478,7 +15363,120 @@ innobase_fts_find_ranking(
/* Retrieve the ranking value for doc_id with value of
prebuilt->fts_doc_id */
- return fts_retrieve_ranking(result, ft_prebuilt->fts_doc_id);
+ return(fts_retrieve_ranking(result, ft_prebuilt->fts_doc_id));
+}
+
+#ifdef UNIV_DEBUG
+static my_bool innodb_purge_run_now = TRUE;
+static my_bool innodb_purge_stop_now = TRUE;
+
+/****************************************************************//**
+Set the purge state to RUN. If purge is disabled then it
+is a no-op. This function is registered as a callback with MySQL. */
+static
+void
+purge_run_now_set(
+/*==============*/
+ THD* thd /*!< in: thread handle */
+ __attribute__((unused)),
+ struct st_mysql_sys_var* var /*!< in: pointer to system
+ variable */
+ __attribute__((unused)),
+ void* var_ptr /*!< out: where the formal
+ string goes */
+ __attribute__((unused)),
+ const void* save) /*!< in: immediate result from
+ check function */
+{
+ if (*(my_bool*) save && trx_purge_state() != PURGE_STATE_DISABLED) {
+ trx_purge_run();
+ }
+}
+
+/****************************************************************//**
+Set the purge state to STOP. If purge is disabled then it
+is a no-op. This function is registered as a callback with MySQL. */
+static
+void
+purge_stop_now_set(
+/*===============*/
+ THD* thd /*!< in: thread handle */
+ __attribute__((unused)),
+ struct st_mysql_sys_var* var /*!< in: pointer to system
+ variable */
+ __attribute__((unused)),
+ void* var_ptr /*!< out: where the formal
+ string goes */
+ __attribute__((unused)),
+ const void* save) /*!< in: immediate result from
+ check function */
+{
+ if (*(my_bool*) save && trx_purge_state() != PURGE_STATE_DISABLED) {
+ trx_purge_stop();
+ }
+}
+#endif /* UNIV_DEBUG */
+
+/***********************************************************************
+@return version of the extended FTS API */
+uint
+innobase_fts_get_version()
+/*======================*/
+{
+ /* Currently this doesn't make much sense as returning
+ HA_CAN_FULLTEXT_EXT automatically mean this version is supported.
+ This supposed to ease future extensions. */
+ return(2);
+}
+
+/***********************************************************************
+@return Which part of the extended FTS API is supported */
+ulonglong
+innobase_fts_flags()
+/*================*/
+{
+ return(FTS_ORDERED_RESULT | FTS_DOCID_IN_RESULT);
+}
+
+
+/***********************************************************************
+Find and Retrieve the FTS doc_id for the current result row
+@return the document ID */
+ulonglong
+innobase_fts_retrieve_docid(
+/*========================*/
+ FT_INFO_EXT * fts_hdl) /*!< in: FTS handler */
+{
+ row_prebuilt_t* ft_prebuilt;
+ fts_result_t* result;
+
+ ft_prebuilt = ((NEW_FT_INFO *)fts_hdl)->ft_prebuilt;
+ result = ((NEW_FT_INFO *)fts_hdl)->ft_result;
+
+ if (ft_prebuilt->read_just_key) {
+ fts_ranking_t* ranking =
+ rbt_value(fts_ranking_t, result->current);
+ return(ranking->doc_id);
+ }
+
+ return(ft_prebuilt->fts_doc_id);
+}
+
+/***********************************************************************
+Find and retrieve the size of the current result
+@return number of matching rows */
+ulonglong
+innobase_fts_count_matches(
+/*=======================*/
+ FT_INFO_EXT* fts_hdl) /*!< in: FTS handler */
+{
+ NEW_FT_INFO* handle = (NEW_FT_INFO *) fts_hdl;
+
+ if (handle->ft_result->rankings_by_id != 0) {
+ return rbt_size(handle->ft_result->rankings_by_id);
+ } else {
+ return(0);
+ }
}
/* These variables are never read by InnoDB or changed. They are a kind of
@@ -14515,7 +15513,7 @@ buffer_pool_dump_now(
const void* save) /*!< in: immediate result from
check function */
{
- if (*(my_bool*) save) {
+ if (*(my_bool*) save && !srv_read_only_mode) {
buf_dump_start();
}
}
@@ -14622,7 +15620,26 @@ static MYSQL_SYSVAR_BOOL(doublewrite, innobase_use_doublewrite,
static MYSQL_SYSVAR_ULONG(io_capacity, srv_io_capacity,
PLUGIN_VAR_RQCMDARG,
"Number of IOPs the server can do. Tunes the background IO rate",
- NULL, NULL, 200, 100, ~0UL, 0);
+ NULL, innodb_io_capacity_update, 200, 100, ~0UL, 0);
+
+static MYSQL_SYSVAR_ULONG(io_capacity_max, srv_max_io_capacity,
+ PLUGIN_VAR_RQCMDARG,
+ "Limit to which innodb_io_capacity can be inflated.",
+ NULL, innodb_io_capacity_max_update,
+ SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT, 100,
+ SRV_MAX_IO_CAPACITY_LIMIT, 0);
+
+#ifdef UNIV_DEBUG
+static MYSQL_SYSVAR_BOOL(purge_run_now, innodb_purge_run_now,
+ PLUGIN_VAR_OPCMDARG,
+ "Set purge state to RUN",
+ NULL, purge_run_now_set, FALSE);
+
+static MYSQL_SYSVAR_BOOL(purge_stop_now, innodb_purge_stop_now,
+ PLUGIN_VAR_OPCMDARG,
+ "Set purge state to STOP",
+ NULL, purge_stop_now_set, FALSE);
+#endif /* UNIV_DEBUG */
static MYSQL_SYSVAR_ULONG(purge_batch_size, srv_purge_batch_size,
PLUGIN_VAR_OPCMDARG,
@@ -14634,7 +15651,7 @@ static MYSQL_SYSVAR_ULONG(purge_batch_size, srv_purge_batch_size,
static MYSQL_SYSVAR_ULONG(purge_threads, srv_n_purge_threads,
PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
- "Purge threads can be from 0 to 32. Default is 0.",
+ "Purge threads can be from 1 to 32. Default is 1.",
NULL, NULL,
1, /* Default setting */
1, /* Minimum value */
@@ -14657,7 +15674,7 @@ static MYSQL_SYSVAR_ULONG(fast_shutdown, innobase_fast_shutdown,
static MYSQL_SYSVAR_BOOL(file_per_table, srv_file_per_table,
PLUGIN_VAR_NOCMDARG,
"Stores each InnoDB table to an .ibd file in the database dir.",
- NULL, NULL, FALSE);
+ NULL, NULL, TRUE);
static MYSQL_SYSVAR_STR(file_format, innobase_file_format_name,
PLUGIN_VAR_RQCMDARG,
@@ -14693,6 +15710,11 @@ static MYSQL_SYSVAR_STR(ft_server_stopword_table, innobase_server_stopword_table
innodb_stopword_table_update,
NULL);
+static MYSQL_SYSVAR_UINT(flush_log_at_timeout, srv_flush_log_at_timeout,
+ PLUGIN_VAR_OPCMDARG,
+ "Write and flush logs every (n) second.",
+ NULL, NULL, 1, 0, 2700, 0);
+
static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
PLUGIN_VAR_OPCMDARG,
"Controls the durability/speed trade-off for commits."
@@ -14738,20 +15760,38 @@ static MYSQL_SYSVAR_BOOL(log_archive, innobase_log_archive,
"Set to 1 if you want to have logs archived.", NULL, NULL, FALSE);
#endif /* UNIV_LOG_ARCHIVE */
-static MYSQL_SYSVAR_STR(log_group_home_dir, innobase_log_group_home_dir,
+static MYSQL_SYSVAR_STR(log_group_home_dir, srv_log_group_home_dir,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Path to InnoDB log files.", NULL, NULL, NULL);
static MYSQL_SYSVAR_ULONG(max_dirty_pages_pct, srv_max_buf_pool_modified_pct,
PLUGIN_VAR_RQCMDARG,
"Percentage of dirty pages allowed in bufferpool.",
- NULL, NULL, 75, 0, 99, 0);
+ NULL, innodb_max_dirty_pages_pct_update, 75, 0, 99, 0);
+
+static MYSQL_SYSVAR_ULONG(max_dirty_pages_pct_lwm,
+ srv_max_dirty_pages_pct_lwm,
+ PLUGIN_VAR_RQCMDARG,
+ "Percentage of dirty pages at which flushing kicks in.",
+ NULL, innodb_max_dirty_pages_pct_lwm_update, 0, 0, 99, 0);
+
+static MYSQL_SYSVAR_ULONG(adaptive_flushing_lwm,
+ srv_adaptive_flushing_lwm,
+ PLUGIN_VAR_RQCMDARG,
+ "Percentage of log capacity below which no adaptive flushing happens.",
+ NULL, NULL, 10, 0, 70, 0);
static MYSQL_SYSVAR_BOOL(adaptive_flushing, srv_adaptive_flushing,
PLUGIN_VAR_NOCMDARG,
"Attempt flushing dirty pages to avoid IO bursts at checkpoints.",
NULL, NULL, TRUE);
+static MYSQL_SYSVAR_ULONG(flushing_avg_loops,
+ srv_flushing_avg_loops,
+ PLUGIN_VAR_RQCMDARG,
+ "Number of iterations over which the background flushing is averaged.",
+ NULL, NULL, 30, 1, 1000, 0);
+
static MYSQL_SYSVAR_ULONG(max_purge_lag, srv_max_purge_lag,
PLUGIN_VAR_RQCMDARG,
"Desired maximum length of the purge queue (0 = no limit)",
@@ -14760,11 +15800,11 @@ static MYSQL_SYSVAR_ULONG(max_purge_lag, srv_max_purge_lag,
static MYSQL_SYSVAR_ULONG(max_purge_lag_delay, srv_max_purge_lag_delay,
PLUGIN_VAR_RQCMDARG,
"Maximum delay of user threads in micro-seconds",
- NULL, NULL,
+ NULL, NULL,
0L, /* Default seting */
0L, /* Minimum value */
10000000UL, 0); /* Maximum value */
-
+
static MYSQL_SYSVAR_BOOL(rollback_on_timeout, innobase_rollback_on_timeout,
PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
"Roll back the complete transaction on lock wait timeout, for 4.x compatibility (disabled by default)",
@@ -14777,8 +15817,9 @@ static MYSQL_SYSVAR_BOOL(status_file, innobase_create_status_file,
static MYSQL_SYSVAR_BOOL(stats_on_metadata, innobase_stats_on_metadata,
PLUGIN_VAR_OPCMDARG,
- "Enable statistics gathering for metadata commands such as SHOW TABLE STATUS (on by default)",
- NULL, NULL, TRUE);
+ "Enable statistics gathering for metadata commands such as "
+ "SHOW TABLE STATUS for tables that use transient statistics (off by default)",
+ NULL, NULL, FALSE);
static MYSQL_SYSVAR_ULONGLONG(stats_sample_pages, srv_stats_transient_sample_pages,
PLUGIN_VAR_RQCMDARG,
@@ -14792,6 +15833,20 @@ static MYSQL_SYSVAR_ULONGLONG(stats_transient_sample_pages,
"statistics (if persistent statistics are not used, default 8)",
NULL, NULL, 8, 1, ~0ULL, 0);
+static MYSQL_SYSVAR_BOOL(stats_persistent, srv_stats_persistent,
+ PLUGIN_VAR_OPCMDARG,
+ "InnoDB persistent statistics enabled for all tables unless overridden "
+ "at table level",
+ NULL, NULL, TRUE);
+
+static MYSQL_SYSVAR_BOOL(stats_auto_recalc, srv_stats_auto_recalc,
+ PLUGIN_VAR_OPCMDARG,
+ "InnoDB automatic recalculation of persistent statistics enabled for all "
+ "tables unless overridden at table level (automatic recalculation is only "
+ "done when InnoDB decides that the table has changed too much and needs a "
+ "new statistics)",
+ NULL, NULL, TRUE);
+
static MYSQL_SYSVAR_ULONGLONG(stats_persistent_sample_pages,
srv_stats_persistent_sample_pages,
PLUGIN_VAR_RQCMDARG,
@@ -14811,6 +15866,13 @@ static MYSQL_SYSVAR_ULONG(replication_delay, srv_replication_delay,
"innodb_thread_concurrency is reached (0 by default)",
NULL, NULL, 0, 0, ~0UL, 0);
+static MYSQL_SYSVAR_ULONG(compression_level, innobase_compression_level,
+ PLUGIN_VAR_RQCMDARG,
+ "Compression level used for compressed row format. 0 is no compression"
+ ", 1 is fastest, 9 is best compression and default is 6.",
+ NULL, innodb_compression_level_update,
+ DEFAULT_COMPRESSION_LEVEL, 0, 9, 0);
+
static MYSQL_SYSVAR_LONG(additional_mem_pool_size, innobase_additional_mem_pool_size,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"DEPRECATED. This option may be removed in future releases, "
@@ -14822,7 +15884,7 @@ static MYSQL_SYSVAR_LONG(additional_mem_pool_size, innobase_additional_mem_pool_
static MYSQL_SYSVAR_ULONG(autoextend_increment, srv_auto_extend_increment,
PLUGIN_VAR_RQCMDARG,
"Data file autoextend increment in megabytes",
- NULL, NULL, 8L, 1L, 1000L, 0);
+ NULL, NULL, 64L, 1L, 1000L, 0);
static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
@@ -14844,12 +15906,12 @@ static MYSQL_SYSVAR_ULONG(doublewrite_batch_size, srv_doublewrite_batch_size,
static MYSQL_SYSVAR_LONG(buffer_pool_instances, innobase_buffer_pool_instances,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Number of buffer pool instances, set to higher value on high-end machines to increase scalability",
- NULL, NULL, 1L, 1L, MAX_BUFFER_POOLS, 1L);
+ NULL, NULL, 0L, 0L, MAX_BUFFER_POOLS, 1L);
static MYSQL_SYSVAR_STR(buffer_pool_filename, srv_buf_dump_filename,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC,
"Filename to/from which to dump/load the InnoDB buffer pool",
- NULL, NULL, SRV_BUF_DUMP_FILENAME_DEFAULT);
+ innodb_srv_buf_dump_filename_validate, NULL, SRV_BUF_DUMP_FILENAME_DEFAULT);
static MYSQL_SYSVAR_BOOL(buffer_pool_dump_now, innodb_buffer_pool_dump_now,
PLUGIN_VAR_RQCMDARG,
@@ -14882,10 +15944,13 @@ static MYSQL_SYSVAR_ULONG(lru_scan_depth, srv_LRU_scan_depth,
"How deep to scan LRU to keep it clean",
NULL, NULL, 1024, 100, ~0UL, 0);
-static MYSQL_SYSVAR_BOOL(flush_neighbors, srv_flush_neighbors,
- PLUGIN_VAR_NOCMDARG,
- "Flush neighbors from buffer pool when flushing a block.",
- NULL, NULL, TRUE);
+static MYSQL_SYSVAR_ULONG(flush_neighbors, srv_flush_neighbors,
+ PLUGIN_VAR_OPCMDARG,
+ "Set to 0 (don't flush neighbors from buffer pool),"
+ " 1 (flush contiguous neighbors from buffer pool)"
+ " or 2 (flush neighbors from buffer pool),"
+ " when flushing a block",
+ NULL, NULL, 1, 0, 2, 0);
static MYSQL_SYSVAR_ULONG(commit_concurrency, innobase_commit_concurrency,
PLUGIN_VAR_RQCMDARG,
@@ -14895,7 +15960,7 @@ static MYSQL_SYSVAR_ULONG(commit_concurrency, innobase_commit_concurrency,
static MYSQL_SYSVAR_ULONG(concurrency_tickets, srv_n_free_tickets_to_enter,
PLUGIN_VAR_RQCMDARG,
"Number of times a thread is allowed to enter InnoDB within the same SQL query after it has once got the ticket",
- NULL, NULL, 500L, 1L, ~0UL, 0);
+ NULL, NULL, 5000L, 1L, ~0UL, 0);
static MYSQL_SYSVAR_LONG(file_io_threads, innobase_file_io_threads,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY | PLUGIN_VAR_NOSYSVAR,
@@ -14905,7 +15970,7 @@ static MYSQL_SYSVAR_LONG(file_io_threads, innobase_file_io_threads,
static MYSQL_SYSVAR_BOOL(ft_enable_diag_print, fts_enable_diag_print,
PLUGIN_VAR_OPCMDARG,
"Whether to enable additional FTS diagnostic printout ",
- NULL, NULL, TRUE);
+ NULL, NULL, FALSE);
static MYSQL_SYSVAR_BOOL(disable_sort_file_cache, srv_disable_sort_file_cache,
PLUGIN_VAR_OPCMDARG,
@@ -14921,7 +15986,7 @@ static MYSQL_SYSVAR_STR(ft_aux_table, fts_internal_tbl_name,
static MYSQL_SYSVAR_ULONG(ft_cache_size, fts_max_cache_size,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"InnoDB Fulltext search cache size in bytes",
- NULL, NULL, 32000000, 1600000, 80000000, 0);
+ NULL, NULL, 8000000, 1600000, 80000000, 0);
static MYSQL_SYSVAR_ULONG(ft_min_token_size, fts_min_token_size,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
@@ -14947,7 +16012,12 @@ static MYSQL_SYSVAR_ULONG(ft_sort_pll_degree, fts_sort_pll_degree,
static MYSQL_SYSVAR_ULONG(sort_buffer_size, srv_sort_buf_size,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Memory buffer size for index creation",
- NULL, NULL, 1048576, 524288, 64<<20, 0);
+ NULL, NULL, 1048576, 65536, 64<<20, 0);
+
+static MYSQL_SYSVAR_ULONGLONG(online_alter_log_max_size, srv_online_max_size,
+ PLUGIN_VAR_RQCMDARG,
+ "Maximum modification log file size for online index creation",
+ NULL, NULL, 128<<20, 65536, ~0ULL, 0);
static MYSQL_SYSVAR_BOOL(optimize_fulltext_only, innodb_optimize_fulltext_only,
PLUGIN_VAR_NOCMDARG,
@@ -14964,11 +16034,18 @@ static MYSQL_SYSVAR_ULONG(write_io_threads, innobase_write_io_threads,
"Number of background write I/O threads in InnoDB.",
NULL, NULL, 4, 1, 64, 0);
-static MYSQL_SYSVAR_LONG(force_recovery, innobase_force_recovery,
+static MYSQL_SYSVAR_ULONG(force_recovery, srv_force_recovery,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Helps to save your data in case the disk image of the database becomes corrupt.",
NULL, NULL, 0, 0, 6, 0);
+#ifndef DBUG_OFF
+static MYSQL_SYSVAR_ULONG(force_recovery_crash, srv_force_recovery_crash,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Kills the server during crash recovery.",
+ NULL, NULL, 0, 0, 10, 0);
+#endif /* !DBUG_OFF */
+
static MYSQL_SYSVAR_ULONG(page_size, srv_page_size,
PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
"Page size to use for all InnoDB tablespaces.",
@@ -14983,12 +16060,12 @@ static MYSQL_SYSVAR_LONG(log_buffer_size, innobase_log_buffer_size,
static MYSQL_SYSVAR_LONGLONG(log_file_size, innobase_log_file_size,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"Size of each log file in a log group.",
- NULL, NULL, 5*1024*1024L, 1*1024*1024L, LONGLONG_MAX, 1024*1024L);
+ NULL, NULL, 48*1024*1024L, 1*1024*1024L, LONGLONG_MAX, 1024*1024L);
-static MYSQL_SYSVAR_LONG(log_files_in_group, innobase_log_files_in_group,
+static MYSQL_SYSVAR_ULONG(log_files_in_group, srv_n_log_files,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Number of log files in the log group. InnoDB writes to the files in a circular fashion. Value 3 is recommended here.",
- NULL, NULL, 2, 2, 100, 0);
+ "Number of log files in the log group. InnoDB writes to the files in a circular fashion.",
+ NULL, NULL, 2, 2, SRV_N_LOG_FILES_MAX, 0);
static MYSQL_SYSVAR_LONG(mirrored_log_groups, innobase_mirrored_log_groups,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
@@ -15004,13 +16081,13 @@ static MYSQL_SYSVAR_UINT(old_blocks_time, buf_LRU_old_threshold_ms,
PLUGIN_VAR_RQCMDARG,
"Move blocks to the 'new' end of the buffer pool if the first access"
" was at least this many milliseconds ago."
- " The timeout is disabled if 0 (the default).",
- NULL, NULL, 0, 0, UINT_MAX32, 0);
+ " The timeout is disabled if 0.",
+ NULL, NULL, 1000, 0, UINT_MAX32, 0);
static MYSQL_SYSVAR_LONG(open_files, innobase_open_files,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"How many files at the maximum InnoDB keeps open at the same time.",
- NULL, NULL, 300L, 10L, LONG_MAX, 0);
+ NULL, NULL, 0L, 0L, LONG_MAX, 0);
static MYSQL_SYSVAR_ULONG(sync_spin_loops, srv_n_spin_wait_rounds,
PLUGIN_VAR_RQCMDARG,
@@ -15110,6 +16187,37 @@ static MYSQL_SYSVAR_BOOL(use_native_aio, srv_use_native_aio,
"Use native AIO if supported on this platform.",
NULL, NULL, TRUE);
+static MYSQL_SYSVAR_BOOL(api_enable_binlog, ib_binlog_enabled,
+ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+ "Enable binlog for applications direct access InnoDB through InnoDB APIs",
+ NULL, NULL, FALSE);
+
+static MYSQL_SYSVAR_BOOL(api_enable_mdl, ib_mdl_enabled,
+ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+ "Enable MDL for applications direct access InnoDB through InnoDB APIs",
+ NULL, NULL, FALSE);
+
+static MYSQL_SYSVAR_BOOL(api_disable_rowlock, ib_disable_row_lock,
+ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+ "Disable row lock when direct access InnoDB through InnoDB APIs",
+ NULL, NULL, FALSE);
+
+static MYSQL_SYSVAR_ULONG(api_trx_level, ib_trx_level_setting,
+ PLUGIN_VAR_OPCMDARG,
+ "InnoDB API transaction isolation level",
+ NULL, NULL,
+ 0, /* Default setting */
+ 0, /* Minimum value */
+ 3, 0); /* Maximum value */
+
+static MYSQL_SYSVAR_ULONG(api_bk_commit_interval, ib_bk_commit_interval,
+ PLUGIN_VAR_OPCMDARG,
+ "Background commit interval in seconds",
+ NULL, NULL,
+ 5, /* Default setting */
+ 1, /* Minimum value */
+ 1024 * 1024 * 1024, 0); /* Maximum value */
+
static MYSQL_SYSVAR_STR(change_buffering, innobase_change_buffering,
PLUGIN_VAR_RQCMDARG,
"Buffer changes to reduce random access: "
@@ -15137,6 +16245,12 @@ static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug,
PLUGIN_VAR_RQCMDARG,
"Debug flags for InnoDB change buffering (0=none, 2=crash at merge)",
NULL, NULL, 0, 0, 2, 0);
+
+static MYSQL_SYSVAR_BOOL(disable_background_merge,
+ srv_ibuf_disable_background_merge,
+ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_RQCMDARG,
+ "Disable change buffering merges by the master thread",
+ NULL, NULL, FALSE);
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
static MYSQL_SYSVAR_BOOL(random_read_ahead, srv_random_read_ahead,
@@ -15179,15 +16293,53 @@ static MYSQL_SYSVAR_BOOL(print_all_deadlocks, srv_print_all_deadlocks,
"Print all deadlocks to MySQL error log (off by default)",
NULL, NULL, FALSE);
+static MYSQL_SYSVAR_ULONG(compression_failure_threshold_pct,
+ zip_failure_threshold_pct, PLUGIN_VAR_OPCMDARG,
+ "If the compression failure rate of a table is greater than this number"
+ " more padding is added to the pages to reduce the failures. A value of"
+ " zero implies no padding",
+ NULL, NULL, 5, 0, 100, 0);
+
+static MYSQL_SYSVAR_ULONG(compression_pad_pct_max,
+ zip_pad_max, PLUGIN_VAR_OPCMDARG,
+ "Percentage of empty space on a data page that can be reserved"
+ " to make the page compressible.",
+ NULL, NULL, 50, 0, 75, 0);
+
+static MYSQL_SYSVAR_BOOL(read_only, srv_read_only_mode,
+ PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
+ "Start InnoDB in read only mode (off by default)",
+ NULL, NULL, FALSE);
+
+static MYSQL_SYSVAR_BOOL(cmp_per_index_enabled, srv_cmp_per_index_enabled,
+ PLUGIN_VAR_OPCMDARG,
+ "Enable INFORMATION_SCHEMA.innodb_cmp_per_index, "
+ "may have negative impact on performance (off by default)",
+ NULL, innodb_cmp_per_index_update, FALSE);
+
#ifdef UNIV_DEBUG_never
static MYSQL_SYSVAR_UINT(trx_rseg_n_slots_debug, trx_rseg_n_slots_debug,
PLUGIN_VAR_RQCMDARG,
"Debug flags for InnoDB to limit TRX_RSEG_N_SLOTS for trx_rsegf_undo_find_free()",
NULL, NULL, 0, 0, 1024, 0);
+
+static MYSQL_SYSVAR_UINT(limit_optimistic_insert_debug,
+ btr_cur_limit_optimistic_insert_debug, PLUGIN_VAR_RQCMDARG,
+ "Artificially limit the number of records per B-tree page (0=unlimited).",
+ NULL, NULL, 0, 0, UINT_MAX32, 0);
+
+static MYSQL_SYSVAR_BOOL(trx_purge_view_update_only_debug,
+ srv_purge_view_update_only_debug, PLUGIN_VAR_NOCMDARG,
+ "Pause actual purging any delete-marked records, but merely update the purge view. "
+ "It is to create artificially the situation the purge view have been updated "
+ "but the each purges were not done yet.",
+ NULL, NULL, FALSE);
#endif /* UNIV_DEBUG */
static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(additional_mem_pool_size),
+ MYSQL_SYSVAR(api_trx_level),
+ MYSQL_SYSVAR(api_bk_commit_interval),
MYSQL_SYSVAR(autoextend_increment),
MYSQL_SYSVAR(buffer_pool_size),
MYSQL_SYSVAR(buffer_pool_instances),
@@ -15203,9 +16355,13 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(checksums),
MYSQL_SYSVAR(commit_concurrency),
MYSQL_SYSVAR(concurrency_tickets),
+ MYSQL_SYSVAR(compression_level),
MYSQL_SYSVAR(data_file_path),
MYSQL_SYSVAR(data_home_dir),
MYSQL_SYSVAR(doublewrite),
+ MYSQL_SYSVAR(api_enable_binlog),
+ MYSQL_SYSVAR(api_enable_mdl),
+ MYSQL_SYSVAR(api_disable_rowlock),
MYSQL_SYSVAR(fast_shutdown),
MYSQL_SYSVAR(file_io_threads),
MYSQL_SYSVAR(read_io_threads),
@@ -15214,9 +16370,13 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(file_format),
MYSQL_SYSVAR(file_format_check),
MYSQL_SYSVAR(file_format_max),
+ MYSQL_SYSVAR(flush_log_at_timeout),
MYSQL_SYSVAR(flush_log_at_trx_commit),
MYSQL_SYSVAR(flush_method),
MYSQL_SYSVAR(force_recovery),
+#ifndef DBUG_OFF
+ MYSQL_SYSVAR(force_recovery_crash),
+#endif /* !DBUG_OFF */
MYSQL_SYSVAR(ft_cache_size),
MYSQL_SYSVAR(ft_enable_stopword),
MYSQL_SYSVAR(ft_max_token_size),
@@ -15237,7 +16397,10 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(log_files_in_group),
MYSQL_SYSVAR(log_group_home_dir),
MYSQL_SYSVAR(max_dirty_pages_pct),
+ MYSQL_SYSVAR(max_dirty_pages_pct_lwm),
+ MYSQL_SYSVAR(adaptive_flushing_lwm),
MYSQL_SYSVAR(adaptive_flushing),
+ MYSQL_SYSVAR(flushing_avg_loops),
MYSQL_SYSVAR(max_purge_lag),
MYSQL_SYSVAR(max_purge_lag_delay),
MYSQL_SYSVAR(mirrored_log_groups),
@@ -15254,7 +16417,9 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(stats_on_metadata),
MYSQL_SYSVAR(stats_sample_pages),
MYSQL_SYSVAR(stats_transient_sample_pages),
+ MYSQL_SYSVAR(stats_persistent),
MYSQL_SYSVAR(stats_persistent_sample_pages),
+ MYSQL_SYSVAR(stats_auto_recalc),
MYSQL_SYSVAR(adaptive_hash_index),
MYSQL_SYSVAR(stats_method),
MYSQL_SYSVAR(replication_delay),
@@ -15262,7 +16427,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(strict_mode),
MYSQL_SYSVAR(support_xa),
MYSQL_SYSVAR(sort_buffer_size),
- MYSQL_SYSVAR(analyze_is_persistent),
+ MYSQL_SYSVAR(online_alter_log_max_size),
MYSQL_SYSVAR(sync_spin_loops),
MYSQL_SYSVAR(spin_wait_delay),
MYSQL_SYSVAR(table_locks),
@@ -15279,33 +16444,45 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(change_buffer_max_size),
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
MYSQL_SYSVAR(change_buffering_debug),
+ MYSQL_SYSVAR(disable_background_merge),
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
MYSQL_SYSVAR(random_read_ahead),
MYSQL_SYSVAR(read_ahead_threshold),
+ MYSQL_SYSVAR(read_only),
MYSQL_SYSVAR(io_capacity),
+ MYSQL_SYSVAR(io_capacity_max),
MYSQL_SYSVAR(monitor_enable),
MYSQL_SYSVAR(monitor_disable),
MYSQL_SYSVAR(monitor_reset),
MYSQL_SYSVAR(monitor_reset_all),
MYSQL_SYSVAR(purge_threads),
MYSQL_SYSVAR(purge_batch_size),
+#ifdef UNIV_DEBUG
+ MYSQL_SYSVAR(purge_run_now),
+ MYSQL_SYSVAR(purge_stop_now),
+#endif /* UNIV_DEBUG */
#if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
MYSQL_SYSVAR(page_hash_locks),
MYSQL_SYSVAR(doublewrite_batch_size),
#endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */
MYSQL_SYSVAR(print_all_deadlocks),
+ MYSQL_SYSVAR(cmp_per_index_enabled),
MYSQL_SYSVAR(undo_logs),
MYSQL_SYSVAR(rollback_segments),
MYSQL_SYSVAR(undo_directory),
MYSQL_SYSVAR(undo_tablespaces),
MYSQL_SYSVAR(sync_array_size),
+ MYSQL_SYSVAR(compression_failure_threshold_pct),
+ MYSQL_SYSVAR(compression_pad_pct_max),
#ifdef UNIV_DEBUG_never /* disable this flag. --innodb-trx becomes ambiguous */
MYSQL_SYSVAR(trx_rseg_n_slots_debug),
+ MYSQL_SYSVAR(limit_optimistic_insert_debug),
+ MYSQL_SYSVAR(trx_purge_view_update_only_debug),
#endif /* UNIV_DEBUG */
NULL
};
-maria_declare_plugin(innobase)
+mysql_declare_plugin(innobase)
{
MYSQL_STORAGE_ENGINE_PLUGIN,
&innobase_storage_engine,
@@ -15318,8 +16495,8 @@ maria_declare_plugin(innobase)
INNODB_VERSION_SHORT,
innodb_status_variables_export,/* status variables */
innobase_system_variables, /* system variables */
- INNODB_VERSION_STR, /* string version */
- MariaDB_PLUGIN_MATURITY_STABLE /* maturity */
+ NULL, /* reserved */
+ 0, /* flags */
},
i_s_innodb_trx,
i_s_innodb_locks,
@@ -15328,6 +16505,8 @@ i_s_innodb_cmp,
i_s_innodb_cmp_reset,
i_s_innodb_cmpmem,
i_s_innodb_cmpmem_reset,
+i_s_innodb_cmp_per_index,
+i_s_innodb_cmp_per_index_reset,
i_s_innodb_buffer_page,
i_s_innodb_buffer_page_lru,
i_s_innodb_buffer_stats,
@@ -15345,9 +16524,11 @@ i_s_innodb_sys_indexes,
i_s_innodb_sys_columns,
i_s_innodb_sys_fields,
i_s_innodb_sys_foreign,
-i_s_innodb_sys_foreign_cols
+i_s_innodb_sys_foreign_cols,
+i_s_innodb_sys_tablespaces,
+i_s_innodb_sys_datafiles
-maria_declare_plugin_end;
+mysql_declare_plugin_end;
/** @brief Initialize the default value of innodb_commit_concurrency.
@@ -15384,7 +16565,7 @@ innobase_undo_logs_init_default_max()
#ifdef UNIV_COMPILE_TEST_FUNCS
-typedef struct innobase_convert_name_test_struct {
+struct innobase_convert_name_test_t {
char* buf;
ulint buflen;
const char* id;
@@ -15393,7 +16574,7 @@ typedef struct innobase_convert_name_test_struct {
ibool file_id;
const char* expected;
-} innobase_convert_name_test_t;
+};
void
test_innobase_convert_name()
@@ -15512,62 +16693,52 @@ test_innobase_convert_name()
* Multi Range Read interface, DS-MRR calls
*/
-int
-ha_innobase::multi_range_read_init(
- RANGE_SEQ_IF* seq,
- void* seq_init_param,
- uint n_ranges,
- uint mode,
- HANDLER_BUFFER* buf)
+int ha_innobase::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
+ uint n_ranges, uint mode,
+ HANDLER_BUFFER *buf)
{
- return(ds_mrr.dsmrr_init(this, seq, seq_init_param,
- n_ranges, mode, buf));
+ return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode, buf);
}
-int
-ha_innobase::multi_range_read_next(
- range_id_t *range_info)
+int ha_innobase::multi_range_read_next(range_id_t *range_info)
{
- return(ds_mrr.dsmrr_next(range_info));
+ return ds_mrr.dsmrr_next(range_info);
}
-ha_rows
-ha_innobase::multi_range_read_info_const(
- uint keyno,
- RANGE_SEQ_IF* seq,
- void* seq_init_param,
- uint n_ranges,
- uint* bufsz,
- uint* flags,
- Cost_estimate* cost)
+ha_rows ha_innobase::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
+ void *seq_init_param,
+ uint n_ranges, uint *bufsz,
+ uint *flags,
+ Cost_estimate *cost)
{
- /* See comments in ha_myisam::multi_range_read_info_const */
- ds_mrr.init(this, table);
- return(ds_mrr.dsmrr_info_const(keyno, seq, seq_init_param,
- n_ranges, bufsz, flags, cost));
+ /* See comments in ha_myisam::multi_range_read_info_const */
+ ds_mrr.init(this, table);
+
+ if (prebuilt->select_lock_type != LOCK_NONE)
+ *flags |= HA_MRR_USE_DEFAULT_IMPL;
+
+ ha_rows res= ds_mrr.dsmrr_info_const(keyno, seq, seq_init_param, n_ranges,
+ bufsz, flags, cost);
+ return res;
}
-ha_rows
-ha_innobase::multi_range_read_info(
- uint keyno,
- uint n_ranges,
- uint keys,
- uint key_parts,
- uint* bufsz,
- uint* flags,
- Cost_estimate* cost)
+ha_rows ha_innobase::multi_range_read_info(uint keyno, uint n_ranges,
+ uint keys, uint key_parts,
+ uint *bufsz, uint *flags,
+ Cost_estimate *cost)
{
- ds_mrr.init(this, table);
- return(ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz,
- flags, cost));
+ ds_mrr.init(this, table);
+ ha_rows res= ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz,
+ flags, cost);
+ return res;
}
-int ha_innobase::multi_range_read_explain_info(uint mrr_mode, char *str, size_t size)
+int ha_innobase::multi_range_read_explain_info(uint mrr_mode, char *str,
+ size_t size)
{
return ds_mrr.dsmrr_explain_info(mrr_mode, str, size);
}
-
/**
* Index Condition Pushdown interface implementation
*/
@@ -15581,7 +16752,7 @@ innobase_index_cond(
/*================*/
void* file) /*!< in/out: pointer to ha_innobase */
{
- return handler_index_cond_check(file);
+ return handler_index_cond_check(file);
}
/** Attempt to push down an index condition.
@@ -15606,3 +16777,181 @@ ha_innobase::idx_cond_push(
DBUG_RETURN(NULL);
}
+/******************************************************************//**
+Use this when the args are passed to the format string from
+errmsg-utf8.txt directly as is.
+
+Push a warning message to the client, it is a wrapper around:
+
+void push_warning_printf(
+ THD *thd, Sql_condition::enum_warning_level level,
+ uint code, const char *format, ...);
+*/
+UNIV_INTERN
+void
+ib_senderrf(
+/*========*/
+ THD* thd, /*!< in/out: session */
+ ib_log_level_t level, /*!< in: warning level */
+ ib_uint32_t code, /*!< MySQL error code */
+ ...) /*!< Args */
+{
+ char* str;
+ va_list args;
+ const char* format = innobase_get_err_msg(code);
+
+ /* If the caller wants to push a message to the client then
+ the caller must pass a valid session handle. */
+
+ ut_a(thd != 0);
+
+ /* The error code must exist in the errmsg-utf8.txt file. */
+ ut_a(format != 0);
+
+ va_start(args, code);
+
+#ifdef __WIN__
+ int size = _vscprintf(format, args) + 1;
+ str = static_cast<char*>(malloc(size));
+ str[size - 1] = 0x0;
+ vsnprintf(str, size, format, args);
+#elif HAVE_VASPRINTF
+ (void) vasprintf(&str, format, args);
+#else
+ /* Use a fixed length string. */
+ str = static_cast<char*>(malloc(BUFSIZ));
+ my_vsnprintf(str, BUFSIZ, format, args);
+#endif /* __WIN__ */
+
+ Sql_condition::enum_warning_level l;
+
+ l = Sql_condition::WARN_LEVEL_NOTE;
+
+ switch(level) {
+ case IB_LOG_LEVEL_INFO:
+ break;
+ case IB_LOG_LEVEL_WARN:
+ l = Sql_condition::WARN_LEVEL_WARN;
+ break;
+ case IB_LOG_LEVEL_ERROR:
+ /* We can't use push_warning_printf(), it is a hard error. */
+ my_printf_error(code, "%s", MYF(0), str);
+ break;
+ case IB_LOG_LEVEL_FATAL:
+ l = Sql_condition::WARN_LEVEL_END;
+ break;
+ }
+
+ if (level != IB_LOG_LEVEL_ERROR) {
+ push_warning_printf(thd, l, code, "InnoDB: %s", str);
+ }
+
+ va_end(args);
+ free(str);
+
+ if (level == IB_LOG_LEVEL_FATAL) {
+ ut_error;
+ }
+}
+
+/******************************************************************//**
+Use this when the args are first converted to a formatted string and then
+passed to the format string from errmsg-utf8.txt. The error message format
+must be: "Some string ... %s".
+
+Push a warning message to the client, it is a wrapper around:
+
+void push_warning_printf(
+ THD *thd, Sql_condition::enum_warning_level level,
+ uint code, const char *format, ...);
+*/
+UNIV_INTERN
+void
+ib_errf(
+/*====*/
+ THD* thd, /*!< in/out: session */
+ ib_log_level_t level, /*!< in: warning level */
+ ib_uint32_t code, /*!< MySQL error code */
+ const char* format, /*!< printf format */
+ ...) /*!< Args */
+{
+ char* str;
+ va_list args;
+
+ /* If the caller wants to push a message to the client then
+ the caller must pass a valid session handle. */
+
+ ut_a(thd != 0);
+ ut_a(format != 0);
+
+ va_start(args, format);
+
+#ifdef __WIN__
+ int size = _vscprintf(format, args) + 1;
+ str = static_cast<char*>(malloc(size));
+ str[size - 1] = 0x0;
+ vsnprintf(str, size, format, args);
+#elif HAVE_VASPRINTF
+ (void) vasprintf(&str, format, args);
+#else
+ /* Use a fixed length string. */
+ str = static_cast<char*>(malloc(BUFSIZ));
+ my_vsnprintf(str, BUFSIZ, format, args);
+#endif /* __WIN__ */
+
+ ib_senderrf(thd, level, code, str);
+
+ va_end(args);
+ free(str);
+}
+
+/******************************************************************//**
+Write a message to the MySQL log, prefixed with "InnoDB: " */
+UNIV_INTERN
+void
+ib_logf(
+/*====*/
+ ib_log_level_t level, /*!< in: warning level */
+ const char* format, /*!< printf format */
+ ...) /*!< Args */
+{
+ char* str;
+ va_list args;
+
+ va_start(args, format);
+
+#ifdef __WIN__
+ int size = _vscprintf(format, args) + 1;
+ str = static_cast<char*>(malloc(size));
+ str[size - 1] = 0x0;
+ vsnprintf(str, size, format, args);
+#elif HAVE_VASPRINTF
+ (void) vasprintf(&str, format, args);
+#else
+ /* Use a fixed length string. */
+ str = static_cast<char*>(malloc(BUFSIZ));
+ my_vsnprintf(str, BUFSIZ, format, args);
+#endif /* __WIN__ */
+
+ switch(level) {
+ case IB_LOG_LEVEL_INFO:
+ sql_print_information("InnoDB: %s", str);
+ break;
+ case IB_LOG_LEVEL_WARN:
+ sql_print_warning("InnoDB: %s", str);
+ break;
+ case IB_LOG_LEVEL_ERROR:
+ sql_print_error("InnoDB: %s", str);
+ break;
+ case IB_LOG_LEVEL_FATAL:
+ sql_print_error("InnoDB: %s", str);
+ break;
+ }
+
+ va_end(args);
+ free(str);
+
+ if (level == IB_LOG_LEVEL_FATAL) {
+ ut_error;
+ }
+}
diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h
index e56a1ec52e3..ece9f7cf58a 100644
--- a/storage/innobase/handler/ha_innodb.h
+++ b/storage/innobase/handler/ha_innodb.h
@@ -27,14 +27,14 @@ this program; if not, write to the Free Software Foundation, Inc.,
/* Structure defines translation table between mysql index and innodb
index structures */
-typedef struct innodb_idx_translate_struct {
+struct innodb_idx_translate_t {
ulint index_count; /*!< number of valid index entries
in the index_mapping array */
ulint array_size; /*!< array size of index_mapping */
dict_index_t** index_mapping; /*!< index pointer array directly
maps to index in Innodb from MySQL
array index */
-} innodb_idx_translate_t;
+};
/** InnoDB table share */
@@ -53,15 +53,8 @@ typedef struct st_innobase_share {
} INNOBASE_SHARE;
-/** InnoDB B-tree index */
-struct dict_index_struct;
-/** Prebuilt structures in an Innobase table handle used within MySQL */
-struct row_prebuilt_struct;
-
-/** InnoDB B-tree index */
-typedef struct dict_index_struct dict_index_t;
-/** Prebuilt structures in an Innobase table handle used within MySQL */
-typedef struct row_prebuilt_struct row_prebuilt_t;
+/** Prebuilt structures in an InnoDB table handle used within MySQL */
+struct row_prebuilt_t;
/** The class defining a handle to an Innodb table */
class ha_innobase: public handler
@@ -101,15 +94,13 @@ class ha_innobase: public handler
void update_thd();
int change_active_index(uint keynr);
int general_fetch(uchar* buf, uint direction, uint match_mode);
- ulint innobase_lock_autoinc();
+ dberr_t innobase_lock_autoinc();
ulonglong innobase_peek_autoinc();
- ulint innobase_set_max_autoinc(ulonglong auto_inc);
- ulint innobase_reset_autoinc(ulonglong auto_inc);
- ulint innobase_get_autoinc(ulonglong* value);
- ulint innobase_update_autoinc(ulonglong auto_inc);
+ dberr_t innobase_set_max_autoinc(ulonglong auto_inc);
+ dberr_t innobase_reset_autoinc(ulonglong auto_inc);
+ dberr_t innobase_get_autoinc(ulonglong* value);
void innobase_initialize_autoinc();
dict_index_t* innobase_get_index(uint keynr);
- int info_low(uint flag, dict_stats_upd_option_t stats_upd_option);
/* Init values for the class: */
public:
@@ -132,9 +123,11 @@ class ha_innobase: public handler
const key_map* keys_to_use_for_scanning();
int open(const char *name, int mode, uint test_if_locked);
+ handler* clone(const char *name, MEM_ROOT *mem_root);
int close(void);
double scan_time();
double read_time(uint index, uint ranges, ha_rows rows);
+ longlong get_memory_buffer_size() const;
int write_row(uchar * buf);
int update_row(const uchar * old_data, uchar * new_data);
@@ -182,6 +175,13 @@ class ha_innobase: public handler
ha_rows estimate_rows_upper_bound();
void update_create_info(HA_CREATE_INFO* create_info);
+ int parse_table_name(const char*name,
+ HA_CREATE_INFO* create_info,
+ ulint flags,
+ ulint flags2,
+ char* norm_name,
+ char* temp_path,
+ char* remote_path);
int create(const char *name, register TABLE *form,
HA_CREATE_INFO *create_info);
int truncate();
@@ -219,13 +219,76 @@ class ha_innobase: public handler
static ulonglong get_mysql_bin_log_pos();
bool primary_key_is_clustered();
int cmp_ref(const uchar *ref1, const uchar *ref2);
- /** Fast index creation (smart ALTER TABLE) @see handler0alter.cc @{ */
- int add_index(TABLE *table_arg, KEY *key_info, uint num_of_keys,
- handler_add_index **add);
- int final_add_index(handler_add_index *add, bool commit);
- int prepare_drop_index(TABLE *table_arg, uint *key_num,
- uint num_of_keys);
- int final_drop_index(TABLE *table_arg);
+ /** On-line ALTER TABLE interface @see handler0alter.cc @{ */
+
+ /** Check if InnoDB supports a particular alter table in-place
+ @param altered_table TABLE object for new version of table.
+ @param ha_alter_info Structure describing changes to be done
+ by ALTER TABLE and holding data used during in-place alter.
+
+ @retval HA_ALTER_INPLACE_NOT_SUPPORTED Not supported
+ @retval HA_ALTER_INPLACE_NO_LOCK Supported
+ @retval HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE
+ Supported, but requires lock
+ during main phase and exclusive
+ lock during prepare phase.
+ @retval HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE
+ Supported, prepare phase
+ requires exclusive lock.
+ */
+ enum_alter_inplace_result check_if_supported_inplace_alter(
+ TABLE* altered_table,
+ Alter_inplace_info* ha_alter_info);
+ /** Allows InnoDB to update internal structures with concurrent
+ writes blocked (provided that check_if_supported_inplace_alter()
+ did not return HA_ALTER_INPLACE_NO_LOCK).
+ This will be invoked before inplace_alter_table().
+
+ @param altered_table TABLE object for new version of table.
+ @param ha_alter_info Structure describing changes to be done
+ by ALTER TABLE and holding data used during in-place alter.
+
+ @retval true Failure
+ @retval false Success
+ */
+ bool prepare_inplace_alter_table(
+ TABLE* altered_table,
+ Alter_inplace_info* ha_alter_info);
+
+ /** Alter the table structure in-place with operations
+ specified using HA_ALTER_FLAGS and Alter_inplace_information.
+ The level of concurrency allowed during this operation depends
+ on the return value from check_if_supported_inplace_alter().
+
+ @param altered_table TABLE object for new version of table.
+ @param ha_alter_info Structure describing changes to be done
+ by ALTER TABLE and holding data used during in-place alter.
+
+ @retval true Failure
+ @retval false Success
+ */
+ bool inplace_alter_table(
+ TABLE* altered_table,
+ Alter_inplace_info* ha_alter_info);
+
+ /** Commit or rollback the changes made during
+ prepare_inplace_alter_table() and inplace_alter_table() inside
+ the storage engine. Note that the allowed level of concurrency
+ during this operation will be the same as for
+ inplace_alter_table() and thus might be higher than during
+ prepare_inplace_alter_table(). (E.g concurrent writes were
+ blocked during prepare, but might not be during commit).
+ @param altered_table TABLE object for new version of table.
+ @param ha_alter_info Structure describing changes to be done
+ by ALTER TABLE and holding data used during in-place alter.
+ @param commit true => Commit, false => Rollback.
+ @retval true Failure
+ @retval false Success
+ */
+ bool commit_inplace_alter_table(
+ TABLE* altered_table,
+ Alter_inplace_info* ha_alter_info,
+ bool commit);
/** @} */
bool check_if_incompatible_data(HA_CREATE_INFO *info,
uint table_changes);
@@ -241,6 +304,8 @@ private:
@see build_template() */
inline void reset_template();
+ int info_low(uint, bool);
+
public:
/** @name Multi Range Read interface @{ */
/** Initialize multi range read @see DsMrr_impl::dsmrr_init
@@ -283,15 +348,12 @@ public:
* @param flags
* @param cost
*/
- ha_rows multi_range_read_info(uint keyno,
- uint n_ranges, uint keys,
- uint key_parts,
- uint* bufsz, uint* mrr_mode,
+ ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
+ uint key_parts, uint* bufsz, uint* flags,
Cost_estimate* cost);
- int multi_range_read_explain_info(uint mrr_mode,
- char *str, size_t size);
-
+ int multi_range_read_explain_info(uint mrr_mode, char *str,
+ size_t size);
/** Attempt to push down an index condition.
* @param[in] keyno MySQL key number
* @param[in] idx_cond Index condition to be checked
@@ -364,6 +426,27 @@ bool thd_binlog_filter_ok(const MYSQL_THD thd);
*/
bool thd_sqlcom_can_generate_row_events(const MYSQL_THD thd);
+/**
+ Gets information on the durability property requested by
+ a thread.
+ @param thd Thread handle
+ @return a durability property.
+*/
+enum durability_properties thd_get_durability_property(const MYSQL_THD thd);
+
+/** Get the auto_increment_offset auto_increment_increment.
+@param thd Thread object
+@param off auto_increment_offset
+@param inc auto_increment_increment */
+void thd_get_autoinc(const MYSQL_THD thd, ulong* off, ulong* inc)
+__attribute__((nonnull));
+
+/** Is strict sql_mode set.
+@param thd Thread object
+@return True if sql_mode has strict mode (all or trans), false otherwise.
+*/
+bool thd_is_strict_mode(const MYSQL_THD thd)
+__attribute__((nonnull));
} /* extern "C" */
/** Get the file name and position of the MySQL binlog corresponding to the
@@ -371,7 +454,7 @@ bool thd_sqlcom_can_generate_row_events(const MYSQL_THD thd);
*/
extern void mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file);
-typedef struct trx_struct trx_t;
+struct trx_t;
extern const struct _ft_vft ft_vft_result;
@@ -379,23 +462,11 @@ extern const struct _ft_vft ft_vft_result;
typedef struct new_ft_info
{
struct _ft_vft *please;
+ struct _ft_vft_ext *could_you;
row_prebuilt_t* ft_prebuilt;
fts_result_t* ft_result;
} NEW_FT_INFO;
-/********************************************************************//**
-@file handler/ha_innodb.h
-Converts an InnoDB error code to a MySQL error code and also tells to MySQL
-about a possible transaction rollback inside InnoDB caused by a lock wait
-timeout or a deadlock.
-@return MySQL error code */
-int
-convert_error_code_to_mysql(
-/*========================*/
- int error, /*!< in: InnoDB error code */
- ulint flags, /*!< in: InnoDB table flags, or 0 */
- MYSQL_THD thd); /*!< in: user thread handle or NULL */
-
/*********************************************************************//**
Allocates an InnoDB transaction for a MySQL handler object.
@return InnoDB transaction handle */
@@ -410,13 +481,50 @@ system default primary index name 'GEN_CLUST_INDEX'. If a name
matches, this function pushes an warning message to the client,
and returns true.
@return true if the index name matches the reserved name */
+UNIV_INTERN
bool
innobase_index_name_is_reserved(
/*============================*/
THD* thd, /*!< in/out: MySQL connection */
const KEY* key_info, /*!< in: Indexes to be created */
- ulint num_of_keys); /*!< in: Number of indexes to
+ ulint num_of_keys) /*!< in: Number of indexes to
be created. */
+ __attribute__((nonnull, warn_unused_result));
+
+/*****************************************************************//**
+Determines InnoDB table flags.
+@retval true if successful, false if error */
+UNIV_INTERN
+bool
+innobase_table_flags(
+/*=================*/
+ const TABLE* form, /*!< in: table */
+ const HA_CREATE_INFO* create_info, /*!< in: information
+ on table columns and indexes */
+ THD* thd, /*!< in: connection */
+ bool use_tablespace, /*!< in: whether to create
+ outside system tablespace */
+ ulint* flags, /*!< out: DICT_TF flags */
+ ulint* flags2) /*!< out: DICT_TF2 flags */
+ __attribute__((nonnull, warn_unused_result));
+
+/*****************************************************************//**
+Validates the create options. We may build on this function
+in future. For now, it checks two specifiers:
+KEY_BLOCK_SIZE and ROW_FORMAT
+If innodb_strict_mode is not set then this function is a no-op
+@return NULL if valid, string if not. */
+UNIV_INTERN
+const char*
+create_options_are_invalid(
+/*=======================*/
+ THD* thd, /*!< in: connection thread. */
+ TABLE* form, /*!< in: information on table
+ columns and indexes */
+ HA_CREATE_INFO* create_info, /*!< in: create info. */
+ bool use_tablespace) /*!< in: srv_file_per_table */
+ __attribute__((nonnull, warn_unused_result));
+
/*********************************************************************//**
Retrieve the FTS Relevance Ranking result for doc with doc_id
of prebuilt->fts_doc_id
@@ -434,7 +542,7 @@ of prebuilt->fts_doc_id
UNIV_INTERN
float
innobase_fts_find_ranking(
-/*==========================*/
+/*======================*/
FT_INFO* fts_hdl, /*!< in: FTS handler */
uchar* record, /*!< in: Unused */
uint len); /*!< in: Unused */
@@ -443,24 +551,20 @@ Free the memory for the FTS handler */
UNIV_INTERN
void
innobase_fts_close_ranking(
-/*==========================*/
- FT_INFO* fts_hdl); /*!< in: FTS handler */
-/*********************************************************************//**
-Free the memory for the FTS handler */
-void
-innobase_fts_close_ranking(
-/*==========================*/
- FT_INFO* fts_hdl); /*!< in: FTS handler */
+/*=======================*/
+ FT_INFO* fts_hdl) /*!< in: FTS handler */
+ __attribute__((nonnull));
/*****************************************************************//**
Initialize the table FTS stopword list
-@return TRUE is succeed */
+@return TRUE if success */
UNIV_INTERN
ibool
innobase_fts_load_stopword(
/*=======================*/
dict_table_t* table, /*!< in: Table has the FTS */
trx_t* trx, /*!< in: transaction */
- THD* thd); /*!< in: current thread */
+ THD* thd) /*!< in: current thread */
+ __attribute__((nonnull(1,3), warn_unused_result));
/** Some defines for innobase_fts_check_doc_id_index() return value */
enum fts_doc_id_index_enum {
@@ -472,15 +576,17 @@ enum fts_doc_id_index_enum {
/*******************************************************************//**
Check whether the table has a unique index with FTS_DOC_ID_INDEX_NAME
on the Doc ID column.
-@return FTS_EXIST_DOC_ID_INDEX if there exists the FTS_DOC_ID index,
-FTS_INCORRECT_DOC_ID_INDEX if the FTS_DOC_ID index is of wrong format */
+@return the status of the FTS_DOC_ID index */
UNIV_INTERN
enum fts_doc_id_index_enum
innobase_fts_check_doc_id_index(
/*============================*/
- dict_table_t* table, /*!< in: table definition */
- ulint* fts_doc_col_no);/*!< out: The column number for
- Doc ID */
+ const dict_table_t* table, /*!< in: table definition */
+ const TABLE* altered_table, /*!< in: MySQL table
+ that is being altered */
+ ulint* fts_doc_col_no) /*!< out: The column number for
+ Doc ID */
+ __attribute__((warn_unused_result));
/*******************************************************************//**
Check whether the table has a unique index with FTS_DOC_ID_INDEX_NAME
@@ -492,4 +598,59 @@ enum fts_doc_id_index_enum
innobase_fts_check_doc_id_index_in_def(
/*===================================*/
ulint n_key, /*!< in: Number of keys */
- KEY* key_info); /*!< in: Key definition */
+ const KEY* key_info) /*!< in: Key definitions */
+ __attribute__((nonnull, warn_unused_result));
+
+/***********************************************************************
+@return version of the extended FTS API */
+uint
+innobase_fts_get_version();
+
+/***********************************************************************
+@return Which part of the extended FTS API is supported */
+ulonglong
+innobase_fts_flags();
+
+/***********************************************************************
+Find and Retrieve the FTS doc_id for the current result row
+@return the document ID */
+ulonglong
+innobase_fts_retrieve_docid(
+/*============================*/
+ FT_INFO_EXT* fts_hdl); /*!< in: FTS handler */
+
+/***********************************************************************
+Find and retrieve the size of the current result
+@return number of matching rows */
+ulonglong
+innobase_fts_count_matches(
+/*============================*/
+ FT_INFO_EXT* fts_hdl); /*!< in: FTS handler */
+
+/** "GEN_CLUST_INDEX" is the name reserved for InnoDB default
+system clustered index when there is no primary key. */
+extern const char innobase_index_reserve_name[];
+
+/*********************************************************************//**
+Copy table flags from MySQL's HA_CREATE_INFO into an InnoDB table object.
+Those flags are stored in .frm file and end up in the MySQL table object,
+but are frequently used inside InnoDB so we keep their copies into the
+InnoDB table object. */
+UNIV_INTERN
+void
+innobase_copy_frm_flags_from_create_info(
+/*=====================================*/
+ dict_table_t* innodb_table, /*!< in/out: InnoDB table */
+ HA_CREATE_INFO* create_info); /*!< in: create info */
+
+/*********************************************************************//**
+Copy table flags from MySQL's TABLE_SHARE into an InnoDB table object.
+Those flags are stored in .frm file and end up in the MySQL table object,
+but are frequently used inside InnoDB so we keep their copies into the
+InnoDB table object. */
+UNIV_INTERN
+void
+innobase_copy_frm_flags_from_table_share(
+/*=====================================*/
+ dict_table_t* innodb_table, /*!< in/out: InnoDB table */
+ TABLE_SHARE* table_share); /*!< in: table share */
diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc
index 1468bc79c04..437443979c0 100644
--- a/storage/innobase/handler/handler0alter.cc
+++ b/storage/innobase/handler/handler0alter.cc
@@ -23,11 +23,20 @@ Smart ALTER TABLE
#include <unireg.h>
#include <mysqld_error.h>
-#include <sql_lex.h> // SQLCOM_CREATE_INDEX
+#include <log.h>
+#include <debug_sync.h>
#include <innodb_priv.h>
+#include <sql_alter.h>
+#include <sql_class.h>
+#include "dict0crea.h"
+#include "dict0dict.h"
+#include "dict0priv.h"
#include "dict0stats.h"
+#include "dict0stats_bg.h"
#include "log0log.h"
+#include "rem0types.h"
+#include "row0log.h"
#include "row0merge.h"
#include "srv0srv.h"
#include "trx0trx.h"
@@ -36,9 +45,995 @@ Smart ALTER TABLE
#include "handler0alter.h"
#include "srv0mon.h"
#include "fts0priv.h"
+#include "pars0pars.h"
#include "ha_innodb.h"
+/** Operations for creating an index in place */
+static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_ONLINE_CREATE
+ = Alter_inplace_info::ADD_INDEX
+ | Alter_inplace_info::ADD_UNIQUE_INDEX;
+
+/** Operations for rebuilding a table in place */
+static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_INPLACE_REBUILD
+ = Alter_inplace_info::ADD_PK_INDEX
+ | Alter_inplace_info::DROP_PK_INDEX
+ | Alter_inplace_info::CHANGE_CREATE_OPTION
+ | Alter_inplace_info::ALTER_COLUMN_NULLABLE
+ | Alter_inplace_info::ALTER_COLUMN_NOT_NULLABLE
+ | Alter_inplace_info::ALTER_COLUMN_ORDER
+ | Alter_inplace_info::DROP_COLUMN
+ | Alter_inplace_info::ADD_COLUMN
+ /*
+ | Alter_inplace_info::ALTER_COLUMN_TYPE
+ | Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH
+ */
+ ;
+
+/** Operations for creating indexes or rebuilding a table */
+static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_INPLACE_CREATE
+ = INNOBASE_ONLINE_CREATE | INNOBASE_INPLACE_REBUILD;
+
+/** Operations for altering a table that InnoDB does not care about */
+static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_INPLACE_IGNORE
+ = Alter_inplace_info::ALTER_COLUMN_DEFAULT
+ | Alter_inplace_info::ALTER_COLUMN_COLUMN_FORMAT
+ | Alter_inplace_info::ALTER_COLUMN_STORAGE_TYPE
+ | Alter_inplace_info::ALTER_RENAME;
+
+/** Operations that InnoDB can perform online */
+static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_ONLINE_OPERATIONS
+ = INNOBASE_INPLACE_IGNORE
+ | INNOBASE_ONLINE_CREATE
+ | Alter_inplace_info::DROP_INDEX
+ | Alter_inplace_info::DROP_UNIQUE_INDEX
+ | Alter_inplace_info::DROP_FOREIGN_KEY
+ | Alter_inplace_info::ALTER_COLUMN_NAME
+ | Alter_inplace_info::ADD_FOREIGN_KEY;
+
+/* Report an InnoDB error to the client by invoking my_error(). */
+static UNIV_COLD __attribute__((nonnull))
+void
+my_error_innodb(
+/*============*/
+ dberr_t error, /*!< in: InnoDB error code */
+ const char* table, /*!< in: table name */
+ ulint flags) /*!< in: table flags */
+{
+ switch (error) {
+ case DB_MISSING_HISTORY:
+ my_error(ER_TABLE_DEF_CHANGED, MYF(0));
+ break;
+ case DB_RECORD_NOT_FOUND:
+ my_error(ER_KEY_NOT_FOUND, MYF(0), table);
+ break;
+ case DB_DEADLOCK:
+ my_error(ER_LOCK_DEADLOCK, MYF(0));
+ break;
+ case DB_LOCK_WAIT_TIMEOUT:
+ my_error(ER_LOCK_WAIT_TIMEOUT, MYF(0));
+ break;
+ case DB_INTERRUPTED:
+ my_error(ER_QUERY_INTERRUPTED, MYF(0));
+ break;
+ case DB_OUT_OF_MEMORY:
+ my_error(ER_OUT_OF_RESOURCES, MYF(0));
+ break;
+ case DB_OUT_OF_FILE_SPACE:
+ my_error(ER_RECORD_FILE_FULL, MYF(0), table);
+ break;
+ case DB_TOO_BIG_INDEX_COL:
+ my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0),
+ DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags));
+ break;
+ case DB_TOO_MANY_CONCURRENT_TRXS:
+ my_error(ER_TOO_MANY_CONCURRENT_TRXS, MYF(0));
+ break;
+ case DB_LOCK_TABLE_FULL:
+ my_error(ER_LOCK_TABLE_FULL, MYF(0));
+ break;
+ case DB_UNDO_RECORD_TOO_BIG:
+ my_error(ER_UNDO_RECORD_TOO_BIG, MYF(0));
+ break;
+ case DB_CORRUPTION:
+ my_error(ER_NOT_KEYFILE, MYF(0), table);
+ break;
+ case DB_TOO_BIG_RECORD:
+ my_error(ER_TOO_BIG_ROWSIZE, MYF(0),
+ page_get_free_space_of_empty(
+ flags & DICT_TF_COMPACT) / 2);
+ break;
+ case DB_INVALID_NULL:
+ /* TODO: report the row, as we do for DB_DUPLICATE_KEY */
+ my_error(ER_INVALID_USE_OF_NULL, MYF(0));
+ break;
+#ifdef UNIV_DEBUG
+ case DB_SUCCESS:
+ case DB_DUPLICATE_KEY:
+ case DB_TABLESPACE_EXISTS:
+ case DB_ONLINE_LOG_TOO_BIG:
+ /* These codes should not be passed here. */
+ ut_error;
+#endif /* UNIV_DEBUG */
+ default:
+ my_error(ER_GET_ERRNO, MYF(0), error);
+ break;
+ }
+}
+
+/** Determine if fulltext indexes exist in a given table.
+@param table_share MySQL table
+@return whether fulltext indexes exist on the table */
+static
+bool
+innobase_fulltext_exist(
+/*====================*/
+ const TABLE_SHARE* table_share)
+{
+ for (uint i = 0; i < table_share->keys; i++) {
+ if (table_share->key_info[i].flags & HA_FULLTEXT) {
+ return(true);
+ }
+ }
+
+ return(false);
+}
+
+/*******************************************************************//**
+Determine if ALTER TABLE needs to rebuild the table.
+@param ha_alter_info the DDL operation
+@return whether it is necessary to rebuild the table */
+static __attribute__((nonnull, warn_unused_result))
+bool
+innobase_need_rebuild(
+/*==================*/
+ const Alter_inplace_info* ha_alter_info)
+{
+ if (ha_alter_info->handler_flags
+ == Alter_inplace_info::CHANGE_CREATE_OPTION
+ && !(ha_alter_info->create_info->used_fields
+ & (HA_CREATE_USED_ROW_FORMAT
+ | HA_CREATE_USED_KEY_BLOCK_SIZE))) {
+ /* Any other CHANGE_CREATE_OPTION than changing
+ ROW_FORMAT or KEY_BLOCK_SIZE is ignored. */
+ return(false);
+ }
+
+ return(!!(ha_alter_info->handler_flags & INNOBASE_INPLACE_REBUILD));
+}
+
+/** Check if InnoDB supports a particular alter table in-place
+@param altered_table TABLE object for new version of table.
+@param ha_alter_info Structure describing changes to be done
+by ALTER TABLE and holding data used during in-place alter.
+
+@retval HA_ALTER_INPLACE_NOT_SUPPORTED Not supported
+@retval HA_ALTER_INPLACE_NO_LOCK Supported
+@retval HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE Supported, but requires
+lock during main phase and exclusive lock during prepare phase.
+@retval HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE Supported, prepare phase
+requires exclusive lock (any transactions that have accessed the table
+must commit or roll back first, and no transactions can access the table
+while prepare_inplace_alter_table() is executing)
+*/
+UNIV_INTERN
+enum_alter_inplace_result
+ha_innobase::check_if_supported_inplace_alter(
+/*==========================================*/
+ TABLE* altered_table,
+ Alter_inplace_info* ha_alter_info)
+{
+ DBUG_ENTER("check_if_supported_inplace_alter");
+
+ if (srv_read_only_mode) {
+ ha_alter_info->unsupported_reason =
+ innobase_get_err_msg(ER_READ_ONLY_MODE);
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ } else if (srv_created_new_raw || srv_force_recovery) {
+ ha_alter_info->unsupported_reason =
+ innobase_get_err_msg(ER_READ_ONLY_MODE);
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+
+ if (altered_table->s->fields > REC_MAX_N_USER_FIELDS) {
+ /* Deny the inplace ALTER TABLE. MySQL will try to
+ re-create the table and ha_innobase::create() will
+ return an error too. This is how we effectively
+ deny adding too many columns to a table. */
+ ha_alter_info->unsupported_reason =
+ innobase_get_err_msg(ER_TOO_MANY_FIELDS);
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+
+ update_thd();
+ trx_search_latch_release_if_reserved(prebuilt->trx);
+
+ if (ha_alter_info->handler_flags
+ & ~(INNOBASE_ONLINE_OPERATIONS | INNOBASE_INPLACE_REBUILD)) {
+ if (ha_alter_info->handler_flags
+ & (Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH
+ | Alter_inplace_info::ALTER_COLUMN_TYPE))
+ ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_COLUMN_TYPE);
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+
+ /* Only support online add foreign key constraint when
+ check_foreigns is turned off */
+ if ((ha_alter_info->handler_flags
+ & Alter_inplace_info::ADD_FOREIGN_KEY)
+ && prebuilt->trx->check_foreigns) {
+ ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FK_CHECK);
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+
+ if (!(ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)) {
+ DBUG_RETURN(HA_ALTER_INPLACE_NO_LOCK);
+ }
+
+ /* Only support NULL -> NOT NULL change if strict table sql_mode
+ is set. Fall back to COPY for conversion if not strict tables.
+ In-Place will fail with an error when trying to convert
+ NULL to a NOT NULL value. */
+ if ((ha_alter_info->handler_flags
+ & Alter_inplace_info::ALTER_COLUMN_NOT_NULLABLE)
+ && !thd_is_strict_mode(user_thd)) {
+ ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_NOT_NULL);
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+
+ /* InnoDB cannot IGNORE when creating unique indexes. IGNORE
+ should silently delete some duplicate rows. Our inplace_alter
+ code will not delete anything from existing indexes. */
+ if (ha_alter_info->ignore
+ && (ha_alter_info->handler_flags
+ & (Alter_inplace_info::ADD_PK_INDEX
+ | Alter_inplace_info::ADD_UNIQUE_INDEX))) {
+ ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_IGNORE);
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+
+ /* DROP PRIMARY KEY is only allowed in combination with ADD
+ PRIMARY KEY. */
+ if ((ha_alter_info->handler_flags
+ & (Alter_inplace_info::ADD_PK_INDEX
+ | Alter_inplace_info::DROP_PK_INDEX))
+ == Alter_inplace_info::DROP_PK_INDEX) {
+ ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_NOPK);
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+
+ /* ADD FOREIGN KEY does not currently work properly in combination
+ with renaming columns. (Bug#14105491) */
+ if ((ha_alter_info->handler_flags
+ & (Alter_inplace_info::ADD_FOREIGN_KEY
+ | Alter_inplace_info::ALTER_COLUMN_NAME))
+ == (Alter_inplace_info::ADD_FOREIGN_KEY
+ | Alter_inplace_info::ALTER_COLUMN_NAME)) {
+ ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FK_RENAME);
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+
+ /* DROP FOREIGN KEY may not currently work properly in combination
+ with other operations. (Work-around for 5.6.10 only.) */
+ if ((ha_alter_info->handler_flags
+ & Alter_inplace_info::DROP_FOREIGN_KEY)
+ && (ha_alter_info->handler_flags
+ & (Alter_inplace_info::DROP_FOREIGN_KEY
+ | INNOBASE_INPLACE_REBUILD))
+ != Alter_inplace_info::DROP_FOREIGN_KEY) {
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+
+	/* If a column changes from NOT NULL to NULL,
+	and there's an implicit pk on this column, the
+	table should be rebuilt. The change should
+	only go through the "Copy" method. */
+ if ((ha_alter_info->handler_flags
+ & Alter_inplace_info::ALTER_COLUMN_NULLABLE)) {
+ uint primary_key = altered_table->s->primary_key;
+
+ /* See if MYSQL table has no pk but we do.*/
+ if (UNIV_UNLIKELY(primary_key >= MAX_KEY)
+ && !row_table_got_default_clust_index(prebuilt->table)) {
+ ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ER_PRIMARY_CANT_HAVE_NULL);
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+ }
+
+ /* We should be able to do the operation in-place.
+ See if we can do it online (LOCK=NONE). */
+ bool online = true;
+
+ List_iterator_fast<Create_field> cf_it(
+ ha_alter_info->alter_info->create_list);
+
+ /* Fix the key parts. */
+ for (KEY* new_key = ha_alter_info->key_info_buffer;
+ new_key < ha_alter_info->key_info_buffer
+ + ha_alter_info->key_count;
+ new_key++) {
+ for (KEY_PART_INFO* key_part = new_key->key_part;
+ key_part < new_key->key_part + new_key->user_defined_key_parts;
+ key_part++) {
+ const Create_field* new_field;
+
+ DBUG_ASSERT(key_part->fieldnr
+ < altered_table->s->fields);
+
+ cf_it.rewind();
+ for (uint fieldnr = 0; (new_field = cf_it++);
+ fieldnr++) {
+ if (fieldnr == key_part->fieldnr) {
+ break;
+ }
+ }
+
+ DBUG_ASSERT(new_field);
+
+ key_part->field = altered_table->field[
+ key_part->fieldnr];
+ /* In some special cases InnoDB emits "false"
+ duplicate key errors with NULL key values. Let
+ us play safe and ensure that we can correctly
+ print key values even in such cases .*/
+ key_part->null_offset = key_part->field->null_offset();
+ key_part->null_bit = key_part->field->null_bit;
+
+ if (new_field->field) {
+ /* This is an existing column. */
+ continue;
+ }
+
+ /* This is an added column. */
+ DBUG_ASSERT(ha_alter_info->handler_flags
+ & Alter_inplace_info::ADD_COLUMN);
+
+ /* We cannot replace a hidden FTS_DOC_ID
+ with a user-visible FTS_DOC_ID. */
+ if (prebuilt->table->fts
+ && innobase_fulltext_exist(altered_table->s)
+ && !my_strcasecmp(
+ system_charset_info,
+ key_part->field->field_name,
+ FTS_DOC_ID_COL_NAME)) {
+ ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_HIDDEN_FTS);
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+
+ DBUG_ASSERT((MTYP_TYPENR(key_part->field->unireg_check)
+ == Field::NEXT_NUMBER)
+ == !!(key_part->field->flags
+ & AUTO_INCREMENT_FLAG));
+
+ if (key_part->field->flags & AUTO_INCREMENT_FLAG) {
+ /* We cannot assign an AUTO_INCREMENT
+ column values during online ALTER. */
+ DBUG_ASSERT(key_part->field == altered_table
+ -> found_next_number_field);
+ ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_AUTOINC);
+ online = false;
+ }
+ }
+ }
+
+ DBUG_ASSERT(!prebuilt->table->fts || prebuilt->table->fts->doc_col
+ <= table->s->fields);
+ DBUG_ASSERT(!prebuilt->table->fts || prebuilt->table->fts->doc_col
+ < dict_table_get_n_user_cols(prebuilt->table));
+
+ if (prebuilt->table->fts
+ && innobase_fulltext_exist(altered_table->s)) {
+ /* FULLTEXT indexes are supposed to remain. */
+ /* Disallow DROP INDEX FTS_DOC_ID_INDEX */
+
+ for (uint i = 0; i < ha_alter_info->index_drop_count; i++) {
+ if (!my_strcasecmp(
+ system_charset_info,
+ ha_alter_info->index_drop_buffer[i]->name,
+ FTS_DOC_ID_INDEX_NAME)) {
+ ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_CHANGE_FTS);
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+ }
+
+ /* InnoDB can have a hidden FTS_DOC_ID_INDEX on a
+ visible FTS_DOC_ID column as well. Prevent dropping or
+ renaming the FTS_DOC_ID. */
+
+ for (Field** fp = table->field; *fp; fp++) {
+ if (!((*fp)->flags
+ & (FIELD_IS_RENAMED | FIELD_IS_DROPPED))) {
+ continue;
+ }
+
+ if (!my_strcasecmp(
+ system_charset_info,
+ (*fp)->field_name,
+ FTS_DOC_ID_COL_NAME)) {
+ ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_CHANGE_FTS);
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+ }
+ }
+
+ prebuilt->trx->will_lock++;
+
+ if (!online) {
+ /* We already determined that only a non-locking
+ operation is possible. */
+ } else if (((ha_alter_info->handler_flags
+ & Alter_inplace_info::ADD_PK_INDEX)
+ || innobase_need_rebuild(ha_alter_info))
+ && (innobase_fulltext_exist(altered_table->s)
+ || (prebuilt->table->flags2
+ & DICT_TF2_FTS_HAS_DOC_ID))) {
+ /* Refuse to rebuild the table online, if
+ fulltext indexes are to survive the rebuild,
+ or if the table contains a hidden FTS_DOC_ID column. */
+ online = false;
+ /* If the table already contains fulltext indexes,
+ refuse to rebuild the table natively altogether. */
+ if (prebuilt->table->fts) {
+ ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ER_INNODB_FT_LIMIT);
+ DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
+ }
+ ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FTS);
+ } else if ((ha_alter_info->handler_flags
+ & Alter_inplace_info::ADD_INDEX)) {
+ /* Building a full-text index requires a lock.
+ We could do without a lock if the table already contains
+ an FTS_DOC_ID column, but in that case we would have
+ to apply the modification log to the full-text indexes. */
+
+ for (uint i = 0; i < ha_alter_info->index_add_count; i++) {
+ const KEY* key =
+ &ha_alter_info->key_info_buffer[
+ ha_alter_info->index_add_buffer[i]];
+ if (key->flags & HA_FULLTEXT) {
+ DBUG_ASSERT(!(key->flags & HA_KEYFLAG_MASK
+ & ~(HA_FULLTEXT
+ | HA_PACK_KEY
+ | HA_GENERATED_KEY
+ | HA_BINARY_PACK_KEY)));
+ ha_alter_info->unsupported_reason = innobase_get_err_msg(
+ ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FTS);
+ online = false;
+ break;
+ }
+ }
+ }
+
+ DBUG_RETURN(online
+ ? HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE
+ : HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE);
+}
+
+/*************************************************************//**
+Initialize the dict_foreign_t structure with supplied info
+@return true if added, false if duplicate foreign->id */
+static __attribute__((nonnull(1,3,5,7)))
+bool
+innobase_init_foreign(
+/*==================*/
+ dict_foreign_t* foreign, /*!< in/out: structure to
+ initialize */
+ char* constraint_name, /*!< in/out: constraint name if
+ exists */
+ dict_table_t* table, /*!< in: foreign table */
+ dict_index_t* index, /*!< in: foreign key index */
+ const char** column_names, /*!< in: foreign key column
+ names */
+ ulint num_field, /*!< in: number of columns */
+ const char* referenced_table_name, /*!< in: referenced table
+ name */
+ dict_table_t* referenced_table, /*!< in: referenced table */
+ dict_index_t* referenced_index, /*!< in: referenced index */
+ const char** referenced_column_names,/*!< in: referenced column
+ names */
+ ulint referenced_num_field) /*!< in: number of referenced
+ columns */
+{
+ if (constraint_name) {
+ ulint db_len;
+
+ /* Catenate 'databasename/' to the constraint name specified
+ by the user: we conceive the constraint as belonging to the
+ same MySQL 'database' as the table itself. We store the name
+ to foreign->id. */
+
+ db_len = dict_get_db_name_len(table->name);
+
+ foreign->id = static_cast<char*>(mem_heap_alloc(
+ foreign->heap, db_len + strlen(constraint_name) + 2));
+
+ ut_memcpy(foreign->id, table->name, db_len);
+ foreign->id[db_len] = '/';
+ strcpy(foreign->id + db_len + 1, constraint_name);
+ }
+
+ ut_ad(mutex_own(&dict_sys->mutex));
+
+ /* Check if any existing foreign key has the same id */
+
+ for (const dict_foreign_t* existing_foreign
+ = UT_LIST_GET_FIRST(table->foreign_list);
+ existing_foreign != 0;
+ existing_foreign = UT_LIST_GET_NEXT(
+ foreign_list, existing_foreign)) {
+
+ if (ut_strcmp(existing_foreign->id, foreign->id) == 0) {
+ return(false);
+ }
+ }
+
+ foreign->foreign_table = table;
+ foreign->foreign_table_name = mem_heap_strdup(
+ foreign->heap, table->name);
+ dict_mem_foreign_table_name_lookup_set(foreign, TRUE);
+
+ foreign->foreign_index = index;
+ foreign->n_fields = (unsigned int) num_field;
+
+ foreign->foreign_col_names = static_cast<const char**>(
+ mem_heap_alloc(foreign->heap, num_field * sizeof(void*)));
+
+ for (ulint i = 0; i < foreign->n_fields; i++) {
+ foreign->foreign_col_names[i] = mem_heap_strdup(
+ foreign->heap, column_names[i]);
+ }
+
+ foreign->referenced_index = referenced_index;
+ foreign->referenced_table = referenced_table;
+
+ foreign->referenced_table_name = mem_heap_strdup(
+ foreign->heap, referenced_table_name);
+ dict_mem_referenced_table_name_lookup_set(foreign, TRUE);
+
+ foreign->referenced_col_names = static_cast<const char**>(
+ mem_heap_alloc(foreign->heap,
+ referenced_num_field * sizeof(void*)));
+
+ for (ulint i = 0; i < foreign->n_fields; i++) {
+ foreign->referenced_col_names[i]
+ = mem_heap_strdup(foreign->heap,
+ referenced_column_names[i]);
+ }
+
+ return(true);
+}
+
+/*************************************************************//**
+Check whether the foreign key options is legit
+@return true if it is */
+static __attribute__((nonnull, warn_unused_result))
+bool
+innobase_check_fk_option(
+/*=====================*/
+ dict_foreign_t* foreign) /*!< in:InnoDB Foreign key */
+{
+ if (foreign->type & (DICT_FOREIGN_ON_UPDATE_SET_NULL
+ | DICT_FOREIGN_ON_DELETE_SET_NULL)
+ && foreign->foreign_index) {
+
+ for (ulint j = 0; j < foreign->n_fields; j++) {
+ if ((dict_index_get_nth_col(
+ foreign->foreign_index, j)->prtype)
+ & DATA_NOT_NULL) {
+
+ /* It is not sensible to define
+ SET NULL if the column is not
+ allowed to be NULL! */
+ return(false);
+ }
+ }
+ }
+
+ return(true);
+}
+
+/*************************************************************//**
+Set foreign key options
+@return true if successfully set */
+static __attribute__((nonnull, warn_unused_result))
+bool
+innobase_set_foreign_key_option(
+/*============================*/
+ dict_foreign_t* foreign, /*!< in:InnoDB Foreign key */
+ Foreign_key* fk_key) /*!< in: Foreign key info from
+ MySQL */
+{
+ ut_ad(!foreign->type);
+
+ switch (fk_key->delete_opt) {
+ case Foreign_key::FK_OPTION_NO_ACTION:
+ case Foreign_key::FK_OPTION_RESTRICT:
+ case Foreign_key::FK_OPTION_DEFAULT:
+ foreign->type = DICT_FOREIGN_ON_DELETE_NO_ACTION;
+ break;
+ case Foreign_key::FK_OPTION_CASCADE:
+ foreign->type = DICT_FOREIGN_ON_DELETE_CASCADE;
+ break;
+ case Foreign_key::FK_OPTION_SET_NULL:
+ foreign->type = DICT_FOREIGN_ON_DELETE_SET_NULL;
+ break;
+ }
+
+ switch (fk_key->update_opt) {
+ case Foreign_key::FK_OPTION_NO_ACTION:
+ case Foreign_key::FK_OPTION_RESTRICT:
+ case Foreign_key::FK_OPTION_DEFAULT:
+ foreign->type |= DICT_FOREIGN_ON_UPDATE_NO_ACTION;
+ break;
+ case Foreign_key::FK_OPTION_CASCADE:
+ foreign->type |= DICT_FOREIGN_ON_UPDATE_CASCADE;
+ break;
+ case Foreign_key::FK_OPTION_SET_NULL:
+ foreign->type |= DICT_FOREIGN_ON_UPDATE_SET_NULL;
+ break;
+ }
+
+ return(innobase_check_fk_option(foreign));
+}
+
+/*******************************************************************//**
+Check if a foreign key constraint can make use of an index
+that is being created.
+@return useable index, or NULL if none found */
+static __attribute__((nonnull, warn_unused_result))
+const KEY*
+innobase_find_equiv_index(
+/*======================*/
+ const char*const* col_names,
+ /*!< in: column names */
+ uint n_cols, /*!< in: number of columns */
+ const KEY* keys, /*!< in: index information */
+ const uint* add, /*!< in: indexes being created */
+ uint n_add) /*!< in: number of indexes to create */
+{
+ for (uint i = 0; i < n_add; i++) {
+ const KEY* key = &keys[add[i]];
+
+ if (key->user_defined_key_parts < n_cols) {
+no_match:
+ continue;
+ }
+
+ for (uint j = 0; j < n_cols; j++) {
+ const KEY_PART_INFO& key_part = key->key_part[j];
+ uint32 col_len
+ = key_part.field->pack_length();
+
+ /* The MySQL pack length contains 1 or 2 bytes
+ length field for a true VARCHAR. */
+
+ if (key_part.field->type() == MYSQL_TYPE_VARCHAR) {
+ col_len -= static_cast<const Field_varstring*>(
+ key_part.field)->length_bytes;
+ }
+
+ if (key_part.length < col_len) {
+
+ /* Column prefix indexes cannot be
+ used for FOREIGN KEY constraints. */
+ goto no_match;
+ }
+
+ if (innobase_strcasecmp(col_names[j],
+ key_part.field->field_name)) {
+ /* Name mismatch */
+ goto no_match;
+ }
+ }
+
+ return(key);
+ }
+
+ return(NULL);
+}
+
+/*************************************************************//**
+Found an index whose first fields are the columns in the array
+in the same order and is not marked for deletion
+@return matching index, NULL if not found */
+static
+dict_index_t*
+innobase_find_fk_index(
+/*===================*/
+ Alter_inplace_info* ha_alter_info,
+ /*!< in: alter table info */
+ dict_table_t* table, /*!< in: table */
+ const char** columns,/*!< in: array of column names */
+ ulint n_cols) /*!< in: number of columns */
+
+{
+ dict_index_t* index;
+ dict_index_t* found_index = NULL;
+
+ index = dict_table_get_first_index(table);
+
+ while (index != NULL) {
+ if (index->type & DICT_FTS) {
+ goto next_rec;
+ } else if (dict_foreign_qualify_index(
+ table, columns, n_cols, index, NULL, TRUE, FALSE)) {
+ /* Check if this index is in the drop list */
+ if (index) {
+ KEY** drop_key;
+
+ drop_key = ha_alter_info->index_drop_buffer;
+
+ for (uint i = 0;
+ i < ha_alter_info->index_drop_count;
+ i++) {
+ if (innobase_strcasecmp(
+ drop_key[i]->name,
+ index->name) == 0) {
+ goto next_rec;
+ }
+ }
+ }
+
+ found_index = index;
+ break;
+ }
+
+next_rec:
+ index = dict_table_get_next_index(index);
+ }
+
+ return(found_index);
+}
+
+/*************************************************************//**
+Create InnoDB foreign key structure from MySQL alter_info
+@retval true if successful
+@retval false on error (will call my_error()) */
+static
+bool
+innobase_get_foreign_key_info(
+/*==========================*/
+ Alter_inplace_info*
+ ha_alter_info, /*!< in: alter table info */
+ const TABLE_SHARE*
+ table_share, /*!< in: the TABLE_SHARE */
+ dict_table_t* table, /*!< in: table */
+ dict_foreign_t**add_fk, /*!< out: foreign constraint added */
+ ulint* n_add_fk, /*!< out: number of foreign
+ constraints added */
+ mem_heap_t* heap, /*!< in: memory heap */
+ const trx_t* trx) /*!< in: user transaction */
+{
+ Key* key;
+ Foreign_key* fk_key;
+ ulint i = 0;
+ dict_table_t* referenced_table = NULL;
+ char* referenced_table_name = NULL;
+ ulint num_fk = 0;
+ Alter_info* alter_info = ha_alter_info->alter_info;
+
+ *n_add_fk = 0;
+
+ List_iterator<Key> key_iterator(alter_info->key_list);
+
+ while ((key=key_iterator++)) {
+ if (key->type == Key::FOREIGN_KEY) {
+ const char* column_names[MAX_NUM_FK_COLUMNS];
+ dict_index_t* index = NULL;
+ const char* referenced_column_names[MAX_NUM_FK_COLUMNS];
+ dict_index_t* referenced_index = NULL;
+ ulint num_col = 0;
+ ulint referenced_num_col = 0;
+ bool correct_option;
+ char* db_namep = NULL;
+ char* tbl_namep = NULL;
+ ulint db_name_len = 0;
+ ulint tbl_name_len = 0;
+#ifdef __WIN__
+ char db_name[MAX_DATABASE_NAME_LEN];
+ char tbl_name[MAX_TABLE_NAME_LEN];
+#endif
+
+ fk_key= static_cast<Foreign_key*>(key);
+
+ if (fk_key->columns.elements > 0) {
+ Key_part_spec* column;
+ List_iterator<Key_part_spec> key_part_iterator(
+ fk_key->columns);
+
+ /* Get all the foreign key column info for the
+ current table */
+ while ((column = key_part_iterator++)) {
+ column_names[i] =
+ column->field_name.str;
+ ut_ad(i < MAX_NUM_FK_COLUMNS);
+ i++;
+ }
+
+ index = innobase_find_fk_index(
+ ha_alter_info, table, column_names, i);
+
+				/* MySQL would add an index to the
+				creation list if no such index exists
+				for the foreign table, so we have to use
+				DBUG_EXECUTE_IF to simulate the scenario */
+ DBUG_EXECUTE_IF("innodb_test_no_foreign_idx",
+ index = NULL;);
+
+				/* Check whether such an index exists
+				in the index create clause */
+ if (!index && !innobase_find_equiv_index(
+ column_names, i,
+ ha_alter_info->key_info_buffer,
+ ha_alter_info->index_add_buffer,
+ ha_alter_info->index_add_count)) {
+ my_error(
+ ER_FK_NO_INDEX_CHILD,
+ MYF(0),
+ fk_key->name.str,
+ table_share->table_name.str);
+ goto err_exit;
+ }
+
+ num_col = i;
+ }
+
+ add_fk[num_fk] = dict_mem_foreign_create();
+
+#ifndef __WIN__
+ tbl_namep = fk_key->ref_table.str;
+ tbl_name_len = fk_key->ref_table.length;
+ db_namep = fk_key->ref_db.str;
+ db_name_len = fk_key->ref_db.length;
+#else
+ ut_ad(fk_key->ref_table.str);
+
+ memcpy(tbl_name, fk_key->ref_table.str,
+ fk_key->ref_table.length);
+ tbl_name[fk_key->ref_table.length] = 0;
+ innobase_casedn_str(tbl_name);
+ tbl_name_len = strlen(tbl_name);
+ tbl_namep = &tbl_name[0];
+
+ if (fk_key->ref_db.str != NULL) {
+ memcpy(db_name, fk_key->ref_db.str,
+ fk_key->ref_db.length);
+ db_name[fk_key->ref_db.length] = 0;
+ innobase_casedn_str(db_name);
+ db_name_len = strlen(db_name);
+ db_namep = &db_name[0];
+ }
+#endif
+ mutex_enter(&dict_sys->mutex);
+
+ referenced_table_name = dict_get_referenced_table(
+ table->name,
+ db_namep,
+ db_name_len,
+ tbl_namep,
+ tbl_name_len,
+ &referenced_table,
+ add_fk[num_fk]->heap);
+
+ /* Test the case when referenced_table failed to
+ open, if trx->check_foreigns is not set, we should
+ still be able to add the foreign key */
+ DBUG_EXECUTE_IF("innodb_test_open_ref_fail",
+ referenced_table = NULL;);
+
+ if (!referenced_table && trx->check_foreigns) {
+ mutex_exit(&dict_sys->mutex);
+ my_error(ER_FK_CANNOT_OPEN_PARENT,
+ MYF(0), tbl_namep);
+
+ goto err_exit;
+ }
+
+ i = 0;
+
+ if (fk_key->ref_columns.elements > 0) {
+ Key_part_spec* column;
+ List_iterator<Key_part_spec> key_part_iterator(
+ fk_key->ref_columns);
+
+ while ((column = key_part_iterator++)) {
+ referenced_column_names[i] =
+ column->field_name.str;
+ ut_ad(i < MAX_NUM_FK_COLUMNS);
+ i++;
+ }
+
+ if (referenced_table) {
+ referenced_index =
+ dict_foreign_find_index(
+ referenced_table,
+ referenced_column_names,
+ i, NULL,
+ TRUE, FALSE);
+
+ DBUG_EXECUTE_IF(
+ "innodb_test_no_reference_idx",
+ referenced_index = NULL;);
+
+					/* Check whether such an index exists
+					in the index create clause */
+ if (!referenced_index) {
+ mutex_exit(&dict_sys->mutex);
+ my_error(
+ ER_FK_NO_INDEX_PARENT,
+ MYF(0),
+ fk_key->name.str,
+ tbl_namep);
+ goto err_exit;
+ }
+ } else {
+ ut_a(!trx->check_foreigns);
+ }
+
+ referenced_num_col = i;
+ }
+
+ if (!innobase_init_foreign(
+ add_fk[num_fk], fk_key->name.str,
+ table, index, column_names,
+ num_col, referenced_table_name,
+ referenced_table, referenced_index,
+ referenced_column_names, referenced_num_col)) {
+ mutex_exit(&dict_sys->mutex);
+ my_error(
+ ER_FK_DUP_NAME,
+ MYF(0),
+ add_fk[num_fk]->id);
+ goto err_exit;
+ }
+
+ mutex_exit(&dict_sys->mutex);
+
+ correct_option = innobase_set_foreign_key_option(
+ add_fk[num_fk], fk_key);
+
+ DBUG_EXECUTE_IF("innodb_test_wrong_fk_option",
+ correct_option = false;);
+
+ if (!correct_option) {
+ my_error(ER_FK_INCORRECT_OPTION,
+ MYF(0),
+ table_share->table_name.str,
+ add_fk[num_fk]->id);
+ goto err_exit;
+ }
+
+ num_fk++;
+ i = 0;
+ }
+
+ }
+
+ *n_add_fk = num_fk;
+
+ return(true);
+err_exit:
+ for (i = 0; i <= num_fk; i++) {
+ if (add_fk[i]) {
+ dict_foreign_free(add_fk[i]);
+ }
+ }
+
+ return(false);
+}
+
/*************************************************************//**
Copies an InnoDB column to a MySQL field. This function is
adapted from row_sel_field_store_in_mysql_format(). */
@@ -91,10 +1086,9 @@ innobase_col_to_mysql(
break;
case DATA_BLOB:
- /* Store a pointer to the BLOB buffer to dest: the BLOB was
- already copied to the buffer in row_sel_store_mysql_rec */
-
- row_mysql_store_blob_ref(dest, flen, data, len);
+ /* Skip MySQL BLOBs when reporting an erroneous row
+ during index creation or table rebuild. */
+ field->set_null();
break;
#ifdef UNIV_DEBUG
@@ -102,8 +1096,6 @@ innobase_col_to_mysql(
ut_ad(flen >= len);
ut_ad(DATA_MBMAXLEN(col->mbminmaxlen)
>= DATA_MBMINLEN(col->mbminmaxlen));
- ut_ad(DATA_MBMAXLEN(col->mbminmaxlen)
- > DATA_MBMINLEN(col->mbminmaxlen) || flen == len);
memcpy(dest, data, len);
break;
@@ -137,20 +1129,19 @@ UNIV_INTERN
void
innobase_rec_to_mysql(
/*==================*/
- TABLE* table, /*!< in/out: MySQL table */
- const rec_t* rec, /*!< in: record */
- const dict_index_t* index, /*!< in: index */
- const ulint* offsets) /*!< in: rec_get_offsets(
- rec, index, ...) */
+ struct TABLE* table, /*!< in/out: MySQL table */
+ const rec_t* rec, /*!< in: record */
+ const dict_index_t* index, /*!< in: index */
+ const ulint* offsets)/*!< in: rec_get_offsets(
+ rec, index, ...) */
{
uint n_fields = table->s->fields;
- uint i;
ut_ad(n_fields == dict_table_get_n_user_cols(index->table)
- || (DICT_TF2_FLAG_IS_SET(index->table, DICT_TF2_FTS_HAS_DOC_ID)
- && n_fields + 1 == dict_table_get_n_user_cols(index->table)));
+ - !!(DICT_TF2_FLAG_IS_SET(index->table,
+ DICT_TF2_FTS_HAS_DOC_ID)));
- for (i = 0; i < n_fields; i++) {
+ for (uint i = 0; i < n_fields; i++) {
Field* field = table->field[i];
ulint ipos;
ulint ilen;
@@ -160,7 +1151,8 @@ innobase_rec_to_mysql(
ipos = dict_index_get_nth_col_or_prefix_pos(index, i, TRUE);
- if (UNIV_UNLIKELY(ipos == ULINT_UNDEFINED)) {
+ if (ipos == ULINT_UNDEFINED
+ || rec_offs_nth_extern(offsets, ipos)) {
null_field:
field->set_null();
continue;
@@ -184,6 +1176,85 @@ null_field:
}
/*************************************************************//**
+Copies an InnoDB index entry to table->record[0]. */
+UNIV_INTERN
+void
+innobase_fields_to_mysql(
+/*=====================*/
+ struct TABLE* table, /*!< in/out: MySQL table */
+ const dict_index_t* index, /*!< in: InnoDB index */
+ const dfield_t* fields) /*!< in: InnoDB index fields */
+{
+ uint n_fields = table->s->fields;
+
+ ut_ad(n_fields == dict_table_get_n_user_cols(index->table)
+ - !!(DICT_TF2_FLAG_IS_SET(index->table,
+ DICT_TF2_FTS_HAS_DOC_ID)));
+
+ for (uint i = 0; i < n_fields; i++) {
+ Field* field = table->field[i];
+ ulint ipos;
+
+ field->reset();
+
+ ipos = dict_index_get_nth_col_or_prefix_pos(index, i, TRUE);
+
+ if (ipos == ULINT_UNDEFINED
+ || dfield_is_ext(&fields[ipos])
+ || dfield_is_null(&fields[ipos])) {
+
+ field->set_null();
+ } else {
+ field->set_notnull();
+
+ const dfield_t* df = &fields[ipos];
+
+ innobase_col_to_mysql(
+ dict_field_get_col(
+ dict_index_get_nth_field(index, ipos)),
+ static_cast<const uchar*>(dfield_get_data(df)),
+ dfield_get_len(df), field);
+ }
+ }
+}
+
+/*************************************************************//**
+Copies an InnoDB row to table->record[0]. */
+UNIV_INTERN
+void
+innobase_row_to_mysql(
+/*==================*/
+ struct TABLE* table, /*!< in/out: MySQL table */
+ const dict_table_t* itab, /*!< in: InnoDB table */
+ const dtuple_t* row) /*!< in: InnoDB row */
+{
+ uint n_fields = table->s->fields;
+
+ /* The InnoDB row may contain an extra FTS_DOC_ID column at the end. */
+ ut_ad(row->n_fields == dict_table_get_n_cols(itab));
+ ut_ad(n_fields == row->n_fields - DATA_N_SYS_COLS
+ - !!(DICT_TF2_FLAG_IS_SET(itab, DICT_TF2_FTS_HAS_DOC_ID)));
+
+ for (uint i = 0; i < n_fields; i++) {
+ Field* field = table->field[i];
+ const dfield_t* df = dtuple_get_nth_field(row, i);
+
+ field->reset();
+
+ if (dfield_is_ext(df) || dfield_is_null(df)) {
+ field->set_null();
+ } else {
+ field->set_notnull();
+
+ innobase_col_to_mysql(
+ dict_table_get_nth_col(itab, i),
+ static_cast<const uchar*>(dfield_get_data(df)),
+ dfield_get_len(df), field);
+ }
+ }
+}
+
+/*************************************************************//**
Resets table->record[0]. */
UNIV_INTERN
void
@@ -199,66 +1270,29 @@ innobase_rec_reset(
}
}
-/******************************************************************//**
-Removes the filename encoding of a database and table name. */
-static
-void
-innobase_convert_tablename(
-/*=======================*/
- char* s) /*!< in: identifier; out: decoded identifier */
-{
- uint errors;
-
- char* slash = strchr(s, '/');
-
- if (slash) {
- char* t;
- /* Temporarily replace the '/' with NUL. */
- *slash = 0;
- /* Convert the database name. */
- strconvert(&my_charset_filename, s, system_charset_info,
- s, slash - s + 1, &errors);
-
- t = s + strlen(s);
- ut_ad(slash >= t);
- /* Append a '.' after the database name. */
- *t++ = '.';
- slash++;
- /* Convert the table name. */
- strconvert(&my_charset_filename, slash, system_charset_info,
- t, slash - t + strlen(slash), &errors);
- } else {
- strconvert(&my_charset_filename, s,
- system_charset_info, s, strlen(s), &errors);
- }
-}
-
/*******************************************************************//**
This function checks that index keys are sensible.
@return 0 or error number */
-static
+static __attribute__((nonnull, warn_unused_result))
int
innobase_check_index_keys(
/*======================*/
- const KEY* key_info, /*!< in: Indexes to be
- created */
- ulint num_of_keys, /*!< in: Number of
- indexes to be created */
- const dict_table_t* table) /*!< in: Existing indexes */
+ const Alter_inplace_info* info,
+ /*!< in: indexes to be created or dropped */
+ const dict_table_t* innodb_table)
+ /*!< in: Existing indexes */
{
- ulint key_num;
-
- ut_ad(key_info);
- ut_ad(num_of_keys);
-
- for (key_num = 0; key_num < num_of_keys; key_num++) {
- const KEY& key = key_info[key_num];
+ for (uint key_num = 0; key_num < info->index_add_count;
+ key_num++) {
+ const KEY& key = info->key_info_buffer[
+ info->index_add_buffer[key_num]];
/* Check that the same index name does not appear
twice in indexes to be created. */
for (ulint i = 0; i < key_num; i++) {
- const KEY& key2 = key_info[i];
+ const KEY& key2 = info->key_info_buffer[
+ info->index_add_buffer[i]];
if (0 == strcmp(key.name, key2.name)) {
my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
@@ -270,23 +1304,36 @@ innobase_check_index_keys(
/* Check that the same index name does not already exist. */
- for (const dict_index_t* index
- = dict_table_get_first_index(table);
- index; index = dict_table_get_next_index(index)) {
+ const dict_index_t* index;
- if (0 == strcmp(key.name, index->name)) {
- my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
- key.name);
+ for (index = dict_table_get_first_index(innodb_table);
+ index; index = dict_table_get_next_index(index)) {
- return(ER_WRONG_NAME_FOR_INDEX);
+ if (!strcmp(key.name, index->name)) {
+ break;
}
}
- /* Check that MySQL does not try to create a column
- prefix index field on an inappropriate data type and
- that the same column does not appear twice in the index. */
+ if (index) {
+ /* If a key by the same name is being created and
+ dropped, the name clash is OK. */
+ for (uint i = 0; i < info->index_drop_count;
+ i++) {
+ const KEY* drop_key
+ = info->index_drop_buffer[i];
- for (ulint i = 0; i < key.key_parts; i++) {
+ if (0 == strcmp(key.name, drop_key->name)) {
+ goto name_ok;
+ }
+ }
+
+ my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0), key.name);
+
+ return(ER_WRONG_NAME_FOR_INDEX);
+ }
+
+name_ok:
+ for (ulint i = 0; i < key.user_defined_key_parts; i++) {
const KEY_PART_INFO& key_part1
= key.key_part[i];
const Field* field
@@ -301,6 +1348,10 @@ innobase_check_index_keys(
case DATA_FLOAT:
case DATA_DOUBLE:
case DATA_DECIMAL:
+ /* Check that MySQL does not try to
+ create a column prefix index field on
+ an inappropriate data type. */
+
if (field->type() == MYSQL_TYPE_VARCHAR) {
if (key_part1.length
>= field->pack_length()
@@ -320,17 +1371,19 @@ innobase_check_index_keys(
return(ER_WRONG_KEY_COLUMN);
}
+ /* Check that the same column does not appear
+ twice in the index. */
+
for (ulint j = 0; j < i; j++) {
const KEY_PART_INFO& key_part2
= key.key_part[j];
- if (strcmp(key_part1.field->field_name,
- key_part2.field->field_name)) {
+ if (key_part1.fieldnr != key_part2.fieldnr) {
continue;
}
my_error(ER_WRONG_KEY_COLUMN, MYF(0),
- key_part1.field->field_name);
+ field->field_name);
return(ER_WRONG_KEY_COLUMN);
}
}
@@ -341,16 +1394,19 @@ innobase_check_index_keys(
/*******************************************************************//**
Create index field definition for key part */
-static
+static __attribute__((nonnull(2,3)))
void
innobase_create_index_field_def(
/*============================*/
- KEY_PART_INFO* key_part, /*!< in: MySQL key definition */
- mem_heap_t* heap, /*!< in: memory heap */
- merge_index_field_t* index_field) /*!< out: index field
+ const TABLE* altered_table, /*!< in: MySQL table that is
+ being altered, or NULL
+ if a new clustered index is
+ not being created */
+ const KEY_PART_INFO* key_part, /*!< in: MySQL key definition */
+ index_field_t* index_field) /*!< out: index field
definition for key_part */
{
- Field* field;
+ const Field* field;
ibool is_unsigned;
ulint col_type;
@@ -359,9 +1415,13 @@ innobase_create_index_field_def(
ut_ad(key_part);
ut_ad(index_field);
- field = key_part->field;
+ field = altered_table
+ ? altered_table->field[key_part->fieldnr]
+ : key_part->field;
ut_a(field);
+ index_field->col_no = key_part->fieldnr;
+
col_type = get_innobase_type_from_mysql_type(&is_unsigned, field);
if (DATA_BLOB == col_type
@@ -376,44 +1436,48 @@ innobase_create_index_field_def(
index_field->prefix_len = 0;
}
- index_field->field_name = mem_heap_strdup(heap, field->field_name);
-
DBUG_VOID_RETURN;
}
/*******************************************************************//**
Create index definition for key */
-static
+static __attribute__((nonnull))
void
innobase_create_index_def(
/*======================*/
- KEY* key, /*!< in: key definition */
- bool new_primary, /*!< in: TRUE=generating
- a new primary key
+ const TABLE* altered_table, /*!< in: MySQL table that is
+ being altered */
+ const KEY* keys, /*!< in: key definitions */
+ ulint key_number, /*!< in: MySQL key number */
+ bool new_clustered, /*!< in: true if generating
+ a new clustered index
on the table */
- bool key_primary, /*!< in: TRUE if this key
- is a primary key */
- merge_index_def_t* index, /*!< out: index definition */
+ bool key_clustered, /*!< in: true if this is
+ the new clustered index */
+ index_def_t* index, /*!< out: index definition */
mem_heap_t* heap) /*!< in: heap where memory
is allocated */
{
- ulint i;
- ulint len;
- ulint n_fields = key->key_parts;
- char* index_name;
+ const KEY* key = &keys[key_number];
+ ulint i;
+ ulint len;
+ ulint n_fields = key->user_defined_key_parts;
+ char* index_name;
DBUG_ENTER("innobase_create_index_def");
+ DBUG_ASSERT(!key_clustered || new_clustered);
- index->fields = (merge_index_field_t*) mem_heap_alloc(
- heap, n_fields * sizeof *index->fields);
+ index->fields = static_cast<index_field_t*>(
+ mem_heap_alloc(heap, n_fields * sizeof *index->fields));
index->ind_type = 0;
+ index->key_number = key_number;
index->n_fields = n_fields;
len = strlen(key->name) + 1;
- index->name = index_name = (char*) mem_heap_alloc(heap,
- len + !new_primary);
+ index->name = index_name = static_cast<char*>(
+ mem_heap_alloc(heap, len + !new_clustered));
- if (UNIV_LIKELY(!new_primary)) {
+ if (!new_clustered) {
*index_name++ = TEMP_INDEX_PREFIX;
}
@@ -423,144 +1487,155 @@ innobase_create_index_def(
index->ind_type |= DICT_UNIQUE;
}
- if (key->flags & HA_FULLTEXT) {
+ if (key_clustered) {
+ DBUG_ASSERT(!(key->flags & HA_FULLTEXT));
+ index->ind_type |= DICT_CLUSTERED;
+ } else if (key->flags & HA_FULLTEXT) {
+ DBUG_ASSERT(!(key->flags & HA_KEYFLAG_MASK
+ & ~(HA_FULLTEXT
+ | HA_PACK_KEY
+ | HA_BINARY_PACK_KEY)));
+ DBUG_ASSERT(!(key->flags & HA_NOSAME));
+ DBUG_ASSERT(!index->ind_type);
index->ind_type |= DICT_FTS;
}
- if (key_primary) {
- index->ind_type |= DICT_CLUSTERED;
+ if (!new_clustered) {
+ altered_table = NULL;
}
for (i = 0; i < n_fields; i++) {
- innobase_create_index_field_def(&key->key_part[i], heap,
- &index->fields[i]);
+ innobase_create_index_field_def(
+ altered_table, &key->key_part[i], &index->fields[i]);
}
DBUG_VOID_RETURN;
}
/*******************************************************************//**
-Copy index field definition */
+Check whether the table has the FTS_DOC_ID column
+@return whether there exists an FTS_DOC_ID column */
static
-void
-innobase_copy_index_field_def(
+bool
+innobase_fts_check_doc_id_col(
/*==========================*/
- const dict_field_t* field, /*!< in: definition to copy */
- merge_index_field_t* index_field) /*!< out: copied definition */
+ const dict_table_t* table, /*!< in: InnoDB table with
+ fulltext index */
+ const TABLE* altered_table,
+ /*!< in: MySQL table with
+ fulltext index */
+ ulint* fts_doc_col_no)
+ /*!< out: The column number for
+ Doc ID, or ULINT_UNDEFINED
+ if it is of wrong type */
{
- DBUG_ENTER("innobase_copy_index_field_def");
- DBUG_ASSERT(field != NULL);
- DBUG_ASSERT(index_field != NULL);
-
- index_field->field_name = field->name;
- index_field->prefix_len = field->prefix_len;
-
- DBUG_VOID_RETURN;
-}
-
-/*******************************************************************//**
-Copy index definition for the index */
-static
-void
-innobase_copy_index_def(
-/*====================*/
- const dict_index_t* index, /*!< in: index definition to copy */
- merge_index_def_t* new_index,/*!< out: Index definition */
- mem_heap_t* heap) /*!< in: heap where allocated */
-{
- ulint n_fields;
- ulint i;
-
- DBUG_ENTER("innobase_copy_index_def");
+ *fts_doc_col_no = ULINT_UNDEFINED;
- /* Note that we take only those fields that user defined to be
- in the index. In the internal representation more colums were
- added and those colums are not copied .*/
+ const uint n_cols = altered_table->s->fields;
+ uint i;
- n_fields = index->n_user_defined_cols;
+ for (i = 0; i < n_cols; i++) {
+ const Field* field = altered_table->s->field[i];
- new_index->fields = (merge_index_field_t*) mem_heap_alloc(
- heap, n_fields * sizeof *new_index->fields);
+ if (my_strcasecmp(system_charset_info,
+ field->field_name, FTS_DOC_ID_COL_NAME)) {
+ continue;
+ }
- /* When adding a PRIMARY KEY, we may convert a previous
- clustered index to a secondary index (UNIQUE NOT NULL). */
- new_index->ind_type = index->type & ~DICT_CLUSTERED;
- new_index->n_fields = n_fields;
- new_index->name = index->name;
+ if (strcmp(field->field_name, FTS_DOC_ID_COL_NAME)) {
+ my_error(ER_WRONG_COLUMN_NAME, MYF(0),
+ field->field_name);
+ } else if (field->type() != MYSQL_TYPE_LONGLONG
+ || field->pack_length() != 8
+ || field->real_maybe_null()
+ || !(field->flags & UNSIGNED_FLAG)) {
+ my_error(ER_INNODB_FT_WRONG_DOCID_COLUMN, MYF(0),
+ field->field_name);
+ } else {
+ *fts_doc_col_no = i;
+ }
- for (i = 0; i < n_fields; i++) {
- innobase_copy_index_field_def(&index->fields[i],
- &new_index->fields[i]);
+ return(true);
}
- DBUG_VOID_RETURN;
-}
-
-/*******************************************************************//**
-Check whether the table has the FTS_DOC_ID column
-@return TRUE if there exists the FTS_DOC_ID column, if TRUE but fts_doc_col_no
- equal to ULINT_UNDEFINED then that means the column exists but is not
- of the right type. */
-static
-ibool
-innobase_fts_check_doc_id_col(
-/*==========================*/
- dict_table_t* table, /*!< in: table with FTS index */
- ulint* fts_doc_col_no) /*!< out: The column number for
- Doc ID */
-{
- *fts_doc_col_no = ULINT_UNDEFINED;
+ if (!table) {
+ return(false);
+ }
- for (ulint i = 0; i + DATA_N_SYS_COLS < (ulint) table->n_cols; i++) {
+ for (; i + DATA_N_SYS_COLS < (uint) table->n_cols; i++) {
const char* name = dict_table_get_col_name(table, i);
if (strcmp(name, FTS_DOC_ID_COL_NAME) == 0) {
+#ifdef UNIV_DEBUG
const dict_col_t* col;
col = dict_table_get_nth_col(table, i);
- if (col->mtype != DATA_INT || col->len != 8) {
- fprintf(stderr,
- " InnoDB: %s column in table %s"
- " must be of the BIGINT datatype\n",
- FTS_DOC_ID_COL_NAME, table->name);
- } else if (!(col->prtype & DATA_NOT_NULL)) {
- fprintf(stderr,
- " InnoDB: %s column in table %s"
- " must be NOT NULL\n",
- FTS_DOC_ID_COL_NAME, table->name);
-
- } else if (!(col->prtype & DATA_UNSIGNED)) {
- fprintf(stderr,
- " InnoDB: %s column in table %s"
- " must be UNSIGNED\n",
- FTS_DOC_ID_COL_NAME, table->name);
- } else {
- *fts_doc_col_no = i;
- }
-
- return(TRUE);
+ /* Because the FTS_DOC_ID does not exist in
+ the MySQL data dictionary, this must be the
+ internally created FTS_DOC_ID column. */
+ ut_ad(col->mtype == DATA_INT);
+ ut_ad(col->len == 8);
+ ut_ad(col->prtype & DATA_NOT_NULL);
+ ut_ad(col->prtype & DATA_UNSIGNED);
+#endif /* UNIV_DEBUG */
+ *fts_doc_col_no = i;
+ return(true);
}
}
- return(FALSE);
+ return(false);
}
/*******************************************************************//**
Check whether the table has a unique index with FTS_DOC_ID_INDEX_NAME
on the Doc ID column.
-@return FTS_EXIST_DOC_ID_INDEX if there exists the FTS_DOC_ID index,
-FTS_INCORRECT_DOC_ID_INDEX if the FTS_DOC_ID index is of wrong format */
+@return the status of the FTS_DOC_ID index */
UNIV_INTERN
enum fts_doc_id_index_enum
innobase_fts_check_doc_id_index(
/*============================*/
- dict_table_t* table, /*!< in: table definition */
- ulint* fts_doc_col_no) /*!< out: The column number for
- Doc ID */
+ const dict_table_t* table, /*!< in: table definition */
+ const TABLE* altered_table, /*!< in: MySQL table
+ that is being altered */
+ ulint* fts_doc_col_no) /*!< out: The column number for
+ Doc ID, or ULINT_UNDEFINED
+ if it is being created in
+ ha_alter_info */
{
- dict_index_t* index;
- dict_field_t* field;
+ const dict_index_t* index;
+ const dict_field_t* field;
+
+ if (altered_table) {
+ /* Check if a unique index with the name of
+ FTS_DOC_ID_INDEX_NAME is being created. */
+
+ for (uint i = 0; i < altered_table->s->keys; i++) {
+ const KEY& key = altered_table->s->key_info[i];
+
+ if (innobase_strcasecmp(
+ key.name, FTS_DOC_ID_INDEX_NAME)) {
+ continue;
+ }
+
+ if ((key.flags & HA_NOSAME)
+ && key.user_defined_key_parts == 1
+ && !strcmp(key.name, FTS_DOC_ID_INDEX_NAME)
+ && !strcmp(key.key_part[0].field->field_name,
+ FTS_DOC_ID_COL_NAME)) {
+ if (fts_doc_col_no) {
+ *fts_doc_col_no = ULINT_UNDEFINED;
+ }
+ return(FTS_EXIST_DOC_ID_INDEX);
+ } else {
+ return(FTS_INCORRECT_DOC_ID_INDEX);
+ }
+ }
+ }
+
+ if (!table) {
+ return(FTS_NOT_EXIST_DOC_ID_INDEX);
+ }
for (index = dict_table_get_first_index(table);
index; index = dict_table_get_next_index(index)) {
@@ -572,6 +1647,7 @@ innobase_fts_check_doc_id_index(
}
if (!dict_index_is_unique(index)
+ || dict_index_get_n_unique(index) > 1
|| strcmp(index->name, FTS_DOC_ID_INDEX_NAME)) {
return(FTS_INCORRECT_DOC_ID_INDEX);
}
@@ -592,9 +1668,9 @@ innobase_fts_check_doc_id_index(
} else {
return(FTS_INCORRECT_DOC_ID_INDEX);
}
-
}
+
/* Not found */
return(FTS_NOT_EXIST_DOC_ID_INDEX);
}
@@ -608,12 +1684,12 @@ enum fts_doc_id_index_enum
innobase_fts_check_doc_id_index_in_def(
/*===================================*/
ulint n_key, /*!< in: Number of keys */
- KEY * key_info) /*!< in: Key definition */
+ const KEY* key_info) /*!< in: Key definition */
{
/* Check whether there is a "FTS_DOC_ID_INDEX" in the to be built index
list */
for (ulint j = 0; j < n_key; j++) {
- KEY* key = &key_info[j];
+ const KEY* key = &key_info[j];
if (innobase_strcasecmp(key->name, FTS_DOC_ID_INDEX_NAME)) {
continue;
@@ -622,14 +1698,15 @@ innobase_fts_check_doc_id_index_in_def(
/* Do a check on FTS DOC ID_INDEX, it must be unique,
named as "FTS_DOC_ID_INDEX" and on column "FTS_DOC_ID" */
if (!(key->flags & HA_NOSAME)
+ || key->user_defined_key_parts != 1
|| strcmp(key->name, FTS_DOC_ID_INDEX_NAME)
|| strcmp(key->key_part[0].field->field_name,
- FTS_DOC_ID_COL_NAME)) {
+ FTS_DOC_ID_COL_NAME)) {
return(FTS_INCORRECT_DOC_ID_INDEX);
- }
+ }
return(FTS_EXIST_DOC_ID_INDEX);
- }
+ }
return(FTS_NOT_EXIST_DOC_ID_INDEX);
}
@@ -639,8 +1716,7 @@ Create an index table where indexes are ordered as follows:
IF a new primary key is defined for the table THEN
1) New primary key
- 2) Original secondary indexes
- 3) New secondary indexes
+ 2) The remaining keys in key_info
ELSE
@@ -648,626 +1724,1272 @@ ELSE
ENDIF
-
-@return key definitions or NULL */
-static
-merge_index_def_t*
-innobase_create_key_def(
-/*====================*/
- trx_t* trx, /*!< in: trx */
- dict_table_t* table, /*!< in: table definition */
- mem_heap_t* heap, /*!< in: heap where space for key
- definitions are allocated */
- KEY* key_info, /*!< in: Indexes to be created */
- ulint& n_keys, /*!< in/out: Number of indexes to
- be created */
- ulint* num_fts_index, /*!< out: Number of FTS indexes */
- ibool* add_fts_doc_id, /*!< out: Whether we need to add
- new DOC ID column for FTS index */
- ibool* add_fts_doc_id_idx)/*!< out: Whether we need to add
- new index on DOC ID column */
+@return key definitions */
+static __attribute__((nonnull, warn_unused_result, malloc))
+index_def_t*
+innobase_create_key_defs(
+/*=====================*/
+ mem_heap_t* heap,
+ /*!< in/out: memory heap where space for key
+ definitions are allocated */
+ const Alter_inplace_info* ha_alter_info,
+ /*!< in: alter operation */
+ const TABLE* altered_table,
+ /*!< in: MySQL table that is being altered */
+ ulint& n_add,
+ /*!< in/out: number of indexes to be created */
+ ulint& n_fts_add,
+ /*!< out: number of FTS indexes to be created */
+ bool got_default_clust,
+ /*!< in: whether the table lacks a primary key */
+ ulint& fts_doc_id_col,
+ /*!< in: The column number for Doc ID */
+ bool& add_fts_doc_id,
+ /*!< in: whether we need to add new DOC ID
+ column for FTS index */
+ bool& add_fts_doc_idx)
+ /*!< in: whether we need to add new DOC ID
+ index for FTS index */
{
- ulint i = 0;
- merge_index_def_t* indexdef;
- merge_index_def_t* indexdefs;
+ index_def_t* indexdef;
+ index_def_t* indexdefs;
bool new_primary;
+ const uint*const add
+ = ha_alter_info->index_add_buffer;
+ const KEY*const key_info
+ = ha_alter_info->key_info_buffer;
- DBUG_ENTER("innobase_create_key_def");
-
- indexdef = indexdefs = (merge_index_def_t*)
- mem_heap_alloc(heap, sizeof *indexdef
- * (n_keys + UT_LIST_GET_LEN(table->indexes)));
-
- *add_fts_doc_id = FALSE;
- *add_fts_doc_id_idx = FALSE;
+ DBUG_ENTER("innobase_create_key_defs");
+ DBUG_ASSERT(!add_fts_doc_id || add_fts_doc_idx);
+ DBUG_ASSERT(ha_alter_info->index_add_count == n_add);
/* If there is a primary key, it is always the first index
- defined for the table. */
+ defined for the innodb_table. */
- new_primary = !my_strcasecmp(system_charset_info,
- key_info->name, "PRIMARY");
+ new_primary = n_add > 0
+ && !my_strcasecmp(system_charset_info,
+ key_info[*add].name, "PRIMARY");
+ n_fts_add = 0;
/* If there is a UNIQUE INDEX consisting entirely of NOT NULL
columns and if the index does not contain column prefix(es)
(only prefix/part of the column is indexed), MySQL will treat the
index as a PRIMARY KEY unless the table already has one. */
- if (!new_primary && (key_info->flags & HA_NOSAME)
- && (!(key_info->flags & HA_KEY_HAS_PART_KEY_SEG))
- && row_table_got_default_clust_index(table)) {
- uint key_part = key_info->key_parts;
+ if (n_add > 0 && !new_primary && got_default_clust
+ && (key_info[*add].flags & HA_NOSAME)
+ && !(key_info[*add].flags & HA_KEY_HAS_PART_KEY_SEG)) {
+ uint key_part = key_info[*add].user_defined_key_parts;
- new_primary = TRUE;
+ new_primary = true;
while (key_part--) {
- if (key_info->key_part[key_part].key_type
- & FIELDFLAG_MAYBE_NULL) {
- new_primary = FALSE;
+ const uint maybe_null
+ = key_info[*add].key_part[key_part].key_type
+ & FIELDFLAG_MAYBE_NULL;
+ DBUG_ASSERT(!maybe_null
+ == !key_info[*add].key_part[key_part].
+ field->real_maybe_null());
+
+ if (maybe_null) {
+ new_primary = false;
break;
}
}
}
- /* Check whether any indexes in the create list are Full
- Text Indexes*/
- for (ulint j = 0; j < n_keys; j++) {
- if (key_info[j].flags & HA_FULLTEXT) {
- (*num_fts_index)++;
- }
- }
-
- /* Check whether there is a "FTS_DOC_ID_INDEX" in the to be built index
- list */
- if (innobase_fts_check_doc_id_index_in_def(n_keys, key_info)
- == FTS_INCORRECT_DOC_ID_INDEX) {
- push_warning_printf((THD*) trx->mysql_thd,
- Sql_condition::WARN_LEVEL_WARN,
- ER_WRONG_NAME_FOR_INDEX,
- " InnoDB: Index name %s is reserved"
- " for the unique index on"
- " FTS_DOC_ID column for FTS"
- " document ID indexing"
- " on table %s. Please check"
- " the index definition to"
- " make sure it is of correct"
- " type\n",
- FTS_DOC_ID_INDEX_NAME,
- table->name);
- DBUG_RETURN(NULL);
- }
-
- /* If we are to build an FTS index, check whether the table
- already has a DOC ID column, if not, we will need to add a
- Doc ID hidden column and rebuild the primary index */
- if (*num_fts_index) {
- enum fts_doc_id_index_enum ret;
- ibool exists;
- ulint doc_col_no;
- ulint fts_doc_col_no;
-
- exists = innobase_fts_check_doc_id_col(table, &fts_doc_col_no);
-
- if (exists) {
-
- if (fts_doc_col_no == ULINT_UNDEFINED) {
-
- push_warning_printf(
- (THD*) trx->mysql_thd,
- Sql_condition::WARN_LEVEL_WARN,
- ER_WRONG_COLUMN_NAME,
- " InnoDB: There exists a column %s "
- "in table %s, but it is the wrong "
- "type. Create of FTS index failed.\n",
- FTS_DOC_ID_COL_NAME, table->name);
+ const bool rebuild = new_primary || add_fts_doc_id
+ || innobase_need_rebuild(ha_alter_info);
+ /* Reserve one more space if new_primary is true, and we might
+ need to add the FTS_DOC_ID_INDEX */
+ indexdef = indexdefs = static_cast<index_def_t*>(
+ mem_heap_alloc(
+ heap, sizeof *indexdef
+ * (ha_alter_info->key_count
+ + rebuild
+ + got_default_clust)));
- DBUG_RETURN(NULL);
-
- } else if (!table->fts) {
- table->fts = fts_create(table);
- }
-
- table->fts->doc_col = fts_doc_col_no;
+ if (rebuild) {
+ ulint primary_key_number;
+ if (new_primary) {
+ DBUG_ASSERT(n_add > 0);
+ primary_key_number = *add;
+ } else if (got_default_clust) {
+ /* Create the GEN_CLUST_INDEX */
+ index_def_t* index = indexdef++;
+
+ index->fields = NULL;
+ index->n_fields = 0;
+ index->ind_type = DICT_CLUSTERED;
+ index->name = mem_heap_strdup(
+ heap, innobase_index_reserve_name);
+ index->key_number = ~0;
+ primary_key_number = ULINT_UNDEFINED;
+ goto created_clustered;
} else {
- *add_fts_doc_id = TRUE;
- *add_fts_doc_id_idx = TRUE;
-
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Rebuild table %s to add "
- "DOC_ID column\n", table->name);
+ primary_key_number = 0;
}
- ret = innobase_fts_check_doc_id_index(table, &doc_col_no);
+ /* Create the PRIMARY key index definition */
+ innobase_create_index_def(
+ altered_table, key_info, primary_key_number,
+ TRUE, TRUE, indexdef++, heap);
- switch (ret) {
- case FTS_NOT_EXIST_DOC_ID_INDEX:
- *add_fts_doc_id_idx = TRUE;
- break;
- case FTS_INCORRECT_DOC_ID_INDEX:
+created_clustered:
+ n_add = 1;
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Index %s is used for FTS"
- " Doc ID indexing on table %s, it is"
- " now on the wrong column or of"
- " wrong format. Please drop it.\n",
- FTS_DOC_ID_INDEX_NAME, table->name);
- DBUG_RETURN(NULL);
+ for (ulint i = 0; i < ha_alter_info->key_count; i++) {
+ if (i == primary_key_number) {
+ continue;
+ }
+ /* Copy the index definitions. */
+ innobase_create_index_def(
+ altered_table, key_info, i, TRUE, FALSE,
+ indexdef, heap);
- default:
- ut_ad(ret == FTS_EXIST_DOC_ID_INDEX);
+ if (indexdef->ind_type & DICT_FTS) {
+ n_fts_add++;
+ }
- ut_ad(doc_col_no == fts_doc_col_no);
+ indexdef++;
+ n_add++;
}
- }
- /* If DICT_TF2_FTS_ADD_DOC_ID is set, we will need to rebuild
- the table to add the unique Doc ID column for FTS index. And
- thus the primary index would required to be rebuilt. Copy all
- the index definitions */
- if (new_primary || *add_fts_doc_id) {
- const dict_index_t* index;
-
- if (new_primary) {
- /* Create the PRIMARY key index definition */
- innobase_create_index_def(&key_info[i++],
- TRUE, TRUE,
- indexdef++, heap);
- }
+ if (n_fts_add > 0) {
+ if (!add_fts_doc_id
+ && !innobase_fts_check_doc_id_col(
+ NULL, altered_table,
+ &fts_doc_id_col)) {
+ fts_doc_id_col = altered_table->s->fields;
+ add_fts_doc_id = true;
+ }
- row_mysql_lock_data_dictionary(trx);
+ if (!add_fts_doc_idx) {
+ fts_doc_id_index_enum ret;
+ ulint doc_col_no;
- index = dict_table_get_first_index(table);
+ ret = innobase_fts_check_doc_id_index(
+ NULL, altered_table, &doc_col_no);
- /* Copy the index definitions of the old table. Skip
- the old clustered index if it is a generated clustered
- index or a PRIMARY KEY. If the clustered index is a
- UNIQUE INDEX, it must be converted to a secondary index. */
+ /* This should have been checked before */
+ ut_ad(ret != FTS_INCORRECT_DOC_ID_INDEX);
- if (new_primary
- && (dict_index_get_nth_col(index, 0)->mtype
- == DATA_SYS
- || !my_strcasecmp(system_charset_info,
- index->name, "PRIMARY"))) {
- index = dict_table_get_next_index(index);
+ if (ret == FTS_NOT_EXIST_DOC_ID_INDEX) {
+ add_fts_doc_idx = true;
+ } else {
+ ut_ad(ret == FTS_EXIST_DOC_ID_INDEX);
+ ut_ad(doc_col_no == ULINT_UNDEFINED
+ || doc_col_no == fts_doc_id_col);
+ }
+ }
}
+ } else {
+ /* Create definitions for added secondary indexes. */
- while (index) {
- innobase_copy_index_def(index, indexdef++, heap);
+ for (ulint i = 0; i < n_add; i++) {
+ innobase_create_index_def(
+ altered_table, key_info, add[i], FALSE, FALSE,
+ indexdef, heap);
- if (new_primary && index->type & DICT_FTS) {
- (*num_fts_index)++;
+ if (indexdef->ind_type & DICT_FTS) {
+ n_fts_add++;
}
- index = dict_table_get_next_index(index);
+ indexdef++;
}
+ }
- /* The primary index would be rebuilt if a FTS Doc ID
- column is to be added, and the primary index definition
- is just copied from old table and stored in indexdefs[0] */
- if (*add_fts_doc_id) {
- indexdefs[0].ind_type |= DICT_CLUSTERED;
- DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_ADD_DOC_ID);
- }
+ DBUG_ASSERT(indexdefs + n_add == indexdef);
- row_mysql_unlock_data_dictionary(trx);
- }
+ if (add_fts_doc_idx) {
+ index_def_t* index = indexdef++;
- /* Create definitions for added secondary indexes. */
+ index->fields = static_cast<index_field_t*>(
+ mem_heap_alloc(heap, sizeof *index->fields));
+ index->n_fields = 1;
+ index->fields->col_no = fts_doc_id_col;
+ index->fields->prefix_len = 0;
+ index->ind_type = DICT_UNIQUE;
- while (i < n_keys) {
- innobase_create_index_def(&key_info[i++], new_primary, FALSE,
- indexdef++, heap);
- }
+ if (rebuild) {
+ index->name = mem_heap_strdup(
+ heap, FTS_DOC_ID_INDEX_NAME);
+ ut_ad(!add_fts_doc_id
+ || fts_doc_id_col == altered_table->s->fields);
+ } else {
+ char* index_name;
+ index->name = index_name = static_cast<char*>(
+ mem_heap_alloc(
+ heap,
+ 1 + sizeof FTS_DOC_ID_INDEX_NAME));
+ *index_name++ = TEMP_INDEX_PREFIX;
+ memcpy(index_name, FTS_DOC_ID_INDEX_NAME,
+ sizeof FTS_DOC_ID_INDEX_NAME);
+ }
- n_keys = indexdef - indexdefs;
+ /* TODO: assign a real MySQL key number for this */
+ index->key_number = ULINT_UNDEFINED;
+ n_add++;
+ }
+ DBUG_ASSERT(indexdef > indexdefs);
+ DBUG_ASSERT((ulint) (indexdef - indexdefs)
+ <= ha_alter_info->key_count
+ + add_fts_doc_idx + got_default_clust);
+ DBUG_ASSERT(ha_alter_info->index_add_count <= n_add);
DBUG_RETURN(indexdefs);
}
/*******************************************************************//**
Check each index column size, make sure they do not exceed the max limit
-@return HA_ERR_INDEX_COL_TOO_LONG if index column size exceeds limit */
-static
-int
+@return true if index column size exceeds limit */
+static __attribute__((nonnull, warn_unused_result))
+bool
innobase_check_column_length(
/*=========================*/
- const dict_table_t*table, /*!< in: table definition */
+ ulint max_col_len, /*!< in: maximum column length */
const KEY* key_info) /*!< in: Indexes to be created */
{
- ulint max_col_len = DICT_MAX_FIELD_LEN_BY_FORMAT(table);
-
- for (ulint key_part = 0; key_part < key_info->key_parts; key_part++) {
+ for (ulint key_part = 0; key_part < key_info->user_defined_key_parts; key_part++) {
if (key_info->key_part[key_part].length > max_col_len) {
- my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0), max_col_len);
- return(HA_ERR_INDEX_COL_TOO_LONG);
+ return(true);
}
}
- return(0);
+ return(false);
}
-/*******************************************************************//**
-Create a temporary tablename using query id, thread id, and id
-@return temporary tablename */
-static
-char*
-innobase_create_temporary_tablename(
-/*================================*/
- mem_heap_t* heap, /*!< in: memory heap */
- char id, /*!< in: identifier [0-9a-zA-Z] */
- const char* table_name) /*!< in: table name */
+struct ha_innobase_inplace_ctx : public inplace_alter_handler_ctx
{
- char* name;
- ulint len;
- static const char suffix[] = "@0023 "; /* "# " */
+ /** Dummy query graph */
+ que_thr_t* thr;
+ /** InnoDB indexes being created */
+ dict_index_t** add;
+ /** MySQL key numbers for the InnoDB indexes that are being created */
+ const ulint* add_key_numbers;
+ /** number of InnoDB indexes being created */
+ const ulint num_to_add;
+ /** InnoDB indexes being dropped */
+ dict_index_t** drop;
+ /** number of InnoDB indexes being dropped */
+ const ulint num_to_drop;
+ /** InnoDB foreign key constraints being dropped */
+ dict_foreign_t** drop_fk;
+ /** number of InnoDB foreign key constraints being dropped */
+ const ulint num_to_drop_fk;
+ /** InnoDB foreign key constraints being added */
+ dict_foreign_t** add_fk;
+	/** number of InnoDB foreign key constraints being added */
+ const ulint num_to_add_fk;
+ /** whether to create the indexes online */
+ bool online;
+ /** memory heap */
+ mem_heap_t* heap;
+ /** dictionary transaction */
+ trx_t* trx;
+ /** table where the indexes are being created or dropped */
+ dict_table_t* indexed_table;
+ /** mapping of old column numbers to new ones, or NULL */
+ const ulint* col_map;
+ /** added AUTO_INCREMENT column position, or ULINT_UNDEFINED */
+ const ulint add_autoinc;
+ /** default values of ADD COLUMN, or NULL */
+ const dtuple_t* add_cols;
+ /** autoinc sequence to use */
+ ib_sequence_t sequence;
+
+ ha_innobase_inplace_ctx(trx_t* user_trx,
+ dict_index_t** add_arg,
+ const ulint* add_key_numbers_arg,
+ ulint num_to_add_arg,
+ dict_index_t** drop_arg,
+ ulint num_to_drop_arg,
+ dict_foreign_t** drop_fk_arg,
+ ulint num_to_drop_fk_arg,
+ dict_foreign_t** add_fk_arg,
+ ulint num_to_add_fk_arg,
+ bool online_arg,
+ mem_heap_t* heap_arg,
+ trx_t* trx_arg,
+ dict_table_t* indexed_table_arg,
+ const ulint* col_map_arg,
+ ulint add_autoinc_arg,
+ ulonglong autoinc_col_min_value_arg,
+ ulonglong autoinc_col_max_value_arg,
+ const dtuple_t* add_cols_arg) :
+ inplace_alter_handler_ctx(),
+ add (add_arg), add_key_numbers (add_key_numbers_arg),
+ num_to_add (num_to_add_arg),
+ drop (drop_arg), num_to_drop (num_to_drop_arg),
+ drop_fk (drop_fk_arg), num_to_drop_fk (num_to_drop_fk_arg),
+ add_fk (add_fk_arg), num_to_add_fk (num_to_add_fk_arg),
+ online (online_arg), heap (heap_arg), trx (trx_arg),
+ indexed_table (indexed_table_arg),
+ col_map (col_map_arg), add_autoinc (add_autoinc_arg),
+ add_cols (add_cols_arg),
+ sequence(user_trx ? user_trx->mysql_thd : 0,
+ autoinc_col_min_value_arg, autoinc_col_max_value_arg)
+ {
+#ifdef UNIV_DEBUG
+ for (ulint i = 0; i < num_to_add; i++) {
+ ut_ad(!add[i]->to_be_dropped);
+ }
+ for (ulint i = 0; i < num_to_drop; i++) {
+ ut_ad(drop[i]->to_be_dropped);
+ }
+#endif /* UNIV_DEBUG */
- len = strlen(table_name);
+ thr = pars_complete_graph_for_exec(NULL, user_trx, heap);
+ }
- name = (char*) mem_heap_alloc(heap, len + sizeof suffix);
- memcpy(name, table_name, len);
- memcpy(name + len, suffix, sizeof suffix);
- name[len + (sizeof suffix - 2)] = id;
+ ~ha_innobase_inplace_ctx()
+ {
+ mem_heap_free(heap);
+ }
- return(name);
-}
+private:
+ // Disable copying
+ ha_innobase_inplace_ctx(const ha_innobase_inplace_ctx&);
+ ha_innobase_inplace_ctx& operator=(const ha_innobase_inplace_ctx&);
+};
-class ha_innobase_add_index : public handler_add_index
+/********************************************************************//**
+Drop any indexes that we were not able to free previously due to
+open table handles. */
+static
+void
+online_retry_drop_indexes_low(
+/*==========================*/
+ dict_table_t* table, /*!< in/out: table */
+ trx_t* trx) /*!< in/out: transaction */
{
-public:
- /** table where the indexes are being created */
- dict_table_t* indexed_table;
- ha_innobase_add_index(TABLE* table, KEY* key_info, uint num_of_keys,
- dict_table_t* indexed_table_arg) :
- handler_add_index(table, key_info, num_of_keys),
- indexed_table (indexed_table_arg) {}
- ~ha_innobase_add_index() {}
-};
+ ut_ad(mutex_own(&dict_sys->mutex));
+ ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
+ ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
+
+ /* We can have table->n_ref_count > 1, because other threads
+ may have prebuilt->table pointing to the table. However, these
+ other threads should be between statements, waiting for the
+ next statement to execute, or for a meta-data lock. */
+ ut_ad(table->n_ref_count >= 1);
+
+ if (table->drop_aborted) {
+ row_merge_drop_indexes(trx, table, TRUE);
+ }
+}
-/*******************************************************************//**
-This is to create FTS_DOC_ID_INDEX definition on the newly added Doc ID for
-the FTS indexes table
-@return dict_index_t for the FTS_DOC_ID_INDEX */
-dict_index_t*
-innobase_create_fts_doc_id_idx(
-/*===========================*/
- dict_table_t* indexed_table, /*!< in: Table where indexes are
- created */
- trx_t* trx, /*!< in: Transaction */
- mem_heap_t* heap) /*!< Heap for index definitions */
+/********************************************************************//**
+Drop any indexes that we were not able to free previously due to
+open table handles. */
+static __attribute__((nonnull))
+void
+online_retry_drop_indexes(
+/*======================*/
+ dict_table_t* table, /*!< in/out: table */
+ THD* user_thd) /*!< in/out: MySQL connection */
{
- dict_index_t* index;
- merge_index_def_t fts_index_def;
- char* index_name;
-
- /* Create the temp index name for FTS_DOC_ID_INDEX */
- fts_index_def.name = index_name = (char*) mem_heap_alloc(
- heap, FTS_DOC_ID_INDEX_NAME_LEN + 2);
- *index_name++ = TEMP_INDEX_PREFIX;
- memcpy(index_name, FTS_DOC_ID_INDEX_NAME,
- FTS_DOC_ID_INDEX_NAME_LEN);
- index_name[FTS_DOC_ID_INDEX_NAME_LEN] = 0;
-
- /* Only the Doc ID will be indexed */
- fts_index_def.n_fields = 1;
- fts_index_def.ind_type = DICT_UNIQUE;
- fts_index_def.fields = (merge_index_field_t*) mem_heap_alloc(
- heap, sizeof *fts_index_def.fields);
- fts_index_def.fields[0].prefix_len = 0;
- fts_index_def.fields[0].field_name = mem_heap_strdup(
- heap, FTS_DOC_ID_COL_NAME);
-
- index = row_merge_create_index(trx, indexed_table, &fts_index_def);
- return(index);
+ if (table->drop_aborted) {
+ trx_t* trx = innobase_trx_allocate(user_thd);
+
+ trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
+
+ row_mysql_lock_data_dictionary(trx);
+ online_retry_drop_indexes_low(table, trx);
+ trx_commit_for_mysql(trx);
+ row_mysql_unlock_data_dictionary(trx);
+ trx_free_for_mysql(trx);
+ }
+
+#ifdef UNIV_DEBUG
+ mutex_enter(&dict_sys->mutex);
+ dict_table_check_for_dup_indexes(table, CHECK_ALL_COMPLETE);
+ mutex_exit(&dict_sys->mutex);
+ ut_a(!table->drop_aborted);
+#endif /* UNIV_DEBUG */
}
-/*******************************************************************//**
-Clean up on ha_innobase::add_index error. */
-static
+/********************************************************************//**
+Commit a dictionary transaction and drop any indexes that we were not
+able to free previously due to open table handles. */
+static __attribute__((nonnull))
void
-innobase_add_index_cleanup(
-/*=======================*/
- row_prebuilt_t* prebuilt, /*!< in/out: prebuilt */
- trx_t* trx, /*!< in/out: transaction */
- dict_table_t* table) /*!< in/out: table on which
- the indexes were going to be
- created */
+online_retry_drop_indexes_with_trx(
+/*===============================*/
+ dict_table_t* table, /*!< in/out: table */
+ trx_t* trx) /*!< in/out: transaction */
{
- trx_rollback_to_savepoint(trx, NULL);
+ ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED));
+ ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
- ut_a(trx != prebuilt->trx);
+ /* Now that the dictionary is being locked, check if we can
+ drop any incompletely created indexes that may have been left
+ behind in rollback_inplace_alter_table() earlier. */
+ if (table->drop_aborted) {
- trx_free_for_mysql(trx);
+ trx->table_id = 0;
- trx_commit_for_mysql(prebuilt->trx);
+ trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
- if (table != NULL) {
+ online_retry_drop_indexes_low(table, trx);
+ trx_commit_for_mysql(trx);
+ }
+}
- rw_lock_x_lock(&dict_operation_lock);
+/** Determines if InnoDB is dropping a foreign key constraint.
+@param foreign the constraint
+@param drop_fk constraints being dropped
+@param n_drop_fk number of constraints that are being dropped
+@return whether the constraint is being dropped */
+inline __attribute__((pure, nonnull, warn_unused_result))
+bool
+innobase_dropping_foreign(
+/*======================*/
+ const dict_foreign_t* foreign,
+ dict_foreign_t** drop_fk,
+ ulint n_drop_fk)
+{
+ while (n_drop_fk--) {
+ if (*drop_fk++ == foreign) {
+ return(true);
+ }
+ }
- dict_mutex_enter_for_mysql();
+ return(false);
+}
- /* Note: This check excludes the system tables. However, we
- should be safe because users cannot add indexes to system
- tables. */
+/** Determines if an InnoDB FOREIGN KEY constraint depends on a
+column that is being dropped or modified to NOT NULL.
+@param user_table InnoDB table as it is before the ALTER operation
+@param col_name Name of the column being altered
+@param drop_fk constraints being dropped
+@param n_drop_fk number of constraints that are being dropped
+@param drop true=drop column, false=set NOT NULL
+@retval true Not allowed (will call my_error())
+@retval false Allowed
+*/
+static __attribute__((pure, nonnull, warn_unused_result))
+bool
+innobase_check_foreigns_low(
+/*========================*/
+ const dict_table_t* user_table,
+ dict_foreign_t** drop_fk,
+ ulint n_drop_fk,
+ const char* col_name,
+ bool drop)
+{
+ ut_ad(mutex_own(&dict_sys->mutex));
+
+ /* Check if any FOREIGN KEY constraints are defined on this
+ column. */
+ for (const dict_foreign_t* foreign = UT_LIST_GET_FIRST(
+ user_table->foreign_list);
+ foreign;
+ foreign = UT_LIST_GET_NEXT(foreign_list, foreign)) {
+ if (!drop && !(foreign->type
+ & (DICT_FOREIGN_ON_DELETE_SET_NULL
+ | DICT_FOREIGN_ON_UPDATE_SET_NULL))) {
+ continue;
+ }
- if (UT_LIST_GET_LEN(table->foreign_list) == 0
- && UT_LIST_GET_LEN(table->referenced_list) == 0
- && !table->can_be_evicted) {
+ if (innobase_dropping_foreign(foreign, drop_fk, n_drop_fk)) {
+ continue;
+ }
- dict_table_move_from_non_lru_to_lru(table);
+ for (unsigned f = 0; f < foreign->n_fields; f++) {
+ if (!strcmp(foreign->foreign_col_names[f],
+ col_name)) {
+ my_error(drop
+ ? ER_FK_COLUMN_CANNOT_DROP
+ : ER_FK_COLUMN_NOT_NULL, MYF(0),
+ col_name, foreign->id);
+ return(true);
+ }
}
+ }
+
+ if (!drop) {
+ /* SET NULL clauses on foreign key constraints of
+ child tables affect the child tables, not the parent table.
+ The column can be NOT NULL in the parent table. */
+ return(false);
+ }
- dict_table_close(table, TRUE);
+ /* Check if any FOREIGN KEY constraints in other tables are
+ referring to the column that is being dropped. */
+ for (const dict_foreign_t* foreign = UT_LIST_GET_FIRST(
+ user_table->referenced_list);
+ foreign;
+ foreign = UT_LIST_GET_NEXT(referenced_list, foreign)) {
+ if (innobase_dropping_foreign(foreign, drop_fk, n_drop_fk)) {
+ continue;
+ }
- dict_mutex_exit_for_mysql();
+ for (unsigned f = 0; f < foreign->n_fields; f++) {
+ char display_name[FN_REFLEN];
- rw_lock_x_unlock(&dict_operation_lock);
+ if (strcmp(foreign->referenced_col_names[f],
+ col_name)) {
+ continue;
+ }
+
+ char* buf_end = innobase_convert_name(
+ display_name, (sizeof display_name) - 1,
+ foreign->foreign_table_name,
+ strlen(foreign->foreign_table_name),
+ NULL, TRUE);
+ *buf_end = '\0';
+ my_error(ER_FK_COLUMN_CANNOT_DROP_CHILD,
+ MYF(0), col_name, foreign->id,
+ display_name);
+
+ return(true);
+ }
}
+
+ return(false);
}
-/*******************************************************************//**
-Create indexes.
-@return 0 or error number */
-UNIV_INTERN
-int
-ha_innobase::add_index(
-/*===================*/
- TABLE* in_table, /*!< in: Table where indexes
- are created */
- KEY* key_info, /*!< in: Indexes
- to be created */
- uint num_of_keys, /*!< in: Number of indexes
- to be created */
- handler_add_index** add) /*!< out: context */
+/** Determines if an InnoDB FOREIGN KEY constraint depends on a
+column that is being dropped or modified to NOT NULL.
+@param ha_alter_info Data used during in-place alter
+@param altered_table MySQL table that is being altered
+@param old_table MySQL table as it is before the ALTER operation
+@param user_table InnoDB table as it is before the ALTER operation
+@param drop_fk constraints being dropped
+@param n_drop_fk number of constraints that are being dropped
+@retval true Not allowed (will call my_error())
+@retval false Allowed
+*/
+static __attribute__((pure, nonnull, warn_unused_result))
+bool
+innobase_check_foreigns(
+/*====================*/
+ Alter_inplace_info* ha_alter_info,
+ const TABLE* altered_table,
+ const TABLE* old_table,
+ const dict_table_t* user_table,
+ dict_foreign_t** drop_fk,
+ ulint n_drop_fk)
{
- dict_index_t** index = NULL; /*!< Index to be created */
- dict_index_t* fts_index = NULL;/*!< FTS Index to be created */
- dict_table_t* indexed_table; /*!< Table where indexes are created */
- merge_index_def_t* index_defs; /*!< Index definitions */
- mem_heap_t* heap = NULL; /*!< Heap for index definitions */
- trx_t* trx; /*!< Transaction */
- ulint num_of_idx;
- ulint num_created = 0;
- ibool dict_locked = FALSE;
- ulint new_primary = 0;
- int error;
- ulint num_fts_index = 0;
- ulint num_idx_create = 0;
- ibool fts_add_doc_id = FALSE;
- ibool fts_add_doc_idx = FALSE;
+ List_iterator_fast<Create_field> cf_it(
+ ha_alter_info->alter_info->create_list);
- DBUG_ENTER("ha_innobase::add_index");
- ut_a(table);
- ut_a(key_info);
- ut_a(num_of_keys);
+ for (Field** fp = old_table->field; *fp; fp++) {
+ cf_it.rewind();
+ const Create_field* new_field;
- *add = NULL;
+ ut_ad(!(*fp)->real_maybe_null()
+ == !!((*fp)->flags & NOT_NULL_FLAG));
- if (srv_created_new_raw || srv_force_recovery) {
- DBUG_RETURN(HA_ERR_WRONG_COMMAND);
+ while ((new_field = cf_it++)) {
+ if (new_field->field == *fp) {
+ break;
+ }
+ }
+
+ if (!new_field || (new_field->flags & NOT_NULL_FLAG)) {
+ if (innobase_check_foreigns_low(
+ user_table, drop_fk, n_drop_fk,
+ (*fp)->field_name, !new_field)) {
+ return(true);
+ }
+ }
}
- update_thd();
+ return(false);
+}
- /* In case MySQL calls this in the middle of a SELECT query, release
- possible adaptive hash latch to avoid deadlocks of threads. */
- trx_search_latch_release_if_reserved(prebuilt->trx);
+/** Convert a default value for ADD COLUMN.
- /* Check if the index name is reserved. */
- if (innobase_index_name_is_reserved(user_thd, key_info, num_of_keys)) {
- DBUG_RETURN(-1);
+@param heap Memory heap where allocated
+@param dfield InnoDB data field to copy to
+@param field MySQL value for the column
+@param comp nonzero if in compact format */
+static __attribute__((nonnull))
+void
+innobase_build_col_map_add(
+/*=======================*/
+ mem_heap_t* heap,
+ dfield_t* dfield,
+ const Field* field,
+ ulint comp)
+{
+ if (field->is_real_null()) {
+ dfield_set_null(dfield);
+ return;
}
- indexed_table = dict_table_open_on_name(prebuilt->table->name, FALSE);
+ ulint size = field->pack_length();
- if (UNIV_UNLIKELY(!indexed_table)) {
- DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
- }
+ byte* buf = static_cast<byte*>(mem_heap_alloc(heap, size));
- ut_a(indexed_table == prebuilt->table);
+ row_mysql_store_col_in_innobase_format(
+ dfield, buf, TRUE, field->ptr, size, comp);
+}
- if (indexed_table->tablespace_discarded) {
- DBUG_RETURN(-1);
+/** Construct the translation table for reordering, dropping or
+adding columns.
+
+@param ha_alter_info Data used during in-place alter
+@param altered_table MySQL table that is being altered
+@param table MySQL table as it is before the ALTER operation
+@param new_table InnoDB table corresponding to MySQL altered_table
+@param old_table InnoDB table corresponding to MYSQL table
+@param add_cols Default values for ADD COLUMN, or NULL if no ADD COLUMN
+@param heap Memory heap where allocated
+@return array of integers, mapping column numbers in the table
+to column numbers in altered_table */
+static __attribute__((nonnull(1,2,3,4,5,7), warn_unused_result))
+const ulint*
+innobase_build_col_map(
+/*===================*/
+ Alter_inplace_info* ha_alter_info,
+ const TABLE* altered_table,
+ const TABLE* table,
+ const dict_table_t* new_table,
+ const dict_table_t* old_table,
+ dtuple_t* add_cols,
+ mem_heap_t* heap)
+{
+ DBUG_ENTER("innobase_build_col_map");
+ DBUG_ASSERT(altered_table != table);
+ DBUG_ASSERT(new_table != old_table);
+ DBUG_ASSERT(dict_table_get_n_cols(new_table)
+ >= altered_table->s->fields + DATA_N_SYS_COLS);
+ DBUG_ASSERT(dict_table_get_n_cols(old_table)
+ >= table->s->fields + DATA_N_SYS_COLS);
+ DBUG_ASSERT(!!add_cols == !!(ha_alter_info->handler_flags
+ & Alter_inplace_info::ADD_COLUMN));
+ DBUG_ASSERT(!add_cols || dtuple_get_n_fields(add_cols)
+ == dict_table_get_n_cols(new_table));
+
+ ulint* col_map = static_cast<ulint*>(
+ mem_heap_alloc(heap, old_table->n_cols * sizeof *col_map));
+
+ List_iterator_fast<Create_field> cf_it(
+ ha_alter_info->alter_info->create_list);
+ uint i = 0;
+
+ /* Any dropped columns will map to ULINT_UNDEFINED. */
+ for (uint old_i = 0; old_i + DATA_N_SYS_COLS < old_table->n_cols;
+ old_i++) {
+ col_map[old_i] = ULINT_UNDEFINED;
}
- /* Check that index keys are sensible */
- error = innobase_check_index_keys(key_info, num_of_keys, prebuilt->table);
+ while (const Create_field* new_field = cf_it++) {
+ for (uint old_i = 0; table->field[old_i]; old_i++) {
+ const Field* field = table->field[old_i];
+ if (new_field->field == field) {
+ col_map[old_i] = i;
+ goto found_col;
+ }
+ }
- if (UNIV_UNLIKELY(error)) {
- dict_table_close(prebuilt->table, FALSE);
- DBUG_RETURN(error);
+ innobase_build_col_map_add(
+ heap, dtuple_get_nth_field(add_cols, i),
+ altered_table->s->field[i],
+ dict_table_is_comp(new_table));
+found_col:
+ i++;
}
- /* Check each index's column length to make sure they do not
- exceed limit */
- for (ulint i = 0; i < num_of_keys; i++) {
- if (key_info[i].flags & HA_FULLTEXT) {
- continue;
+ DBUG_ASSERT(i == altered_table->s->fields);
+
+ i = table->s->fields;
+
+ /* Add the InnoDB hidden FTS_DOC_ID column, if any. */
+ if (i + DATA_N_SYS_COLS < old_table->n_cols) {
+ /* There should be exactly one extra field,
+ the FTS_DOC_ID. */
+ DBUG_ASSERT(DICT_TF2_FLAG_IS_SET(old_table,
+ DICT_TF2_FTS_HAS_DOC_ID));
+ DBUG_ASSERT(i + DATA_N_SYS_COLS + 1 == old_table->n_cols);
+ DBUG_ASSERT(!strcmp(dict_table_get_col_name(
+ old_table, table->s->fields),
+ FTS_DOC_ID_COL_NAME));
+ if (altered_table->s->fields + DATA_N_SYS_COLS
+ < new_table->n_cols) {
+ DBUG_ASSERT(DICT_TF2_FLAG_IS_SET(
+ new_table,
+ DICT_TF2_FTS_HAS_DOC_ID));
+ DBUG_ASSERT(altered_table->s->fields
+ + DATA_N_SYS_COLS + 1
+ == new_table->n_cols);
+ col_map[i] = altered_table->s->fields;
+ } else {
+ DBUG_ASSERT(!DICT_TF2_FLAG_IS_SET(
+ new_table,
+ DICT_TF2_FTS_HAS_DOC_ID));
+ col_map[i] = ULINT_UNDEFINED;
}
- error = innobase_check_column_length(prebuilt->table,
- &key_info[i]);
+ i++;
+ } else {
+ DBUG_ASSERT(!DICT_TF2_FLAG_IS_SET(
+ old_table,
+ DICT_TF2_FTS_HAS_DOC_ID));
+ }
+
+ for (; i < old_table->n_cols; i++) {
+ col_map[i] = i + new_table->n_cols - old_table->n_cols;
+ }
+
+ DBUG_RETURN(col_map);
+}
+
+/** Drop newly created FTS index related auxiliary tables during
+FIC create index process, before fts_add_index is called
+@param table table that was being rebuilt online
+@param trx transaction
+@return DB_SUCCESS if successful, otherwise last error code
+*/
+static
+dberr_t
+innobase_drop_fts_index_table(
+/*==========================*/
+ dict_table_t* table,
+ trx_t* trx)
+{
+ dberr_t ret_err = DB_SUCCESS;
+
+ for (dict_index_t* index = dict_table_get_first_index(table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+ if (index->type & DICT_FTS) {
+ dberr_t err;
+
+ err = fts_drop_index_tables(trx, index);
- if (error) {
- dict_table_close(prebuilt->table, FALSE);
- DBUG_RETURN(error);
+ if (err != DB_SUCCESS) {
+ ret_err = err;
+ }
}
}
- heap = mem_heap_create(1024);
- trx_start_if_not_started(prebuilt->trx);
+ return(ret_err);
+}
+
+/** Update internal structures with concurrent writes blocked,
+while preparing ALTER TABLE.
+
+@param ha_alter_info Data used during in-place alter
+@param altered_table MySQL table that is being altered
+@param old_table MySQL table as it is before the ALTER operation
+@param user_table InnoDB table that is being altered
+@param user_trx User transaction, for locking the table
+@param table_name Table name in MySQL
+@param flags Table and tablespace flags
+@param flags2 Additional table flags
+@param heap Memory heap, or NULL
+@param drop_index Indexes to be dropped, or NULL
+@param n_drop_index Number of indexes to drop
+@param drop_foreign Foreign key constraints to be dropped, or NULL
+@param n_drop_foreign Number of foreign key constraints to drop
+@param add_foreign Foreign key constraints to be added, or NULL
+@param n_add_foreign Number of foreign key constraints to add
+@param fts_doc_id_col The column number of FTS_DOC_ID
+@param add_autoinc_col The number of an added AUTO_INCREMENT column,
+	or ULINT_UNDEFINED if none was added
+@param autoinc_col_max_value The maximum value of the added
+	AUTO_INCREMENT column
+@param add_fts_doc_id Flag: add column FTS_DOC_ID?
+@param add_fts_doc_id_idx Flag: add index (FTS_DOC_ID)?
+
+@retval true Failure
+@retval false Success
+*/
+static __attribute__((warn_unused_result, nonnull(1,2,3,4)))
+bool
+prepare_inplace_alter_table_dict(
+/*=============================*/
+ Alter_inplace_info* ha_alter_info,
+ const TABLE* altered_table,
+ const TABLE* old_table,
+ dict_table_t* user_table,
+ trx_t* user_trx,
+ const char* table_name,
+ ulint flags,
+ ulint flags2,
+ mem_heap_t* heap,
+ dict_index_t** drop_index,
+ ulint n_drop_index,
+ dict_foreign_t** drop_foreign,
+ ulint n_drop_foreign,
+ dict_foreign_t** add_foreign,
+ ulint n_add_foreign,
+ ulint fts_doc_id_col,
+ ulint add_autoinc_col,
+ ulonglong autoinc_col_max_value,
+ bool add_fts_doc_id,
+ bool add_fts_doc_id_idx)
+{
+ trx_t* trx;
+ bool dict_locked = false;
+ dict_index_t** add_index; /* indexes to be created */
+ ulint* add_key_nums; /* MySQL key numbers */
+ ulint n_add_index;
+ index_def_t* index_defs; /* index definitions */
+ dict_index_t* fts_index = NULL;
+ dict_table_t* indexed_table = user_table;
+ ulint new_clustered = 0;
+ dberr_t error;
+ THD* user_thd = user_trx->mysql_thd;
+ const ulint* col_map = NULL;
+ dtuple_t* add_cols = NULL;
+ ulint num_fts_index;
+
+ DBUG_ENTER("prepare_inplace_alter_table_dict");
+ DBUG_ASSERT((add_autoinc_col != ULINT_UNDEFINED)
+ == (autoinc_col_max_value > 0));
+ DBUG_ASSERT(!n_drop_index == !drop_index);
+ DBUG_ASSERT(!n_drop_foreign == !drop_foreign);
+ DBUG_ASSERT(!add_fts_doc_id || add_fts_doc_id_idx);
+ DBUG_ASSERT(!add_fts_doc_id_idx
+ || innobase_fulltext_exist(altered_table->s));
+
+ trx_start_if_not_started_xa(user_trx);
/* Create a background transaction for the operations on
the data dictionary tables. */
trx = innobase_trx_allocate(user_thd);
- trx_start_if_not_started(trx);
- /* We don't want this table to be evicted from the cache while we
- are building an index on it. Another issue is that while we are
- building the index this table could be referred to in a foreign
- key relationship. In innobase_add_index_cleanup() we check for
- that condition before moving it back to the LRU list. */
+ trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
- row_mysql_lock_data_dictionary(trx);
-
- if (prebuilt->table->can_be_evicted) {
- dict_table_move_from_lru_to_non_lru(prebuilt->table);
+ if (!heap) {
+ heap = mem_heap_create(1024);
}
- row_mysql_unlock_data_dictionary(trx);
-
/* Create table containing all indexes to be built in this
- alter table add index so that they are in the correct order
+ ALTER TABLE ADD INDEX so that they are in the correct order
in the table. */
- num_of_idx = num_of_keys;
+ n_add_index = ha_alter_info->index_add_count;
- index_defs = innobase_create_key_def(
- trx, prebuilt->table, heap, key_info, num_of_idx,
- &num_fts_index, &fts_add_doc_id, &fts_add_doc_idx);
+ index_defs = innobase_create_key_defs(
+ heap, ha_alter_info, altered_table, n_add_index,
+ num_fts_index, row_table_got_default_clust_index(indexed_table),
+ fts_doc_id_col, add_fts_doc_id, add_fts_doc_id_idx);
- if (!index_defs) {
- error = DB_UNSUPPORTED;
- goto error_handling;
- }
+ new_clustered = DICT_CLUSTERED & index_defs[0].ind_type;
+
+ const bool locked =
+ !ha_alter_info->online
+ || add_autoinc_col != ULINT_UNDEFINED
+ || num_fts_index > 0
+ || (innobase_need_rebuild(ha_alter_info)
+ && innobase_fulltext_exist(altered_table->s));
- /* Currently, support create one single FULLTEXT index in parallel at
- a time */
if (num_fts_index > 1) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Only support create ONE Fulltext index"
- " at a time\n");
- error = DB_UNSUPPORTED;
- goto error_handling;
+ my_error(ER_INNODB_FT_LIMIT, MYF(0));
+ goto error_handled;
}
- new_primary = DICT_CLUSTERED & index_defs[0].ind_type;
+ if (locked && ha_alter_info->online) {
+ /* This should have been blocked in
+ check_if_supported_inplace_alter(). */
+ ut_ad(0);
+ my_error(ER_NOT_SUPPORTED_YET, MYF(0),
+ thd_query_string(user_thd)->str);
+ goto error_handled;
+ }
- /* If a new FTS Doc ID column is to be added, there will be
- one additional index to be built on the Doc ID column itself. */
- num_idx_create = (fts_add_doc_idx) ? num_of_idx + 1 : num_of_idx;
+ /* The primary index would be rebuilt if a FTS Doc ID
+ column is to be added, and the primary index definition
+ is just copied from old table and stored in indexdefs[0] */
+ DBUG_ASSERT(!add_fts_doc_id || new_clustered);
+ DBUG_ASSERT(!!new_clustered ==
+ (innobase_need_rebuild(ha_alter_info)
+ || add_fts_doc_id));
/* Allocate memory for dictionary index definitions */
- index = (dict_index_t**) mem_heap_alloc(
- heap, num_idx_create * sizeof *index);
- /* Flag this transaction as a dictionary operation, so that
- the data dictionary will be locked in crash recovery. */
- trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
+ add_index = (dict_index_t**) mem_heap_alloc(
+ heap, n_add_index * sizeof *add_index);
+ add_key_nums = (ulint*) mem_heap_alloc(
+ heap, n_add_index * sizeof *add_key_nums);
+
+ /* This transaction should be dictionary operation, so that
+ the data dictionary will be locked during crash recovery. */
+
+ ut_ad(trx->dict_operation == TRX_DICT_OP_INDEX);
/* Acquire a lock on the table before creating any indexes. */
- error = row_merge_lock_table(prebuilt->trx, prebuilt->table,
- new_primary ? LOCK_X : LOCK_S);
- if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
+ if (locked) {
+ error = row_merge_lock_table(
+ user_trx, indexed_table, LOCK_S);
- goto error_handling;
+ if (error != DB_SUCCESS) {
+
+ goto error_handling;
+ }
+ } else {
+ error = DB_SUCCESS;
}
/* Latch the InnoDB data dictionary exclusively so that no deadlocks
or lock waits can happen in it during an index create operation. */
row_mysql_lock_data_dictionary(trx);
- dict_locked = TRUE;
+ dict_locked = true;
- ut_d(dict_table_check_for_dup_indexes(prebuilt->table, TRUE));
+	/* Wait for background stats processing to stop using the table
+	that we are going to alter. We know that background stats will
+	not start using it again while we are holding the data
+	dictionary lock, and we hold that lock here at least until
+	checking ut_ad(user_table->n_ref_count == 1) below.
+	XXX what may happen if bg stats opens the table after we
+	have unlocked the data dictionary below? */
+ dict_stats_wait_bg_to_stop_using_tables(user_table, NULL, trx);
- /* If a new primary key is defined for the table we need
+ online_retry_drop_indexes_low(indexed_table, trx);
+
+ ut_d(dict_table_check_for_dup_indexes(
+ indexed_table, CHECK_ABORTED_OK));
+
+ /* If a new clustered index is defined for the table we need
to drop the original table and rebuild all indexes. */
- if (UNIV_UNLIKELY(new_primary)) {
- /* This transaction should be the only one
- operating on the table. The table get above
- would have incremented the ref count to 2. */
- ut_a(prebuilt->table->n_ref_count == 2);
+ if (new_clustered) {
+ char* new_table_name = dict_mem_create_temporary_tablename(
+ heap, indexed_table->name, indexed_table->id);
+ ulint n_cols;
- char* new_table_name = innobase_create_temporary_tablename(
- heap, '1', prebuilt->table->name);
+ if (innobase_check_foreigns(
+ ha_alter_info, altered_table, old_table,
+ user_table, drop_foreign, n_drop_foreign)) {
+ goto new_clustered_failed;
+ }
- /* Clone the table. */
+ n_cols = altered_table->s->fields;
+
+ if (add_fts_doc_id) {
+ n_cols++;
+ DBUG_ASSERT(flags2 & DICT_TF2_FTS);
+ DBUG_ASSERT(add_fts_doc_id_idx);
+ flags2 |= DICT_TF2_FTS_ADD_DOC_ID
+ | DICT_TF2_FTS_HAS_DOC_ID
+ | DICT_TF2_FTS;
+ }
+
+ DBUG_ASSERT(!add_fts_doc_id_idx || (flags2 & DICT_TF2_FTS));
+
+ /* Create the table. */
trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
- indexed_table = row_merge_create_temporary_table(
- new_table_name, index_defs, prebuilt->table, trx);
- if (!indexed_table) {
+ if (dict_table_get_low(new_table_name)) {
+ my_error(ER_TABLE_EXISTS_ERROR, MYF(0),
+ new_table_name);
+ goto new_clustered_failed;
+ }
- switch (trx->error_state) {
- case DB_TABLESPACE_ALREADY_EXISTS:
- case DB_DUPLICATE_KEY:
- innobase_convert_tablename(new_table_name);
- my_error(HA_ERR_TABLE_EXIST, MYF(0),
- new_table_name);
- error = HA_ERR_TABLE_EXIST;
- break;
- default:
- error = convert_error_code_to_mysql(
- trx->error_state,
- prebuilt->table->flags,
- user_thd);
+ /* The initial space id 0 may be overridden later. */
+ indexed_table = dict_mem_table_create(
+ new_table_name, 0, n_cols, flags, flags2);
+
+ if (DICT_TF_HAS_DATA_DIR(flags)) {
+ indexed_table->data_dir_path =
+ mem_heap_strdup(indexed_table->heap,
+ user_table->data_dir_path);
+ }
+
+ for (uint i = 0; i < altered_table->s->fields; i++) {
+ const Field* field = altered_table->field[i];
+ ulint is_unsigned;
+ ulint field_type
+ = (ulint) field->type();
+ ulint col_type
+ = get_innobase_type_from_mysql_type(
+ &is_unsigned, field);
+ ulint charset_no;
+ ulint col_len;
+
+ /* we assume in dtype_form_prtype() that this
+ fits in two bytes */
+ ut_a(field_type <= MAX_CHAR_COLL_NUM);
+
+ if (!field->real_maybe_null()) {
+ field_type |= DATA_NOT_NULL;
+ }
+
+ if (field->binary()) {
+ field_type |= DATA_BINARY_TYPE;
+ }
+
+ if (is_unsigned) {
+ field_type |= DATA_UNSIGNED;
}
- ut_d(dict_table_check_for_dup_indexes(prebuilt->table,
- TRUE));
- row_mysql_unlock_data_dictionary(trx);
- mem_heap_free(heap);
+ if (dtype_is_string_type(col_type)) {
+ charset_no = (ulint) field->charset()->number;
- innobase_add_index_cleanup(
- prebuilt, trx, prebuilt->table);
+ if (charset_no > MAX_CHAR_COLL_NUM) {
+ dict_mem_table_free(indexed_table);
+ my_error(ER_WRONG_KEY_COLUMN, MYF(0),
+ field->field_name);
+ goto new_clustered_failed;
+ }
+ } else {
+ charset_no = 0;
+ }
+
+ col_len = field->pack_length();
+
+ /* The MySQL pack length contains 1 or 2 bytes
+ length field for a true VARCHAR. Let us
+ subtract that, so that the InnoDB column
+ length in the InnoDB data dictionary is the
+ real maximum byte length of the actual data. */
+
+ if (field->type() == MYSQL_TYPE_VARCHAR) {
+ uint32 length_bytes
+ = static_cast<const Field_varstring*>(
+ field)->length_bytes;
+
+ col_len -= length_bytes;
+
+ if (length_bytes == 2) {
+ field_type |= DATA_LONG_TRUE_VARCHAR;
+ }
+ }
- DBUG_RETURN(error);
+ if (dict_col_name_is_reserved(field->field_name)) {
+ dict_mem_table_free(indexed_table);
+ my_error(ER_WRONG_COLUMN_NAME, MYF(0),
+ field->field_name);
+ goto new_clustered_failed;
+ }
+
+ dict_mem_table_add_col(
+ indexed_table, heap,
+ field->field_name,
+ col_type,
+ dtype_form_prtype(field_type, charset_no),
+ col_len);
+ }
+
+ if (add_fts_doc_id) {
+ fts_add_doc_id_column(indexed_table, heap);
+ indexed_table->fts->doc_col = fts_doc_id_col;
+ ut_ad(fts_doc_id_col == altered_table->s->fields);
+ } else if (indexed_table->fts) {
+ indexed_table->fts->doc_col = fts_doc_id_col;
}
- trx->table_id = indexed_table->id;
+ error = row_create_table_for_mysql(indexed_table, trx, false);
+
+ switch (error) {
+ dict_table_t* temp_table;
+ case DB_SUCCESS:
+ /* We need to bump up the table ref count and
+ before we can use it we need to open the
+ table. The new_table must be in the data
+ dictionary cache, because we are still holding
+ the dict_sys->mutex. */
+ ut_ad(mutex_own(&dict_sys->mutex));
+ temp_table = dict_table_open_on_name(
+ indexed_table->name, TRUE, FALSE,
+ DICT_ERR_IGNORE_NONE);
+ ut_a(indexed_table == temp_table);
+ /* n_ref_count must be 1, because purge cannot
+ be executing on this very table as we are
+ holding dict_operation_lock X-latch. */
+ DBUG_ASSERT(indexed_table->n_ref_count == 1);
+ break;
+ case DB_TABLESPACE_EXISTS:
+ my_error(ER_TABLESPACE_EXISTS, MYF(0),
+ new_table_name);
+ goto new_clustered_failed;
+ case DB_DUPLICATE_KEY:
+ my_error(HA_ERR_TABLE_EXIST, MYF(0),
+ altered_table->s->table_name.str);
+ goto new_clustered_failed;
+ default:
+ my_error_innodb(error, table_name, flags);
+ new_clustered_failed:
+ DBUG_ASSERT(trx != user_trx);
+ trx_rollback_to_savepoint(trx, NULL);
+
+ ut_ad(user_table->n_ref_count == 1);
+
+ online_retry_drop_indexes_with_trx(user_table, trx);
+
+ goto err_exit;
+ }
+
+ if (ha_alter_info->handler_flags
+ & Alter_inplace_info::ADD_COLUMN) {
+
+ add_cols = dtuple_create(
+ heap, dict_table_get_n_cols(indexed_table));
+
+ dict_table_copy_types(add_cols, indexed_table);
+ }
+
+ col_map = innobase_build_col_map(
+ ha_alter_info, altered_table, old_table,
+ indexed_table, user_table,
+ add_cols, heap);
+ } else {
+ DBUG_ASSERT(!innobase_need_rebuild(ha_alter_info));
+
+ if (!indexed_table->fts
+ && innobase_fulltext_exist(altered_table->s)) {
+ indexed_table->fts = fts_create(indexed_table);
+ indexed_table->fts->doc_col = fts_doc_id_col;
+ }
}
+ /* Assign table_id, so that no table id of
+ fts_create_index_tables() will be written to the undo logs. */
+ DBUG_ASSERT(indexed_table->id != 0);
+ trx->table_id = indexed_table->id;
+
/* Create the indexes in SYS_INDEXES and load into dictionary. */
- for (num_created = 0; num_created < num_of_idx; num_created++) {
+ for (ulint num_created = 0; num_created < n_add_index; num_created++) {
- index[num_created] = row_merge_create_index(
+ add_index[num_created] = row_merge_create_index(
trx, indexed_table, &index_defs[num_created]);
- if (!index[num_created]) {
+ add_key_nums[num_created] = index_defs[num_created].key_number;
+
+ if (!add_index[num_created]) {
error = trx->error_state;
+ DBUG_ASSERT(error != DB_SUCCESS);
goto error_handling;
}
- if (index[num_created]->type & DICT_FTS) {
- fts_index = index[num_created];
- fts_create_index_tables(trx, fts_index);
+ if (add_index[num_created]->type & DICT_FTS) {
+ DBUG_ASSERT(num_fts_index);
+ DBUG_ASSERT(!fts_index);
+ DBUG_ASSERT(add_index[num_created]->type == DICT_FTS);
+ fts_index = add_index[num_created];
+ }
+ /* If only online ALTER TABLE operations have been
+ requested, allocate a modification log. If the table
+ will be locked anyway, the modification
+ log is unnecessary. When rebuilding the table
+ (new_clustered), we will allocate the log for the
+ clustered index of the old table, later. */
+ if (new_clustered
+ || locked
+ || user_table->ibd_file_missing
+ || dict_table_is_discarded(user_table)) {
+ /* No need to allocate a modification log. */
+ ut_ad(!add_index[num_created]->online_log);
+ } else if (add_index[num_created]->type & DICT_FTS) {
+ /* Fulltext indexes are not covered
+ by a modification log. */
+ } else {
+ DBUG_EXECUTE_IF("innodb_OOM_prepare_inplace_alter",
+ error = DB_OUT_OF_MEMORY;
+ goto error_handling;);
+ rw_lock_x_lock(&add_index[num_created]->lock);
+ bool ok = row_log_allocate(add_index[num_created],
+ NULL, true, NULL, NULL);
+ rw_lock_x_unlock(&add_index[num_created]->lock);
+
+ if (!ok) {
+ error = DB_OUT_OF_MEMORY;
+ goto error_handling;
+ }
}
}
- /* create FTS_DOC_ID_INDEX on the Doc ID column on the table */
- if (fts_add_doc_idx) {
- index[num_of_idx] = innobase_create_fts_doc_id_idx(
- indexed_table, trx, heap);
- /* FTS_DOC_ID_INDEX is internal defined new index */
- num_of_idx++;
- num_created++;
+ ut_ad(new_clustered == (indexed_table != user_table));
+
+ DBUG_EXECUTE_IF("innodb_OOM_prepare_inplace_alter",
+ error = DB_OUT_OF_MEMORY;
+ goto error_handling;);
+
+ if (new_clustered && !locked) {
+ /* Allocate a log for online table rebuild. */
+ dict_index_t* clust_index = dict_table_get_first_index(
+ user_table);
+
+ rw_lock_x_lock(&clust_index->lock);
+ bool ok = row_log_allocate(
+ clust_index, indexed_table,
+ !(ha_alter_info->handler_flags
+ & Alter_inplace_info::ADD_PK_INDEX),
+ add_cols, col_map);
+ rw_lock_x_unlock(&clust_index->lock);
+
+ if (!ok) {
+ error = DB_OUT_OF_MEMORY;
+ goto error_handling;
+ }
+
+ /* Assign a consistent read view for
+ row_merge_read_clustered_index(). */
+ trx_assign_read_view(user_trx);
}
- if (num_fts_index) {
+ if (fts_index) {
+ /* Ensure that the dictionary operation mode will
+ not change while creating the auxiliary tables. */
+ trx_dict_op_t op = trx_get_dict_operation(trx);
+
+#ifdef UNIV_DEBUG
+ switch (op) {
+ case TRX_DICT_OP_NONE:
+ break;
+ case TRX_DICT_OP_TABLE:
+ case TRX_DICT_OP_INDEX:
+ goto op_ok;
+ }
+ ut_error;
+op_ok:
+#endif /* UNIV_DEBUG */
+ ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
+ ut_ad(mutex_own(&dict_sys->mutex));
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
DICT_TF2_FLAG_SET(indexed_table, DICT_TF2_FTS);
+ /* This function will commit the transaction and reset
+ the trx_t::dict_operation flag on success. */
+
+ error = fts_create_index_tables(trx, fts_index);
+
+ DBUG_EXECUTE_IF("innodb_test_fail_after_fts_index_table",
+ error = DB_LOCK_WAIT_TIMEOUT;
+ goto error_handling;);
+
+ if (error != DB_SUCCESS) {
+ goto error_handling;
+ }
+
+ trx_start_for_ddl(trx, op);
+
if (!indexed_table->fts
|| ib_vector_size(indexed_table->fts->indexes) == 0) {
- fts_create_common_tables(trx, indexed_table,
- prebuilt->table->name, TRUE);
+ error = fts_create_common_tables(
+ trx, indexed_table, user_table->name, TRUE);
+
+ DBUG_EXECUTE_IF("innodb_test_fail_after_fts_common_table",
+ error = DB_LOCK_WAIT_TIMEOUT;
+ goto error_handling;);
+
+ if (error != DB_SUCCESS) {
+ goto error_handling;
+ }
indexed_table->fts->fts_status |= TABLE_DICT_LOCKED;
- innobase_fts_load_stopword(
- indexed_table, trx, ha_thd());
+
+ error = innobase_fts_load_stopword(
+ indexed_table, trx, user_thd)
+ ? DB_SUCCESS : DB_ERROR;
indexed_table->fts->fts_status &= ~TABLE_DICT_LOCKED;
- }
- if (new_primary && prebuilt->table->fts) {
- indexed_table->fts->doc_col = prebuilt->table->fts->doc_col;
+ if (error != DB_SUCCESS) {
+ goto error_handling;
+ }
}
+
+ ut_ad(trx_get_dict_operation(trx) == op);
}
- ut_ad(error == DB_SUCCESS);
+ DBUG_ASSERT(error == DB_SUCCESS);
/* Commit the data dictionary transaction in order to release
the table locks on the system tables. This means that if
@@ -1278,633 +3000,2212 @@ ha_innobase::add_index(
trx_commit_for_mysql(trx);
row_mysql_unlock_data_dictionary(trx);
- dict_locked = FALSE;
+ dict_locked = false;
ut_a(trx->lock.n_active_thrs == 0);
- if (UNIV_UNLIKELY(new_primary)) {
- /* A primary key is to be built. Acquire an exclusive
- table lock also on the table that is being created. */
- ut_ad(indexed_table != prebuilt->table);
-
- error = row_merge_lock_table(prebuilt->trx, indexed_table,
- LOCK_X);
-
- if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
+error_handling:
+ /* After an error, remove all those index definitions from the
+ dictionary which were defined. */
- goto error_handling;
- }
+ switch (error) {
+ case DB_SUCCESS:
+ ut_a(!dict_locked);
+
+ ut_d(mutex_enter(&dict_sys->mutex));
+ ut_d(dict_table_check_for_dup_indexes(
+ user_table, CHECK_PARTIAL_OK));
+ ut_d(mutex_exit(&dict_sys->mutex));
+ ha_alter_info->handler_ctx = new ha_innobase_inplace_ctx(
+ user_trx, add_index, add_key_nums, n_add_index,
+ drop_index, n_drop_index,
+ drop_foreign, n_drop_foreign,
+ add_foreign, n_add_foreign,
+ !locked, heap, trx, indexed_table, col_map,
+ add_autoinc_col,
+ ha_alter_info->create_info->auto_increment_value,
+ autoinc_col_max_value,
+ add_cols);
+ DBUG_RETURN(false);
+ case DB_TABLESPACE_EXISTS:
+ my_error(ER_TABLESPACE_EXISTS, MYF(0), "(unknown)");
+ break;
+ case DB_DUPLICATE_KEY:
+ my_error(ER_DUP_KEY, MYF(0), "SYS_INDEXES");
+ break;
+ default:
+ my_error_innodb(error, table_name, user_table->flags);
}
- /* Read the clustered index of the table and build indexes
- based on this information using temporary files and merge sort. */
- error = row_merge_build_indexes(prebuilt->trx,
- prebuilt->table, indexed_table,
- index, num_of_idx, table);
+error_handled:
-error_handling:
-
- /* After an error, remove all those index definitions from the
- dictionary which were defined. */
+ user_trx->error_info = NULL;
+ trx->error_state = DB_SUCCESS;
if (!dict_locked) {
row_mysql_lock_data_dictionary(trx);
- dict_locked = TRUE;
}
- switch (error) {
- case DB_SUCCESS:
- ut_d(dict_table_check_for_dup_indexes(prebuilt->table, TRUE));
+ if (new_clustered) {
+ if (indexed_table != user_table) {
- *add = new ha_innobase_add_index(
- table, key_info, num_of_keys, indexed_table);
+ if (DICT_TF2_FLAG_IS_SET(indexed_table, DICT_TF2_FTS)) {
+ innobase_drop_fts_index_table(
+ indexed_table, trx);
+ }
- dict_table_close(prebuilt->table, dict_locked);
- break;
+ dict_table_close(indexed_table, TRUE, FALSE);
- case DB_TOO_BIG_RECORD:
- my_error(HA_ERR_TO_BIG_ROW, MYF(0));
- goto error_exit;
- case DB_PRIMARY_KEY_IS_NULL:
- my_error(ER_PRIMARY_CANT_HAVE_NULL, MYF(0));
- /* fall through */
- case DB_DUPLICATE_KEY:
- if (fts_add_doc_idx
- && prebuilt->trx->error_key_num == num_of_idx - 1) {
- prebuilt->trx->error_key_num = ULINT_UNDEFINED;
- }
-error_exit:
- prebuilt->trx->error_info = NULL;
- /* fall through */
- default:
- dict_table_close(prebuilt->table, dict_locked);
+#ifdef UNIV_DDL_DEBUG
+ /* Nobody should have initialized the stats of the
+ newly created table yet. When this is the case, we
+ know that it has not been added for background stats
+ gathering. */
+ ut_a(!indexed_table->stat_initialized);
+#endif /* UNIV_DDL_DEBUG */
- trx->error_state = DB_SUCCESS;
+ row_merge_drop_table(trx, indexed_table);
- if (new_primary) {
- if (indexed_table != prebuilt->table) {
- dict_table_close(indexed_table, dict_locked);
- row_merge_drop_table(trx, indexed_table);
+ /* Free the log for online table rebuild, if
+ one was allocated. */
+
+ dict_index_t* clust_index = dict_table_get_first_index(
+ user_table);
+
+ rw_lock_x_lock(&clust_index->lock);
+
+ if (clust_index->online_log) {
+ ut_ad(!locked);
+ row_log_abort_sec(clust_index);
+ clust_index->online_status
+ = ONLINE_INDEX_COMPLETE;
}
- } else {
- row_merge_drop_indexes(trx, indexed_table,
- index, num_created);
+
+ rw_lock_x_unlock(&clust_index->lock);
}
+
+ trx_commit_for_mysql(trx);
+ /* n_ref_count must be 1, because purge cannot
+ be executing on this very table as we are
+ holding dict_operation_lock X-latch. */
+ DBUG_ASSERT(user_table->n_ref_count == 1 || !locked);
+
+ online_retry_drop_indexes_with_trx(user_table, trx);
+ } else {
+ ut_ad(indexed_table == user_table);
+ row_merge_drop_indexes(trx, user_table, TRUE);
+ trx_commit_for_mysql(trx);
+ }
+
+ ut_d(dict_table_check_for_dup_indexes(user_table, CHECK_ALL_COMPLETE));
+ ut_ad(!user_table->drop_aborted);
+
+err_exit:
+ /* Clear the to_be_dropped flag in the data dictionary cache. */
+ for (ulint i = 0; i < n_drop_index; i++) {
+ DBUG_ASSERT(*drop_index[i]->name != TEMP_INDEX_PREFIX);
+ DBUG_ASSERT(drop_index[i]->to_be_dropped);
+ drop_index[i]->to_be_dropped = 0;
}
- ut_ad(!new_primary || prebuilt->table->n_ref_count == 1);
- trx_commit_for_mysql(trx);
- ut_ad(dict_locked);
row_mysql_unlock_data_dictionary(trx);
+
trx_free_for_mysql(trx);
mem_heap_free(heap);
- if (prebuilt->trx) {
- trx_commit_for_mysql(prebuilt->trx);
- }
+ trx_commit_for_mysql(user_trx);
/* There might be work for utility threads.*/
srv_active_wake_master_thread();
- DBUG_RETURN(convert_error_code_to_mysql(error, prebuilt->table->flags,
- user_thd));
+ DBUG_RETURN(true);
}
-/*******************************************************************//**
-Finalize or undo add_index().
-@return 0 or error number */
+/* Check whether an index is needed for the foreign key constraint.
+If so, if it is dropped, is there an equivalent index can play its role.
+@return true if the index is needed and can't be dropped */
+static __attribute__((warn_unused_result))
+bool
+innobase_check_foreign_key_index(
+/*=============================*/
+ Alter_inplace_info* ha_alter_info, /*!< in: Structure describing
+ changes to be done by ALTER
+ TABLE */
+ dict_index_t* index, /*!< in: index to check */
+ dict_table_t* indexed_table, /*!< in: table that owns the
+ foreign keys */
+ trx_t* trx, /*!< in/out: transaction */
+ dict_foreign_t** drop_fk, /*!< in: Foreign key constraints
+ to drop */
+ ulint n_drop_fk) /*!< in: Number of foreign keys
+ to drop */
+{
+ dict_foreign_t* foreign;
+
+ ut_ad(!index->to_be_dropped);
+
+ /* Check if the index is referenced. */
+ foreign = dict_table_get_referenced_constraint(indexed_table, index);
+
+ ut_ad(!foreign || indexed_table
+ == foreign->referenced_table);
+
+ if (foreign
+ && !dict_foreign_find_index(
+ indexed_table,
+ foreign->referenced_col_names,
+ foreign->n_fields, index,
+ /*check_charsets=*/TRUE,
+ /*check_null=*/FALSE)
+ && !innobase_find_equiv_index(
+ foreign->referenced_col_names,
+ foreign->n_fields,
+ ha_alter_info->key_info_buffer,
+ ha_alter_info->index_add_buffer,
+ ha_alter_info->index_add_count)
+ ) {
+ trx->error_info = index;
+ return(true);
+ }
+
+ /* Check if this index references some
+ other table */
+ foreign = dict_table_get_foreign_constraint(
+ indexed_table, index);
+
+ ut_ad(!foreign || indexed_table
+ == foreign->foreign_table);
+
+ if (foreign
+ && !innobase_dropping_foreign(
+ foreign, drop_fk, n_drop_fk)
+ && !dict_foreign_find_index(
+ indexed_table,
+ foreign->foreign_col_names,
+ foreign->n_fields, index,
+ /*check_charsets=*/TRUE,
+ /*check_null=*/FALSE)
+ && !innobase_find_equiv_index(
+ foreign->foreign_col_names,
+ foreign->n_fields,
+ ha_alter_info->key_info_buffer,
+ ha_alter_info->index_add_buffer,
+ ha_alter_info->index_add_count)
+ ) {
+ trx->error_info = index;
+ return(true);
+ }
+
+ return(false);
+}
+
+/** Allows InnoDB to update internal structures with concurrent
+writes blocked (provided that check_if_supported_inplace_alter()
+did not return HA_ALTER_INPLACE_NO_LOCK).
+This will be invoked before inplace_alter_table().
+
+@param altered_table TABLE object for new version of table.
+@param ha_alter_info Structure describing changes to be done
+by ALTER TABLE and holding data used during in-place alter.
+
+@retval true Failure
+@retval false Success
+*/
UNIV_INTERN
-int
-ha_innobase::final_add_index(
-/*=========================*/
- handler_add_index* add_arg,/*!< in: context from add_index() */
- bool commit) /*!< in: true=commit, false=rollback */
+bool
+ha_innobase::prepare_inplace_alter_table(
+/*=====================================*/
+ TABLE* altered_table,
+ Alter_inplace_info* ha_alter_info)
{
- ha_innobase_add_index* add;
- trx_t* trx;
- int err = 0;
+ dict_index_t** drop_index; /*!< Index to be dropped */
+ ulint n_drop_index; /*!< Number of indexes to drop */
+ dict_foreign_t**drop_fk; /*!< Foreign key constraints to drop */
+ ulint n_drop_fk; /*!< Number of foreign keys to drop */
+ dict_foreign_t**add_fk = NULL; /*!< Foreign key constraints to drop */
+ ulint n_add_fk; /*!< Number of foreign keys to drop */
+ dict_table_t* indexed_table; /*!< Table where indexes are created */
+ mem_heap_t* heap;
+ int error;
+ ulint flags;
+ ulint flags2;
+ ulint max_col_len;
+ ulint add_autoinc_col_no = ULINT_UNDEFINED;
+ ulonglong autoinc_col_max_value = 0;
+ ulint fts_doc_col_no = ULINT_UNDEFINED;
+ bool add_fts_doc_id = false;
+ bool add_fts_doc_id_idx = false;
+
+ DBUG_ENTER("prepare_inplace_alter_table");
+ DBUG_ASSERT(!ha_alter_info->handler_ctx);
+ DBUG_ASSERT(ha_alter_info->create_info);
+
+ if (srv_read_only_mode) {
+ DBUG_RETURN(false);
+ }
- DBUG_ENTER("ha_innobase::final_add_index");
+ MONITOR_ATOMIC_INC(MONITOR_PENDING_ALTER_TABLE);
- ut_ad(add_arg);
- add = static_cast<class ha_innobase_add_index*>(add_arg);
+#ifdef UNIV_DEBUG
+ for (dict_index_t* index = dict_table_get_first_index(prebuilt->table);
+ index;
+ index = dict_table_get_next_index(index)) {
+ ut_ad(!index->to_be_dropped);
+ }
+#endif /* UNIV_DEBUG */
- /* Create a background transaction for the operations on
- the data dictionary tables. */
- trx = innobase_trx_allocate(user_thd);
- trx_start_if_not_started(trx);
+ ut_d(mutex_enter(&dict_sys->mutex));
+ ut_d(dict_table_check_for_dup_indexes(
+ prebuilt->table, CHECK_ABORTED_OK));
+ ut_d(mutex_exit(&dict_sys->mutex));
- /* Flag this transaction as a dictionary operation, so that
- the data dictionary will be locked in crash recovery. */
- trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
+ if (!(ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)) {
+ /* Nothing to do */
+ goto func_exit;
+ }
- /* Latch the InnoDB data dictionary exclusively so that no deadlocks
- or lock waits can happen in it during an index create operation. */
- row_mysql_lock_data_dictionary(trx);
+ if (ha_alter_info->handler_flags
+ == Alter_inplace_info::CHANGE_CREATE_OPTION
+ && !innobase_need_rebuild(ha_alter_info)) {
+ goto func_exit;
+ }
- if (add->indexed_table != prebuilt->table) {
- ulint error;
+ if (ha_alter_info->handler_flags
+ & Alter_inplace_info::CHANGE_CREATE_OPTION) {
+ if (const char* invalid_opt = create_options_are_invalid(
+ user_thd, altered_table,
+ ha_alter_info->create_info,
+ prebuilt->table->space != 0)) {
+ my_error(ER_ILLEGAL_HA_CREATE_OPTION, MYF(0),
+ table_type(), invalid_opt);
+ goto err_exit_no_heap;
+ }
+ }
- /* We copied the table (new_primary). */
- if (commit) {
- mem_heap_t* heap;
- char* tmp_name;
+ /* Check if any index name is reserved. */
+ if (innobase_index_name_is_reserved(
+ user_thd,
+ ha_alter_info->key_info_buffer,
+ ha_alter_info->key_count)) {
+err_exit_no_heap:
+ DBUG_ASSERT(prebuilt->trx->dict_operation_lock_mode == 0);
+ if (ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE) {
+ online_retry_drop_indexes(prebuilt->table, user_thd);
+ }
+ DBUG_RETURN(true);
+ }
- heap = mem_heap_create(1024);
+ indexed_table = prebuilt->table;
- /* A new primary key was defined for the table
- and there was no error at this point. We can
- now rename the old table as a temporary table,
- rename the new temporary table as the old
- table and drop the old table. */
- tmp_name = innobase_create_temporary_tablename(
- heap, '2', prebuilt->table->name);
+ /* Check that index keys are sensible */
+ error = innobase_check_index_keys(ha_alter_info, indexed_table);
- error = row_merge_rename_tables(
- prebuilt->table, add->indexed_table,
- tmp_name, trx);
+ if (error) {
+ goto err_exit_no_heap;
+ }
- ut_a(prebuilt->table->n_ref_count == 1);
+ /* Prohibit renaming a column to something that the table
+ already contains. */
+ if (ha_alter_info->handler_flags
+ & Alter_inplace_info::ALTER_COLUMN_NAME) {
+ List_iterator_fast<Create_field> cf_it(
+ ha_alter_info->alter_info->create_list);
- switch (error) {
- case DB_TABLESPACE_ALREADY_EXISTS:
- case DB_DUPLICATE_KEY:
- ut_a(add->indexed_table->n_ref_count == 0);
- innobase_convert_tablename(tmp_name);
- my_error(HA_ERR_TABLE_EXIST, MYF(0), tmp_name);
- err = HA_ERR_TABLE_EXIST;
- break;
- default:
- err = convert_error_code_to_mysql(
- error, prebuilt->table->flags,
- user_thd);
- break;
+ for (Field** fp = table->field; *fp; fp++) {
+ if (!((*fp)->flags & FIELD_IS_RENAMED)) {
+ continue;
}
- mem_heap_free(heap);
+ const char* name = 0;
+
+ cf_it.rewind();
+ while (Create_field* cf = cf_it++) {
+ if (cf->field == *fp) {
+ name = cf->field_name;
+ goto check_if_ok_to_rename;
+ }
+ }
+
+ ut_error;
+check_if_ok_to_rename:
+ /* Prohibit renaming a column from FTS_DOC_ID
+ if full-text indexes exist. */
+ if (!my_strcasecmp(system_charset_info,
+ (*fp)->field_name,
+ FTS_DOC_ID_COL_NAME)
+ && innobase_fulltext_exist(altered_table->s)) {
+ my_error(ER_INNODB_FT_WRONG_DOCID_COLUMN,
+ MYF(0), name);
+ goto err_exit_no_heap;
+ }
+
+ /* Prohibit renaming a column to an internal column. */
+ const char* s = prebuilt->table->col_names;
+ unsigned j;
+ /* Skip user columns.
+ MySQL should have checked these already.
+ We want to allow renaming of c1 to c2, c2 to c1. */
+ for (j = 0; j < table->s->fields; j++) {
+ s += strlen(s) + 1;
+ }
+
+ for (; j < prebuilt->table->n_def; j++) {
+ if (!my_strcasecmp(
+ system_charset_info, name, s)) {
+ my_error(ER_WRONG_COLUMN_NAME, MYF(0),
+ s);
+ goto err_exit_no_heap;
+ }
+
+ s += strlen(s) + 1;
+ }
}
+ }
- if (!commit || err) {
- dict_table_close(add->indexed_table, TRUE);
- error = row_merge_drop_table(trx, add->indexed_table);
- trx_commit_for_mysql(prebuilt->trx);
- } else {
- dict_table_t* old_table = prebuilt->table;
- trx_commit_for_mysql(prebuilt->trx);
- row_prebuilt_free(prebuilt, TRUE);
- error = row_merge_drop_table(trx, old_table);
- prebuilt = row_create_prebuilt(add->indexed_table,
- 0 /* XXX Do we know the mysql_row_len here?
- Before the addition of this parameter to
- row_create_prebuilt() the mysql_row_len
- member was left 0 (from zalloc) in the
- prebuilt object. */);
+ if (!innobase_table_flags(altered_table,
+ ha_alter_info->create_info,
+ user_thd,
+ srv_file_per_table
+ || indexed_table->space != 0,
+ &flags, &flags2)) {
+ goto err_exit_no_heap;
+ }
+
+ max_col_len = DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags);
+
+ /* Check each index's column length to make sure they do not
+ exceed limit */
+ for (ulint i = 0; i < ha_alter_info->index_add_count; i++) {
+ const KEY* key = &ha_alter_info->key_info_buffer[
+ ha_alter_info->index_add_buffer[i]];
+
+ if (key->flags & HA_FULLTEXT) {
+ /* The column length does not matter for
+ fulltext search indexes. But, UNIQUE
+ fulltext indexes are not supported. */
+ DBUG_ASSERT(!(key->flags & HA_NOSAME));
+ DBUG_ASSERT(!(key->flags & HA_KEYFLAG_MASK
+ & ~(HA_FULLTEXT
+ | HA_PACK_KEY
+ | HA_BINARY_PACK_KEY)));
+ continue;
}
- err = convert_error_code_to_mysql(
- error, prebuilt->table->flags, user_thd);
+ if (innobase_check_column_length(max_col_len, key)) {
+ my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0),
+ max_col_len);
+ goto err_exit_no_heap;
+ }
}
- if (add->indexed_table == prebuilt->table
- || DICT_TF2_FLAG_IS_SET(prebuilt->table, DICT_TF2_FTS_ADD_DOC_ID)) {
- /* We created secondary indexes (!new_primary) or create full
- text index and added a new Doc ID column, we will need to
- rename the secondary index on the Doc ID column to its
- official index name.. */
+ /* Check existing index definitions for too-long column
+ prefixes as well, in case max_col_len shrunk. */
+ for (const dict_index_t* index
+ = dict_table_get_first_index(indexed_table);
+ index;
+ index = dict_table_get_next_index(index)) {
+ if (index->type & DICT_FTS) {
+ DBUG_ASSERT(index->type == DICT_FTS
+ || (index->type & DICT_CORRUPT));
+ continue;
+ }
- if (commit) {
- err = convert_error_code_to_mysql(
- row_merge_rename_indexes(trx, prebuilt->table),
- prebuilt->table->flags, user_thd);
+ for (ulint i = 0; i < dict_index_get_n_fields(index); i++) {
+ const dict_field_t* field
+ = dict_index_get_nth_field(index, i);
+ if (field->prefix_len > max_col_len) {
+ my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0),
+ max_col_len);
+ goto err_exit_no_heap;
+ }
}
+ }
+
+ n_drop_index = 0;
+ n_drop_fk = 0;
+
+ if (ha_alter_info->handler_flags
+ & Alter_inplace_info::DROP_FOREIGN_KEY) {
+ DBUG_ASSERT(ha_alter_info->alter_info->drop_list.elements > 0);
- if (!commit || err) {
- dict_index_t* index;
- dict_index_t* next_index;
+ heap = mem_heap_create(1024);
- for (index = dict_table_get_first_index(
- prebuilt->table);
- index; index = next_index) {
+ drop_fk = static_cast<dict_foreign_t**>(
+ mem_heap_alloc(
+ heap,
+ ha_alter_info->alter_info->drop_list.elements
+ * sizeof(dict_foreign_t*)));
- next_index = dict_table_get_next_index(index);
+ List_iterator<Alter_drop> drop_it(
+ ha_alter_info->alter_info->drop_list);
- if (*index->name == TEMP_INDEX_PREFIX) {
- row_merge_drop_index(
- index, prebuilt->table, trx);
+ while (Alter_drop* drop = drop_it++) {
+ if (drop->type != Alter_drop::FOREIGN_KEY) {
+ continue;
+ }
+
+ for (dict_foreign_t* foreign = UT_LIST_GET_FIRST(
+ prebuilt->table->foreign_list);
+ foreign != NULL;
+ foreign = UT_LIST_GET_NEXT(
+ foreign_list, foreign)) {
+ const char* fid = strchr(foreign->id, '/');
+
+ DBUG_ASSERT(fid);
+ /* If no database/ prefix was present in
+ the FOREIGN KEY constraint name, compare
+ to the full constraint name. */
+ fid = fid ? fid + 1 : foreign->id;
+
+ if (!my_strcasecmp(system_charset_info,
+ fid, drop->name)) {
+ drop_fk[n_drop_fk++] = foreign;
+ goto found_fk;
}
}
+
+ my_error(ER_CANT_DROP_FIELD_OR_KEY, MYF(0),
+ drop->name);
+ goto err_exit;
+found_fk:
+ continue;
}
- DICT_TF2_FLAG_UNSET(prebuilt->table, DICT_TF2_FTS_ADD_DOC_ID);
+ DBUG_ASSERT(n_drop_fk > 0);
+ DBUG_ASSERT(n_drop_fk
+ == ha_alter_info->alter_info->drop_list.elements);
+ } else {
+ drop_fk = NULL;
+ heap = NULL;
}
- /* If index is successfully built, we will need to rebuild index
- translation table. Set valid index entry count in the translation
- table to zero. */
- if (err == 0 && commit) {
- ibool new_primary;
- dict_index_t* index;
- dict_index_t* next_index;
- ibool new_fts = FALSE;
- dict_index_t* primary;
+ if (ha_alter_info->index_drop_count) {
+ dict_index_t* drop_primary = NULL;
- new_primary = !my_strcasecmp(
- system_charset_info, add->key_info[0].name, "PRIMARY");
-
- primary = dict_table_get_first_index(add->indexed_table);
-
- if (!new_primary) {
- new_primary = !my_strcasecmp(
- system_charset_info, add->key_info[0].name,
- primary->name);
+ DBUG_ASSERT(ha_alter_info->handler_flags
+ & (Alter_inplace_info::DROP_INDEX
+ | Alter_inplace_info::DROP_UNIQUE_INDEX
+ | Alter_inplace_info::DROP_PK_INDEX));
+ /* Check which indexes to drop. */
+ if (!heap) {
+ heap = mem_heap_create(1024);
+ }
+ drop_index = static_cast<dict_index_t**>(
+ mem_heap_alloc(
+ heap, (ha_alter_info->index_drop_count + 1)
+ * sizeof *drop_index));
+
+ for (uint i = 0; i < ha_alter_info->index_drop_count; i++) {
+ const KEY* key
+ = ha_alter_info->index_drop_buffer[i];
+ dict_index_t* index
+ = dict_table_get_index_on_name_and_min_id(
+ indexed_table, key->name);
+
+ if (!index) {
+ push_warning_printf(
+ user_thd,
+ Sql_condition::WARN_LEVEL_WARN,
+ HA_ERR_WRONG_INDEX,
+ "InnoDB could not find key "
+ "with name %s", key->name);
+ } else {
+ ut_ad(!index->to_be_dropped);
+ if (!dict_index_is_clust(index)) {
+ drop_index[n_drop_index++] = index;
+ } else {
+ drop_primary = index;
+ }
+ }
}
- share->idx_trans_tbl.index_count = 0;
+ /* If all FULLTEXT indexes were removed, drop an
+ internal FTS_DOC_ID_INDEX as well, unless it exists in
+ the table. */
+
+ if (innobase_fulltext_exist(table->s)
+ && !innobase_fulltext_exist(altered_table->s)
+ && !DICT_TF2_FLAG_IS_SET(
+ indexed_table, DICT_TF2_FTS_HAS_DOC_ID)) {
+ dict_index_t* fts_doc_index
+ = dict_table_get_index_on_name(
+ indexed_table, FTS_DOC_ID_INDEX_NAME);
+
+ // Add some fault tolerance for non-debug builds.
+ if (fts_doc_index == NULL) {
+ goto check_if_can_drop_indexes;
+ }
- if (new_primary) {
- for (index = primary; index; index = next_index) {
+ DBUG_ASSERT(!fts_doc_index->to_be_dropped);
+
+ for (uint i = 0; i < table->s->keys; i++) {
+ if (!my_strcasecmp(
+ system_charset_info,
+ FTS_DOC_ID_INDEX_NAME,
+ table->s->key_info[i].name)) {
+ /* The index exists in the MySQL
+ data dictionary. Do not drop it,
+ even though it is no longer needed
+ by InnoDB fulltext search. */
+ goto check_if_can_drop_indexes;
+ }
+ }
- next_index = dict_table_get_next_index(index);
+ drop_index[n_drop_index++] = fts_doc_index;
+ }
- if (index->type & DICT_FTS) {
- fts_add_index(index,
- add->indexed_table);
- new_fts = TRUE;
+check_if_can_drop_indexes:
+ /* Check if the indexes can be dropped. */
+
+ /* Prevent a race condition between DROP INDEX and
+ CREATE TABLE adding FOREIGN KEY constraints. */
+ row_mysql_lock_data_dictionary(prebuilt->trx);
+
+ if (prebuilt->trx->check_foreigns) {
+ for (uint i = 0; i < n_drop_index; i++) {
+ dict_index_t* index = drop_index[i];
+
+ if (innobase_check_foreign_key_index(
+ ha_alter_info, index, indexed_table,
+ prebuilt->trx, drop_fk, n_drop_fk)) {
+ row_mysql_unlock_data_dictionary(
+ prebuilt->trx);
+ prebuilt->trx->error_info = index;
+ print_error(HA_ERR_DROP_INDEX_FK,
+ MYF(0));
+ goto err_exit;
}
}
+
+ /* If a primary index is dropped, need to check
+ any depending foreign constraints get affected */
+ if (drop_primary
+ && innobase_check_foreign_key_index(
+ ha_alter_info, drop_primary, indexed_table,
+ prebuilt->trx, drop_fk, n_drop_fk)) {
+ row_mysql_unlock_data_dictionary(prebuilt->trx);
+ print_error(HA_ERR_DROP_INDEX_FK, MYF(0));
+ goto err_exit;
+ }
+ }
+
+ if (!n_drop_index) {
+ drop_index = NULL;
} else {
- ulint i;
- for (i = 0; i < add->num_of_keys; i++) {
- if (add->key_info[i].flags & HA_FULLTEXT) {
- dict_index_t* fts_index;
-
- fts_index =
- dict_table_get_index_on_name(
- prebuilt->table,
- add->key_info[i].name);
-
- ut_ad(fts_index);
- fts_add_index(fts_index,
- prebuilt->table);
- new_fts = TRUE;
+ /* Flag all indexes that are to be dropped. */
+ for (ulint i = 0; i < n_drop_index; i++) {
+ ut_ad(!drop_index[i]->to_be_dropped);
+ drop_index[i]->to_be_dropped = 1;
+ }
+ }
+
+ row_mysql_unlock_data_dictionary(prebuilt->trx);
+ } else {
+ drop_index = NULL;
+ }
+
+ n_add_fk = 0;
+
+ if (ha_alter_info->handler_flags
+ & Alter_inplace_info::ADD_FOREIGN_KEY) {
+ ut_ad(!prebuilt->trx->check_foreigns);
+
+ if (!heap) {
+ heap = mem_heap_create(1024);
+ }
+
+ add_fk = static_cast<dict_foreign_t**>(
+ mem_heap_zalloc(
+ heap,
+ ha_alter_info->alter_info->key_list.elements
+ * sizeof(dict_foreign_t*)));
+
+ if (!innobase_get_foreign_key_info(
+ ha_alter_info, table_share, prebuilt->table,
+ add_fk, &n_add_fk, heap, prebuilt->trx)) {
+err_exit:
+ if (n_drop_index) {
+ row_mysql_lock_data_dictionary(prebuilt->trx);
+
+ /* Clear the to_be_dropped flags, which might
+ have been set at this point. */
+ for (ulint i = 0; i < n_drop_index; i++) {
+ DBUG_ASSERT(*drop_index[i]->name
+ != TEMP_INDEX_PREFIX);
+ drop_index[i]->to_be_dropped = 0;
}
+
+ row_mysql_unlock_data_dictionary(prebuilt->trx);
}
+
+ if (heap) {
+ mem_heap_free(heap);
+ }
+ goto err_exit_no_heap;
}
+ }
- if (new_fts) {
- fts_optimize_add_table(prebuilt->table);
+ if (!(ha_alter_info->handler_flags & INNOBASE_INPLACE_CREATE)) {
+ if (heap) {
+ ha_alter_info->handler_ctx
+ = new ha_innobase_inplace_ctx(
+ prebuilt->trx, 0, 0, 0,
+ drop_index, n_drop_index,
+ drop_fk, n_drop_fk,
+ add_fk, n_add_fk,
+ ha_alter_info->online,
+ heap, 0, indexed_table, 0,
+ ULINT_UNDEFINED, 0, 0, 0);
}
+
+func_exit:
+ DBUG_ASSERT(prebuilt->trx->dict_operation_lock_mode == 0);
+ if (ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE) {
+ online_retry_drop_indexes(prebuilt->table, user_thd);
+ }
+ DBUG_RETURN(false);
}
- trx_commit_for_mysql(trx);
- if (prebuilt->trx) {
- trx_commit_for_mysql(prebuilt->trx);
+ /* If we are to build a full-text search index, check whether
+ the table already has a DOC ID column. If not, we will need to
+ add a Doc ID hidden column and rebuild the primary index */
+ if (innobase_fulltext_exist(altered_table->s)) {
+ ulint doc_col_no;
+
+ if (!innobase_fts_check_doc_id_col(
+ prebuilt->table, altered_table, &fts_doc_col_no)) {
+ fts_doc_col_no = altered_table->s->fields;
+ add_fts_doc_id = true;
+ add_fts_doc_id_idx = true;
+
+ push_warning_printf(
+ user_thd,
+ Sql_condition::WARN_LEVEL_WARN,
+ HA_ERR_WRONG_INDEX,
+ "InnoDB rebuilding table to add column "
+ FTS_DOC_ID_COL_NAME);
+ } else if (fts_doc_col_no == ULINT_UNDEFINED) {
+ goto err_exit;
+ }
+
+ switch (innobase_fts_check_doc_id_index(
+ prebuilt->table, altered_table, &doc_col_no)) {
+ case FTS_NOT_EXIST_DOC_ID_INDEX:
+ add_fts_doc_id_idx = true;
+ break;
+ case FTS_INCORRECT_DOC_ID_INDEX:
+ my_error(ER_INNODB_FT_WRONG_DOCID_INDEX, MYF(0),
+ FTS_DOC_ID_INDEX_NAME);
+ goto err_exit;
+ case FTS_EXIST_DOC_ID_INDEX:
+ DBUG_ASSERT(doc_col_no == fts_doc_col_no
+ || doc_col_no == ULINT_UNDEFINED
+ || (ha_alter_info->handler_flags
+ & (Alter_inplace_info::ALTER_COLUMN_ORDER
+ | Alter_inplace_info::DROP_COLUMN
+ | Alter_inplace_info::ADD_COLUMN)));
+ }
}
- ut_d(dict_table_check_for_dup_indexes(prebuilt->table, TRUE));
+ /* See if an AUTO_INCREMENT column was added. */
+ uint i = 0;
+ List_iterator_fast<Create_field> cf_it(
+ ha_alter_info->alter_info->create_list);
+ while (const Create_field* new_field = cf_it++) {
+ const Field* field;
- ut_a(fts_check_cached_index(prebuilt->table));
+ DBUG_ASSERT(i < altered_table->s->fields);
- row_mysql_unlock_data_dictionary(trx);
+ for (uint old_i = 0; table->field[old_i]; old_i++) {
+ if (new_field->field == table->field[old_i]) {
+ goto found_col;
+ }
+ }
- trx_free_for_mysql(trx);
+ /* This is an added column. */
+ DBUG_ASSERT(!new_field->field);
+ DBUG_ASSERT(ha_alter_info->handler_flags
+ & Alter_inplace_info::ADD_COLUMN);
- /* There might be work for utility threads.*/
- srv_active_wake_master_thread();
+ field = altered_table->field[i];
- delete add;
- DBUG_RETURN(err);
+ DBUG_ASSERT((MTYP_TYPENR(field->unireg_check)
+ == Field::NEXT_NUMBER)
+ == !!(field->flags & AUTO_INCREMENT_FLAG));
+
+ if (field->flags & AUTO_INCREMENT_FLAG) {
+ if (add_autoinc_col_no != ULINT_UNDEFINED) {
+ /* This should have been blocked earlier. */
+ ut_ad(0);
+ my_error(ER_WRONG_AUTO_KEY, MYF(0));
+ goto err_exit;
+ }
+ add_autoinc_col_no = i;
+
+ autoinc_col_max_value = innobase_get_int_col_max_value(
+ field);
+ }
+found_col:
+ i++;
+ }
+
+ DBUG_ASSERT(user_thd == prebuilt->trx->mysql_thd);
+ DBUG_RETURN(prepare_inplace_alter_table_dict(
+ ha_alter_info, altered_table, table,
+ prebuilt->table, prebuilt->trx,
+ table_share->table_name.str,
+ flags, flags2,
+ heap, drop_index, n_drop_index,
+ drop_fk, n_drop_fk, add_fk, n_add_fk,
+ fts_doc_col_no, add_autoinc_col_no,
+ autoinc_col_max_value, add_fts_doc_id,
+ add_fts_doc_id_idx));
}
-/*******************************************************************//**
-Prepare to drop some indexes of a table.
-@return 0 or error number */
+
+/** Alter the table structure in-place with operations
+specified using Alter_inplace_info.
+The level of concurrency allowed during this operation depends
+on the return value from check_if_supported_inplace_alter().
+
+@param altered_table TABLE object for new version of table.
+@param ha_alter_info Structure describing changes to be done
+by ALTER TABLE and holding data used during in-place alter.
+
+@retval true Failure
+@retval false Success
+*/
UNIV_INTERN
-int
-ha_innobase::prepare_drop_index(
-/*============================*/
- TABLE* in_table, /*!< in: Table where indexes are dropped */
- uint* key_num, /*!< in: Key nums to be dropped */
- uint num_of_keys) /*!< in: Number of keys to be dropped */
+bool
+ha_innobase::inplace_alter_table(
+/*=============================*/
+ TABLE* altered_table,
+ Alter_inplace_info* ha_alter_info)
{
- trx_t* trx;
- int err = 0;
- uint n_key;
+ dberr_t error;
- DBUG_ENTER("ha_innobase::prepare_drop_index");
- ut_ad(table);
- ut_ad(key_num);
- ut_ad(num_of_keys);
- if (srv_created_new_raw || srv_force_recovery) {
- DBUG_RETURN(HA_ERR_WRONG_COMMAND);
+ DBUG_ENTER("inplace_alter_table");
+
+ if (srv_read_only_mode) {
+ DBUG_RETURN(false);
}
- update_thd();
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(!rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+ ut_ad(!rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED));
+#endif /* UNIV_SYNC_DEBUG */
- trx_search_latch_release_if_reserved(prebuilt->trx);
- trx = prebuilt->trx;
+ DEBUG_SYNC(user_thd, "innodb_inplace_alter_table_enter");
- /* Test and mark all the indexes to be dropped */
+ if (!(ha_alter_info->handler_flags & INNOBASE_INPLACE_CREATE)) {
+ok_exit:
+ DEBUG_SYNC(user_thd, "innodb_after_inplace_alter_table");
+ DBUG_RETURN(false);
+ }
- row_mysql_lock_data_dictionary(trx);
- ut_d(dict_table_check_for_dup_indexes(prebuilt->table, TRUE));
+ if (ha_alter_info->handler_flags
+ == Alter_inplace_info::CHANGE_CREATE_OPTION
+ && !innobase_need_rebuild(ha_alter_info)) {
+ goto ok_exit;
+ }
- /* Check that none of the indexes have previously been flagged
- for deletion. */
- {
- const dict_index_t* index
- = dict_table_get_first_index(prebuilt->table);
- do {
- ut_a(!index->to_be_dropped);
- index = dict_table_get_next_index(index);
- } while (index);
+ ha_innobase_inplace_ctx* ctx
+ = static_cast<ha_innobase_inplace_ctx*>
+ (ha_alter_info->handler_ctx);
+
+ DBUG_ASSERT(ctx);
+ DBUG_ASSERT(ctx->trx);
+
+ if (prebuilt->table->ibd_file_missing
+ || dict_table_is_discarded(prebuilt->table)) {
+ goto all_done;
+ }
+
+ /* Read the clustered index of the table and build
+ indexes based on this information using temporary
+ files and merge sort. */
+ DBUG_EXECUTE_IF("innodb_OOM_inplace_alter",
+ error = DB_OUT_OF_MEMORY; goto oom;);
+ error = row_merge_build_indexes(
+ prebuilt->trx,
+ prebuilt->table, ctx->indexed_table,
+ ctx->online,
+ ctx->add, ctx->add_key_numbers, ctx->num_to_add,
+ altered_table, ctx->add_cols, ctx->col_map,
+ ctx->add_autoinc, ctx->sequence);
+#ifndef DBUG_OFF
+oom:
+#endif /* !DBUG_OFF */
+ if (error == DB_SUCCESS && ctx->online
+ && ctx->indexed_table != prebuilt->table) {
+ DEBUG_SYNC_C("row_log_table_apply1_before");
+ error = row_log_table_apply(
+ ctx->thr, prebuilt->table, altered_table);
}
- for (n_key = 0; n_key < num_of_keys; n_key++) {
- const KEY* key;
- dict_index_t* index;
+ DEBUG_SYNC_C("inplace_after_index_build");
- key = table->key_info + key_num[n_key];
- index = dict_table_get_index_on_name_and_min_id(
- prebuilt->table, key->name);
+ DBUG_EXECUTE_IF("create_index_fail",
+ error = DB_DUPLICATE_KEY;);
- if (!index) {
- sql_print_error("InnoDB could not find key n:o %u "
- "with name %s for table %s",
- key_num[n_key],
- key ? key->name : "NULL",
- prebuilt->table->name);
+ /* After an error, remove all those index definitions
+ from the dictionary which were defined. */
- err = HA_ERR_KEY_NOT_FOUND;
- goto func_exit;
+ switch (error) {
+ KEY* dup_key;
+ all_done:
+ case DB_SUCCESS:
+ ut_d(mutex_enter(&dict_sys->mutex));
+ ut_d(dict_table_check_for_dup_indexes(
+ prebuilt->table, CHECK_PARTIAL_OK));
+ ut_d(mutex_exit(&dict_sys->mutex));
+ /* prebuilt->table->n_ref_count can be anything here,
+ given that we hold at most a shared lock on the table. */
+ goto ok_exit;
+ case DB_DUPLICATE_KEY:
+ if (prebuilt->trx->error_key_num == ULINT_UNDEFINED
+ || ha_alter_info->key_count == 0) {
+ /* This should be the hidden index on
+ FTS_DOC_ID, or there is no PRIMARY KEY in the
+ table. Either way, we should be seeing and
+ reporting a bogus duplicate key error. */
+ dup_key = NULL;
+ } else {
+ DBUG_ASSERT(prebuilt->trx->error_key_num
+ < ha_alter_info->key_count);
+ dup_key = &ha_alter_info->key_info_buffer[
+ prebuilt->trx->error_key_num];
}
+ print_keydup_error(altered_table, dup_key, MYF(0));
+ break;
+ case DB_ONLINE_LOG_TOO_BIG:
+ DBUG_ASSERT(ctx->online);
+ my_error(ER_INNODB_ONLINE_LOG_TOO_BIG, MYF(0),
+ (prebuilt->trx->error_key_num == ULINT_UNDEFINED)
+ ? FTS_DOC_ID_INDEX_NAME
+ : ha_alter_info->key_info_buffer[
+ prebuilt->trx->error_key_num].name);
+ break;
+ case DB_INDEX_CORRUPT:
+ my_error(ER_INDEX_CORRUPT, MYF(0),
+ (prebuilt->trx->error_key_num == ULINT_UNDEFINED)
+ ? FTS_DOC_ID_INDEX_NAME
+ : ha_alter_info->key_info_buffer[
+ prebuilt->trx->error_key_num].name);
+ break;
+ default:
+ my_error_innodb(error,
+ table_share->table_name.str,
+ prebuilt->table->flags);
+ }
- /* Refuse to drop the clustered index. It would be
- better to automatically generate a clustered index,
- but mysql_alter_table() will call this method only
- after ha_innobase::add_index(). */
+ /* prebuilt->table->n_ref_count can be anything here, given
+ that we hold at most a shared lock on the table. */
+ prebuilt->trx->error_info = NULL;
+ ctx->trx->error_state = DB_SUCCESS;
- if (dict_index_is_clust(index)) {
- my_error(ER_REQUIRES_PRIMARY_KEY, MYF(0));
- err = -1;
- goto func_exit;
- }
+ DBUG_RETURN(true);
+}
- rw_lock_x_lock(dict_index_get_lock(index));
- index->to_be_dropped = TRUE;
- rw_lock_x_unlock(dict_index_get_lock(index));
+/** Free the modification log for online table rebuild.
+@param table table that was being rebuilt online */
+static
+void
+innobase_online_rebuild_log_free(
+/*=============================*/
+ dict_table_t* table)
+{
+ dict_index_t* clust_index = dict_table_get_first_index(table);
+
+ ut_ad(mutex_own(&dict_sys->mutex));
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+ rw_lock_x_lock(&clust_index->lock);
+
+ if (clust_index->online_log) {
+ ut_ad(dict_index_get_online_status(clust_index)
+ == ONLINE_INDEX_CREATION);
+ clust_index->online_status = ONLINE_INDEX_COMPLETE;
+ row_log_free(clust_index->online_log);
+ DEBUG_SYNC_C("innodb_online_rebuild_log_free_aborted");
+ }
+
+ DBUG_ASSERT(dict_index_get_online_status(clust_index)
+ == ONLINE_INDEX_COMPLETE);
+ rw_lock_x_unlock(&clust_index->lock);
+}
+
+/** Rollback a secondary index creation, drop the indexes with
+temporary index prefix
+@param prebuilt the prebuilt struct
+@param table_share the TABLE_SHARE
+@param trx the transaction
+*/
+static
+void
+innobase_rollback_sec_index(
+/*========================*/
+ row_prebuilt_t* prebuilt,
+ const TABLE_SHARE* table_share,
+ trx_t* trx)
+{
+ row_merge_drop_indexes(trx, prebuilt->table, FALSE);
+
+ /* Free the table->fts only if there is no FTS_DOC_ID
+ in the table */
+ if (prebuilt->table->fts
+ && !DICT_TF2_FLAG_IS_SET(prebuilt->table,
+ DICT_TF2_FTS_HAS_DOC_ID)
+ && !innobase_fulltext_exist(table_share)) {
+ fts_free(prebuilt->table);
}
+}
- /* If FOREIGN_KEY_CHECKS = 1 you may not drop an index defined
- for a foreign key constraint because InnoDB requires that both
- tables contain indexes for the constraint. Such index can
- be dropped only if FOREIGN_KEY_CHECKS is set to 0.
- Note that CREATE INDEX id ON table does a CREATE INDEX and
- DROP INDEX, and we can ignore here foreign keys because a
- new index for the foreign key has already been created.
+/** Roll back the changes made during prepare_inplace_alter_table()
+and inplace_alter_table() inside the storage engine. Note that the
+allowed level of concurrency during this operation will be the same as
+for inplace_alter_table() and thus might be higher than during
+prepare_inplace_alter_table(). (E.g. concurrent writes were blocked
+during prepare, but might not be during commit).
+
+@param ha_alter_info Data used during in-place alter.
+@param table_share the TABLE_SHARE
+@param prebuilt the prebuilt struct
+@retval true Failure
+@retval false Success
+*/
+inline
+bool
+rollback_inplace_alter_table(
+/*=========================*/
+ Alter_inplace_info* ha_alter_info,
+ const TABLE_SHARE* table_share,
+ row_prebuilt_t* prebuilt)
+{
+ bool fail = false;
- We check for the foreign key constraints after marking the
- candidate indexes for deletion, because when we check for an
- equivalent foreign index we don't want to select an index that
- is later deleted. */
+ ha_innobase_inplace_ctx* ctx
+ = static_cast<ha_innobase_inplace_ctx*>
+ (ha_alter_info->handler_ctx);
- if (trx->check_foreigns
- && thd_sql_command(user_thd) != SQLCOM_CREATE_INDEX) {
- dict_index_t* index;
+ DBUG_ENTER("rollback_inplace_alter_table");
- for (index = dict_table_get_first_index(prebuilt->table);
- index;
- index = dict_table_get_next_index(index)) {
- dict_foreign_t* foreign;
+ if (!ctx || !ctx->trx) {
+ /* If we have not started a transaction yet,
+ (almost) nothing has been or needs to be done. */
+ goto func_exit;
+ }
- if (!index->to_be_dropped) {
+ row_mysql_lock_data_dictionary(ctx->trx);
- continue;
+ if (prebuilt->table != ctx->indexed_table) {
+ dberr_t err;
+ ulint flags = ctx->indexed_table->flags;
+
+ /* DML threads can access ctx->indexed_table via the
+ online rebuild log. Free it first. */
+ innobase_online_rebuild_log_free(prebuilt->table);
+
+	/* Since the FTS index specific auxiliary tables have
+	not yet been registered with "table->fts" by fts_add_index(),
+	we will need to explicitly delete them here */
+ if (DICT_TF2_FLAG_IS_SET(ctx->indexed_table, DICT_TF2_FTS)) {
+
+ err = innobase_drop_fts_index_table(
+ ctx->indexed_table, ctx->trx);
+
+ if (err != DB_SUCCESS) {
+ my_error_innodb(
+ err, table_share->table_name.str,
+ flags);
+ fail = true;
}
+ }
- /* Check if the index is referenced. */
- foreign = dict_table_get_referenced_constraint(
- prebuilt->table, index);
+ /* Drop the table. */
+ dict_table_close(ctx->indexed_table, TRUE, FALSE);
- if (foreign) {
-index_needed:
- trx_set_detailed_error(
- trx,
- "Index needed in foreign key "
- "constraint");
+#ifdef UNIV_DDL_DEBUG
+ /* Nobody should have initialized the stats of the
+ newly created table yet. When this is the case, we
+ know that it has not been added for background stats
+ gathering. */
+ ut_a(!ctx->indexed_table->stat_initialized);
+#endif /* UNIV_DDL_DEBUG */
- trx->error_info = index;
+ err = row_merge_drop_table(ctx->trx, ctx->indexed_table);
- err = HA_ERR_DROP_INDEX_FK;
- break;
- } else {
- /* Check if this index references some
- other table */
- foreign = dict_table_get_foreign_constraint(
- prebuilt->table, index);
+ switch (err) {
+ case DB_SUCCESS:
+ break;
+ default:
+ my_error_innodb(err, table_share->table_name.str,
+ flags);
+ fail = true;
+ }
+ } else {
+ DBUG_ASSERT(!(ha_alter_info->handler_flags
+ & Alter_inplace_info::ADD_PK_INDEX));
- if (foreign) {
- ut_a(foreign->foreign_index == index);
+ trx_start_for_ddl(ctx->trx, TRX_DICT_OP_INDEX);
- /* Search for an equivalent index that
- the foreign key constraint could use
- if this index were to be deleted. */
- if (!dict_foreign_find_equiv_index(
- foreign)) {
+ innobase_rollback_sec_index(prebuilt, table_share, ctx->trx);
+ }
- goto index_needed;
- }
- }
+ trx_commit_for_mysql(ctx->trx);
+ row_mysql_unlock_data_dictionary(ctx->trx);
+ trx_free_for_mysql(ctx->trx);
+
+
+func_exit:
+#ifndef DBUG_OFF
+ dict_index_t* clust_index = dict_table_get_first_index(
+ prebuilt->table);
+ DBUG_ASSERT(!clust_index->online_log);
+ DBUG_ASSERT(dict_index_get_online_status(clust_index)
+ == ONLINE_INDEX_COMPLETE);
+#endif /* !DBUG_OFF */
+
+ if (ctx) {
+ if (ctx->num_to_add_fk) {
+ for (ulint i = 0; i < ctx->num_to_add_fk; i++) {
+ dict_foreign_free(ctx->add_fk[i]);
}
}
- } else if (thd_sql_command(user_thd) == SQLCOM_CREATE_INDEX) {
- /* This is a drop of a foreign key constraint index that
- was created by MySQL when the constraint was added. MySQL
- does this when the user creates an index explicitly which
- can be used in place of the automatically generated index. */
- dict_index_t* index;
+ if (ctx->num_to_drop) {
+ row_mysql_lock_data_dictionary(prebuilt->trx);
+
+ /* Clear the to_be_dropped flags
+ in the data dictionary cache.
+ The flags may already have been cleared,
+ in case an error was detected in
+ commit_inplace_alter_table(). */
+ for (ulint i = 0; i < ctx->num_to_drop; i++) {
+ dict_index_t* index = ctx->drop[i];
+ DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
+
+ index->to_be_dropped = 0;
+ }
+
+ row_mysql_unlock_data_dictionary(prebuilt->trx);
+ }
+ }
+
+ trx_commit_for_mysql(prebuilt->trx);
+ srv_active_wake_master_thread();
+ MONITOR_ATOMIC_DEC(MONITOR_PENDING_ALTER_TABLE);
+ DBUG_RETURN(fail);
+}
+
+/** Drop a FOREIGN KEY constraint.
+@param table_share the TABLE_SHARE
+@param trx data dictionary transaction
+@param foreign the foreign key constraint, will be freed
+@retval true Failure
+@retval false Success */
+static __attribute__((nonnull, warn_unused_result))
+bool
+innobase_drop_foreign(
+/*==================*/
+ const TABLE_SHARE* table_share,
+ trx_t* trx,
+ dict_foreign_t* foreign)
+{
+ DBUG_ENTER("innobase_drop_foreign");
+
+ DBUG_ASSERT(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
+ ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
+ ut_ad(mutex_own(&dict_sys->mutex));
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+ /* Drop the constraint from the data dictionary. */
+ static const char sql[] =
+ "PROCEDURE DROP_FOREIGN_PROC () IS\n"
+ "BEGIN\n"
+ "DELETE FROM SYS_FOREIGN WHERE ID=:id;\n"
+ "DELETE FROM SYS_FOREIGN_COLS WHERE ID=:id;\n"
+ "END;\n";
+
+ dberr_t error;
+ pars_info_t* info;
+
+ info = pars_info_create();
+ pars_info_add_str_literal(info, "id", foreign->id);
+
+ trx->op_info = "dropping foreign key constraint from dictionary";
+ error = que_eval_sql(info, sql, FALSE, trx);
+ trx->op_info = "";
+
+ DBUG_EXECUTE_IF("ib_drop_foreign_error",
+ error = DB_OUT_OF_FILE_SPACE;);
+
+ if (error != DB_SUCCESS) {
+ my_error_innodb(error, table_share->table_name.str, 0);
+ trx->error_state = DB_SUCCESS;
+ DBUG_RETURN(true);
+ }
+
+ /* Drop the foreign key constraint from the data dictionary cache. */
+ dict_foreign_remove_from_cache(foreign);
+ DBUG_RETURN(false);
+}
+
+/** Rename a column.
+@param table_share the TABLE_SHARE
+@param prebuilt the prebuilt struct
+@param trx data dictionary transaction
+@param nth_col 0-based index of the column
+@param from old column name
+@param to new column name
+@param new_clustered whether the table has been rebuilt
+@retval true Failure
+@retval false Success */
+static __attribute__((nonnull, warn_unused_result))
+bool
+innobase_rename_column(
+/*===================*/
+ const TABLE_SHARE* table_share,
+ row_prebuilt_t* prebuilt,
+ trx_t* trx,
+ ulint nth_col,
+ const char* from,
+ const char* to,
+ bool new_clustered)
+{
+ pars_info_t* info;
+ dberr_t error;
- for (index = dict_table_get_first_index(prebuilt->table);
- index;
- index = dict_table_get_next_index(index)) {
- dict_foreign_t* foreign;
+ DBUG_ENTER("innobase_rename_column");
- if (!index->to_be_dropped) {
+ DBUG_ASSERT(trx_get_dict_operation(trx)
+ == new_clustered ? TRX_DICT_OP_TABLE : TRX_DICT_OP_INDEX);
+ ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
+ ut_ad(mutex_own(&dict_sys->mutex));
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ if (new_clustered) {
+ goto rename_foreign;
+ }
+
+ info = pars_info_create();
+
+ pars_info_add_ull_literal(info, "tableid", prebuilt->table->id);
+ pars_info_add_int4_literal(info, "nth", nth_col);
+ pars_info_add_str_literal(info, "old", from);
+ pars_info_add_str_literal(info, "new", to);
+
+ trx->op_info = "renaming column in SYS_COLUMNS";
+
+ error = que_eval_sql(
+ info,
+ "PROCEDURE RENAME_SYS_COLUMNS_PROC () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_COLUMNS SET NAME=:new\n"
+ "WHERE TABLE_ID=:tableid AND NAME=:old\n"
+ "AND POS=:nth;\n"
+ "END;\n",
+ FALSE, trx);
+
+ DBUG_EXECUTE_IF("ib_rename_column_error",
+ error = DB_OUT_OF_FILE_SPACE;);
+
+ if (error != DB_SUCCESS) {
+err_exit:
+ my_error_innodb(error, table_share->table_name.str, 0);
+ trx->error_state = DB_SUCCESS;
+ trx->op_info = "";
+ DBUG_RETURN(true);
+ }
+
+ trx->op_info = "renaming column in SYS_FIELDS";
+
+ for (dict_index_t* index = dict_table_get_first_index(prebuilt->table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+
+ for (ulint i = 0; i < dict_index_get_n_fields(index); i++) {
+ if (strcmp(dict_index_get_nth_field(index, i)->name,
+ from)) {
continue;
}
- /* Check if this index references some other table */
- foreign = dict_table_get_foreign_constraint(
- prebuilt->table, index);
+ info = pars_info_create();
- if (foreign == NULL) {
+ pars_info_add_ull_literal(info, "indexid", index->id);
+ pars_info_add_int4_literal(info, "nth", i);
+ pars_info_add_str_literal(info, "old", from);
+ pars_info_add_str_literal(info, "new", to);
- continue;
+ error = que_eval_sql(
+ info,
+ "PROCEDURE RENAME_SYS_FIELDS_PROC () IS\n"
+ "BEGIN\n"
+
+ "UPDATE SYS_FIELDS SET COL_NAME=:new\n"
+ "WHERE INDEX_ID=:indexid AND COL_NAME=:old\n"
+ "AND POS=:nth;\n"
+
+ /* Try again, in case there is a prefix_len
+ encoded in SYS_FIELDS.POS */
+
+ "UPDATE SYS_FIELDS SET COL_NAME=:new\n"
+ "WHERE INDEX_ID=:indexid AND COL_NAME=:old\n"
+ "AND POS>=65536*:nth AND POS<65536*(:nth+1);\n"
+
+ "END;\n",
+ FALSE, trx);
+
+ if (error != DB_SUCCESS) {
+ goto err_exit;
}
+ }
+ }
- ut_a(foreign->foreign_index == index);
+rename_foreign:
+ trx->op_info = "renaming column in SYS_FOREIGN_COLS";
- /* Search for an equivalent index that the
- foreign key constraint could use if this index
- were to be deleted. */
+ for (dict_foreign_t* foreign = UT_LIST_GET_FIRST(
+ prebuilt->table->foreign_list);
+ foreign != NULL;
+ foreign = UT_LIST_GET_NEXT(foreign_list, foreign)) {
+ for (unsigned i = 0; i < foreign->n_fields; i++) {
+ if (strcmp(foreign->foreign_col_names[i], from)) {
+ continue;
+ }
- if (!dict_foreign_find_equiv_index(foreign)) {
- trx_set_detailed_error(
- trx,
- "Index needed in foreign key "
- "constraint");
+ info = pars_info_create();
+
+ pars_info_add_str_literal(info, "id", foreign->id);
+ pars_info_add_int4_literal(info, "nth", i);
+ pars_info_add_str_literal(info, "old", from);
+ pars_info_add_str_literal(info, "new", to);
+
+ error = que_eval_sql(
+ info,
+ "PROCEDURE RENAME_SYS_FOREIGN_F_PROC () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_FOREIGN_COLS\n"
+ "SET FOR_COL_NAME=:new\n"
+ "WHERE ID=:id AND POS=:nth\n"
+ "AND FOR_COL_NAME=:old;\n"
+ "END;\n",
+ FALSE, trx);
+
+ if (error != DB_SUCCESS) {
+ goto err_exit;
+ }
+ }
+ }
- trx->error_info = foreign->foreign_index;
+ for (dict_foreign_t* foreign = UT_LIST_GET_FIRST(
+ prebuilt->table->referenced_list);
+ foreign != NULL;
+ foreign = UT_LIST_GET_NEXT(referenced_list, foreign)) {
+ for (unsigned i = 0; i < foreign->n_fields; i++) {
+ if (strcmp(foreign->referenced_col_names[i], from)) {
+ continue;
+ }
- err = HA_ERR_DROP_INDEX_FK;
- break;
+ info = pars_info_create();
+
+ pars_info_add_str_literal(info, "id", foreign->id);
+ pars_info_add_int4_literal(info, "nth", i);
+ pars_info_add_str_literal(info, "old", from);
+ pars_info_add_str_literal(info, "new", to);
+
+ error = que_eval_sql(
+ info,
+ "PROCEDURE RENAME_SYS_FOREIGN_R_PROC () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_FOREIGN_COLS\n"
+ "SET REF_COL_NAME=:new\n"
+ "WHERE ID=:id AND POS=:nth\n"
+ "AND REF_COL_NAME=:old;\n"
+ "END;\n",
+ FALSE, trx);
+
+ if (error != DB_SUCCESS) {
+ goto err_exit;
}
}
}
-func_exit:
- if (err) {
- /* Undo our changes since there was some sort of error. */
- dict_index_t* index
- = dict_table_get_first_index(prebuilt->table);
+ trx->op_info = "";
+ if (!new_clustered) {
+ /* Rename the column in the data dictionary cache. */
+ dict_mem_table_col_rename(prebuilt->table, nth_col, from, to);
+ }
+ DBUG_RETURN(false);
+}
+
+/** Rename columns.
+@param ha_alter_info Data used during in-place alter.
+@param new_clustered whether the table has been rebuilt
+@param table the TABLE
+@param table_share the TABLE_SHARE
+@param prebuilt the prebuilt struct
+@param trx data dictionary transaction
+@retval true Failure
+@retval false Success */
+static __attribute__((nonnull, warn_unused_result))
+bool
+innobase_rename_columns(
+/*====================*/
+ Alter_inplace_info* ha_alter_info,
+ bool new_clustered,
+ const TABLE* table,
+ const TABLE_SHARE* table_share,
+ row_prebuilt_t* prebuilt,
+ trx_t* trx)
+{
+ List_iterator_fast<Create_field> cf_it(
+ ha_alter_info->alter_info->create_list);
+ uint i = 0;
+
+ for (Field** fp = table->field; *fp; fp++, i++) {
+ if (!((*fp)->flags & FIELD_IS_RENAMED)) {
+ continue;
+ }
+
+ cf_it.rewind();
+ while (Create_field* cf = cf_it++) {
+ if (cf->field == *fp) {
+ if (innobase_rename_column(
+ table_share,
+ prebuilt, trx, i,
+ cf->field->field_name,
+ cf->field_name, new_clustered)) {
+ return(true);
+ }
+ goto processed_field;
+ }
+ }
- do {
- rw_lock_x_lock(dict_index_get_lock(index));
- index->to_be_dropped = FALSE;
- rw_lock_x_unlock(dict_index_get_lock(index));
- index = dict_table_get_next_index(index);
- } while (index);
+ ut_error;
+processed_field:
+ continue;
}
- ut_d(dict_table_check_for_dup_indexes(prebuilt->table, TRUE));
- row_mysql_unlock_data_dictionary(trx);
+ return(false);
+}
- DBUG_RETURN(err);
+/** Undo the in-memory addition of foreign key on table->foreign_list
+and table->referenced_list.
+@param ctx saved alter table context
+@param table the foreign table */
+static __attribute__((nonnull))
+void
+innobase_undo_add_fk(
+/*=================*/
+ ha_innobase_inplace_ctx* ctx,
+ dict_table_t* fk_table)
+{
+ for (ulint i = 0; i < ctx->num_to_add_fk; i++) {
+ UT_LIST_REMOVE(
+ foreign_list,
+ fk_table->foreign_list,
+ ctx->add_fk[i]);
+
+ if (ctx->add_fk[i]->referenced_table) {
+ UT_LIST_REMOVE(
+ referenced_list,
+ ctx->add_fk[i]->referenced_table
+ ->referenced_list,
+ ctx->add_fk[i]);
+ }
+ }
}
-/*******************************************************************//**
-Drop the indexes that were passed to a successful prepare_drop_index().
-@return 0 or error number */
+/** Commit or rollback the changes made during
+prepare_inplace_alter_table() and inplace_alter_table() inside
+the storage engine. Note that the allowed level of concurrency
+during this operation will be the same as for
+inplace_alter_table() and thus might be higher than during
+prepare_inplace_alter_table(). (E.g. concurrent writes were
+blocked during prepare, but might not be during commit).
+@param altered_table TABLE object for new version of table.
+@param ha_alter_info Structure describing changes to be done
+by ALTER TABLE and holding data used during in-place alter.
+@param commit true => Commit, false => Rollback.
+@retval true Failure
+@retval false Success
+*/
UNIV_INTERN
-int
-ha_innobase::final_drop_index(
-/*==========================*/
- TABLE* iin_table) /*!< in: Table where indexes
- are dropped */
+bool
+ha_innobase::commit_inplace_alter_table(
+/*====================================*/
+ TABLE* altered_table,
+ Alter_inplace_info* ha_alter_info,
+ bool commit)
{
- dict_index_t* index; /*!< Index to be dropped */
- trx_t* trx; /*!< Transaction */
- int err;
-
- DBUG_ENTER("ha_innobase::final_drop_index");
- ut_ad(table);
+ ha_innobase_inplace_ctx* ctx
+ = static_cast<ha_innobase_inplace_ctx*>
+ (ha_alter_info->handler_ctx);
+ trx_t* trx;
+ trx_t* fk_trx = NULL;
+ int err = 0;
+ bool new_clustered;
+ dict_table_t* fk_table = NULL;
+ ulonglong max_autoinc;
+
+ ut_ad(!srv_read_only_mode);
+
+ DBUG_ENTER("commit_inplace_alter_table");
+
+ DEBUG_SYNC_C("innodb_commit_inplace_alter_table_enter");
+
+ DEBUG_SYNC_C("innodb_commit_inplace_alter_table_wait");
+
+ if (!commit) {
+ /* A rollback is being requested. So far we may at
+ most have created some indexes. If any indexes were to
+ be dropped, they would actually be dropped in this
+ method if commit=true. */
+ DBUG_RETURN(rollback_inplace_alter_table(
+ ha_alter_info, table_share, prebuilt));
+ }
- if (srv_created_new_raw || srv_force_recovery) {
- DBUG_RETURN(HA_ERR_WRONG_COMMAND);
+ if (!altered_table->found_next_number_field) {
+ /* There is no AUTO_INCREMENT column in the table
+ after the ALTER operation. */
+ max_autoinc = 0;
+ } else if (ctx && ctx->add_autoinc != ULINT_UNDEFINED) {
+ /* An AUTO_INCREMENT column was added. Get the last
+ value from the sequence, which may be based on a
+ supplied AUTO_INCREMENT value. */
+ max_autoinc = ctx->sequence.last();
+ } else if ((ha_alter_info->handler_flags
+ & Alter_inplace_info::CHANGE_CREATE_OPTION)
+ && (ha_alter_info->create_info->used_fields
+ & HA_CREATE_USED_AUTO)) {
+ /* An AUTO_INCREMENT value was supplied, but the table
+ was not rebuilt. Get the user-supplied value. */
+ max_autoinc = ha_alter_info->create_info->auto_increment_value;
+ } else {
+ /* An AUTO_INCREMENT value was not specified.
+ Read the old counter value from the table. */
+ ut_ad(table->found_next_number_field);
+ dict_table_autoinc_lock(prebuilt->table);
+ max_autoinc = dict_table_autoinc_read(prebuilt->table);
+ dict_table_autoinc_unlock(prebuilt->table);
}
- update_thd();
+ if (!(ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)) {
+ DBUG_ASSERT(!ctx);
+ /* We may want to update table attributes. */
+ goto func_exit;
+ }
- trx_search_latch_release_if_reserved(prebuilt->trx);
trx_start_if_not_started_xa(prebuilt->trx);
- /* Create a background transaction for the operations on
- the data dictionary tables. */
- trx = innobase_trx_allocate(user_thd);
- trx_start_if_not_started_xa(trx);
-
- /* Flag this transaction as a dictionary operation, so that
- the data dictionary will be locked in crash recovery. */
- trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
-
- /* Lock the table exclusively, to ensure that no active
- transaction depends on an index that is being dropped. */
- err = convert_error_code_to_mysql(
- row_merge_lock_table(prebuilt->trx, prebuilt->table, LOCK_X),
- prebuilt->table->flags, user_thd);
-
- /* Delete corresponding rows from the stats table.
- Marko advises not to edit both user tables and SYS_* tables in one
- trx, thus we use prebuilt->trx instead of trx. Because of this the
- drop from SYS_* and from the stats table cannot happen in one
- transaction and eventually if a crash occurs below, between
- trx_commit_for_mysql(trx); which drops the indexes from SYS_* and
- trx_commit_for_mysql(prebuilt->trx);
- then an orphaned rows will be left in the stats table. */
- for (index = dict_table_get_first_index(prebuilt->table);
- index != NULL;
- index = dict_table_get_next_index(index)) {
+ {
+ /* Exclusively lock the table, to ensure that no other
+ transaction is holding locks on the table while we
+ change the table definition. The MySQL meta-data lock
+ should normally guarantee that no conflicting locks
+ exist. However, FOREIGN KEY constraints checks and any
+ transactions collected during crash recovery could be
+ holding InnoDB locks only, not MySQL locks. */
+ dberr_t error = row_merge_lock_table(
+ prebuilt->trx, prebuilt->table, LOCK_X);
+
+ if (error != DB_SUCCESS) {
+ my_error_innodb(error, table_share->table_name.str, 0);
+ DBUG_RETURN(true);
+ }
- if (index->to_be_dropped) {
+ DEBUG_SYNC(user_thd, "innodb_alter_commit_after_lock_table");
+ }
- enum db_err ret;
- char errstr[1024];
+ if (ctx) {
+ if (ctx->indexed_table != prebuilt->table) {
+ for (dict_index_t* index = dict_table_get_first_index(
+ ctx->indexed_table);
+ index;
+ index = dict_table_get_next_index(index)) {
+ DBUG_ASSERT(dict_index_get_online_status(index)
+ == ONLINE_INDEX_COMPLETE);
+ DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
+ if (dict_index_is_corrupted(index)) {
+ my_error(ER_INDEX_CORRUPT, MYF(0),
+ index->name);
+ DBUG_RETURN(true);
+ }
+ }
+ } else {
+ for (ulint i = 0; i < ctx->num_to_add; i++) {
+ dict_index_t* index = ctx->add[i];
+ DBUG_ASSERT(dict_index_get_online_status(index)
+ == ONLINE_INDEX_COMPLETE);
+ DBUG_ASSERT(*index->name == TEMP_INDEX_PREFIX);
+ if (dict_index_is_corrupted(index)) {
+ /* Report a duplicate key
+ error for the index that was
+ flagged corrupted, most likely
+ because a duplicate value was
+ inserted (directly or by
+ rollback) after
+ ha_innobase::inplace_alter_table()
+ completed. */
+ my_error(ER_DUP_UNKNOWN_IN_INDEX,
+ MYF(0), index->name + 1);
+ DBUG_RETURN(true);
+ }
+ }
+ }
+ }
- ret = dict_stats_delete_index_stats(
- index, prebuilt->trx,
- errstr, sizeof(errstr));
+ if (!ctx || !ctx->trx) {
+ /* Create a background transaction for the operations on
+ the data dictionary tables. */
+ trx = innobase_trx_allocate(user_thd);
- if (ret != DB_SUCCESS) {
- push_warning(user_thd,
- Sql_condition::WARN_LEVEL_WARN,
- ER_LOCK_WAIT_TIMEOUT,
- errstr);
- }
+ trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
+
+ new_clustered = false;
+ } else {
+ trx_dict_op_t op;
+
+ trx = ctx->trx;
+
+ new_clustered = ctx->indexed_table != prebuilt->table;
+
+ op = (new_clustered) ? TRX_DICT_OP_TABLE : TRX_DICT_OP_INDEX;
+
+ trx_start_for_ddl(trx, op);
+ }
+
+ if (new_clustered) {
+ if (prebuilt->table->fts) {
+ ut_ad(!prebuilt->table->fts->add_wq);
+ fts_optimize_remove_table(prebuilt->table);
+ }
+
+ if (ctx->indexed_table->fts) {
+ ut_ad(!ctx->indexed_table->fts->add_wq);
+ fts_optimize_remove_table(ctx->indexed_table);
}
}
+ /* Latch the InnoDB data dictionary exclusively so that no deadlocks
+ or lock waits can happen in it during the data dictionary operation. */
row_mysql_lock_data_dictionary(trx);
- ut_d(dict_table_check_for_dup_indexes(prebuilt->table, TRUE));
- if (UNIV_UNLIKELY(err)) {
+ /* Wait for background stats processing to stop using the
+ indexes that we are going to drop (if any). */
+ if (ctx) {
+ dict_stats_wait_bg_to_stop_using_tables(
+ prebuilt->table, ctx->indexed_table, trx);
+ }
- /* Unmark the indexes to be dropped. */
- for (index = dict_table_get_first_index(prebuilt->table);
- index; index = dict_table_get_next_index(index)) {
+ /* Final phase of add foreign key processing */
+ if (ctx && ctx->num_to_add_fk > 0) {
+ ulint highest_id_so_far;
+ dberr_t error;
+
+ /* If it runs concurrently with create index or table
+ rebuild, we will need a separate trx to do the system
+ table change, since in the case of failure to rebuild/create
+ index, it will need to commit the trx that drops the newly
+ created table/index, while for FK, it needs to rollback
+ the metadata change */
+ if (new_clustered || ctx->num_to_add) {
+ fk_trx = innobase_trx_allocate(user_thd);
- rw_lock_x_lock(dict_index_get_lock(index));
- index->to_be_dropped = FALSE;
- rw_lock_x_unlock(dict_index_get_lock(index));
+ trx_start_for_ddl(fk_trx, TRX_DICT_OP_INDEX);
+
+ fk_trx->dict_operation_lock_mode =
+ trx->dict_operation_lock_mode;
+ } else {
+ fk_trx = trx;
}
- goto func_exit;
+ ut_ad(ha_alter_info->handler_flags
+ & Alter_inplace_info::ADD_FOREIGN_KEY);
+
+ highest_id_so_far = dict_table_get_highest_foreign_id(
+ prebuilt->table);
+
+ highest_id_so_far++;
+
+ fk_table = ctx->indexed_table;
+
+ for (ulint i = 0; i < ctx->num_to_add_fk; i++) {
+
+ /* Get the new dict_table_t */
+ if (new_clustered) {
+ ctx->add_fk[i]->foreign_table
+ = fk_table;
+ }
+
+ /* Add Foreign Key info to in-memory metadata */
+ UT_LIST_ADD_LAST(foreign_list,
+ fk_table->foreign_list,
+ ctx->add_fk[i]);
+
+ if (ctx->add_fk[i]->referenced_table) {
+ UT_LIST_ADD_LAST(
+ referenced_list,
+ ctx->add_fk[i]->referenced_table->referenced_list,
+ ctx->add_fk[i]);
+ }
+
+ if (!ctx->add_fk[i]->foreign_index) {
+ ctx->add_fk[i]->foreign_index
+ = dict_foreign_find_index(
+ fk_table,
+ ctx->add_fk[i]->foreign_col_names,
+ ctx->add_fk[i]->n_fields, NULL,
+ TRUE, FALSE);
+
+ ut_ad(ctx->add_fk[i]->foreign_index);
+
+ if (!innobase_check_fk_option(
+ ctx->add_fk[i])) {
+ my_error(ER_FK_INCORRECT_OPTION,
+ MYF(0),
+ table_share->table_name.str,
+ ctx->add_fk[i]->id);
+ goto undo_add_fk;
+ }
+ }
+
+ /* System table change */
+ error = dict_create_add_foreign_to_dictionary(
+ &highest_id_so_far, prebuilt->table,
+ ctx->add_fk[i], fk_trx);
+
+ DBUG_EXECUTE_IF(
+ "innodb_test_cannot_add_fk_system",
+ error = DB_ERROR;);
+
+ if (error != DB_SUCCESS) {
+ my_error(ER_FK_FAIL_ADD_SYSTEM, MYF(0),
+ ctx->add_fk[i]->id);
+ goto undo_add_fk;
+ }
+ }
+
+ /* Make sure the tables are moved to non-lru side of
+ dictionary list */
+ error = dict_load_foreigns(prebuilt->table->name, FALSE, TRUE);
+
+ if (error != DB_SUCCESS) {
+ my_error(ER_CANNOT_ADD_FOREIGN, MYF(0));
+
+undo_add_fk:
+ err = -1;
+
+ if (new_clustered) {
+ goto drop_new_clustered;
+ } else if (ctx->num_to_add > 0) {
+ ut_ad(trx != fk_trx);
+
+ innobase_rollback_sec_index(
+ prebuilt, table_share, trx);
+ innobase_undo_add_fk(ctx, fk_table);
+ trx_rollback_for_mysql(fk_trx);
+
+ goto trx_commit;
+ } else {
+ goto trx_rollback;
+ }
+ }
+ }
+
+ if (new_clustered) {
+ dberr_t error;
+ char* tmp_name;
+
+ /* Clear the to_be_dropped flag in the data dictionary. */
+ for (ulint i = 0; i < ctx->num_to_drop; i++) {
+ dict_index_t* index = ctx->drop[i];
+ DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
+ DBUG_ASSERT(index->to_be_dropped);
+ index->to_be_dropped = 0;
+ }
+
+ /* We copied the table. Any indexes that were
+ requested to be dropped were not created in the copy
+ of the table. Apply any last bit of the rebuild log
+ and then rename the tables. */
+
+ if (ctx->online) {
+ DEBUG_SYNC_C("row_log_table_apply2_before");
+ error = row_log_table_apply(
+ ctx->thr, prebuilt->table, altered_table);
+
+ switch (error) {
+ KEY* dup_key;
+ case DB_SUCCESS:
+ break;
+ case DB_DUPLICATE_KEY:
+ if (prebuilt->trx->error_key_num
+ == ULINT_UNDEFINED) {
+ /* This should be the hidden index on
+ FTS_DOC_ID. */
+ dup_key = NULL;
+ } else {
+ DBUG_ASSERT(
+ prebuilt->trx->error_key_num
+ < ha_alter_info->key_count);
+ dup_key = &ha_alter_info
+ ->key_info_buffer[
+ prebuilt->trx
+ ->error_key_num];
+ }
+ print_keydup_error(altered_table, dup_key, MYF(0));
+ break;
+ case DB_ONLINE_LOG_TOO_BIG:
+ my_error(ER_INNODB_ONLINE_LOG_TOO_BIG, MYF(0),
+ ha_alter_info->key_info_buffer[0]
+ .name);
+ break;
+ case DB_INDEX_CORRUPT:
+ my_error(ER_INDEX_CORRUPT, MYF(0),
+ (prebuilt->trx->error_key_num
+ == ULINT_UNDEFINED)
+ ? FTS_DOC_ID_INDEX_NAME
+ : ha_alter_info->key_info_buffer[
+ prebuilt->trx->error_key_num]
+ .name);
+ break;
+ default:
+ my_error_innodb(error,
+ table_share->table_name.str,
+ prebuilt->table->flags);
+ }
+
+ if (error != DB_SUCCESS) {
+ err = -1;
+ goto drop_new_clustered;
+ }
+ }
+
+ if ((ha_alter_info->handler_flags
+ & Alter_inplace_info::ALTER_COLUMN_NAME)
+ && innobase_rename_columns(ha_alter_info, true, table,
+ table_share, prebuilt, trx)) {
+ err = -1;
+ goto drop_new_clustered;
+ }
+
+ /* A new clustered index was defined for the table
+ and there was no error at this point. We can
+ now rename the old table as a temporary table,
+ rename the new temporary table as the old
+ table and drop the old table. */
+ tmp_name = dict_mem_create_temporary_tablename(
+ ctx->heap, ctx->indexed_table->name,
+ ctx->indexed_table->id);
+
+ /* Rename table will reload and refresh the in-memory
+ foreign key constraint metadata. This is a rename operation
+ in preparing for dropping the old table. Set the table
+ to_be_dropped bit here, so to make sure DML foreign key
+ constraint check does not use the stale dict_foreign_t.
+ This is done because WL#6049 (FK MDL) has not been
+ implemented yet */
+ prebuilt->table->to_be_dropped = true;
+
+ DBUG_EXECUTE_IF("ib_ddl_crash_before_rename",
+ DBUG_SUICIDE(););
+
+ /* The new table must inherit the flag from the
+ "parent" table. */
+ if (dict_table_is_discarded(prebuilt->table)) {
+ ctx->indexed_table->ibd_file_missing = true;
+ ctx->indexed_table->flags2 |= DICT_TF2_DISCARDED;
+ }
+
+ error = row_merge_rename_tables(
+ prebuilt->table, ctx->indexed_table,
+ tmp_name, trx);
+
+ DBUG_EXECUTE_IF("ib_ddl_crash_after_rename",
+ DBUG_SUICIDE(););
+
+ /* n_ref_count must be 1, because purge cannot
+ be executing on this very table as we are
+ holding dict_operation_lock X-latch. */
+ ut_a(prebuilt->table->n_ref_count == 1);
+
+ switch (error) {
+ dict_table_t* old_table;
+ case DB_SUCCESS:
+ old_table = prebuilt->table;
+
+ DBUG_EXECUTE_IF("ib_ddl_crash_before_commit",
+ DBUG_SUICIDE(););
+
+ trx_commit_for_mysql(prebuilt->trx);
+
+ DBUG_EXECUTE_IF("ib_ddl_crash_after_commit",
+ DBUG_SUICIDE(););
+
+ if (fk_trx) {
+ ut_ad(fk_trx != trx);
+ trx_commit_for_mysql(fk_trx);
+ }
+
+ row_prebuilt_free(prebuilt, TRUE);
+ error = row_merge_drop_table(trx, old_table);
+ prebuilt = row_create_prebuilt(
+ ctx->indexed_table, table->s->reclength);
+ err = 0;
+ break;
+ case DB_TABLESPACE_EXISTS:
+ ut_a(ctx->indexed_table->n_ref_count == 1);
+ my_error(ER_TABLESPACE_EXISTS, MYF(0), tmp_name);
+ err = HA_ERR_TABLESPACE_EXISTS;
+ goto drop_new_clustered;
+ case DB_DUPLICATE_KEY:
+ ut_a(ctx->indexed_table->n_ref_count == 1);
+ my_error(ER_TABLE_EXISTS_ERROR, MYF(0), tmp_name);
+ err = HA_ERR_TABLE_EXIST;
+ goto drop_new_clustered;
+ default:
+ my_error_innodb(error,
+ table_share->table_name.str,
+ prebuilt->table->flags);
+ err = -1;
+
+drop_new_clustered:
+ /* Reset the to_be_dropped bit for the old table,
+ since we are aborting the operation and dropping
+ the new table due to some error conditions */
+ prebuilt->table->to_be_dropped = false;
+
+ /* Need to drop the added foreign key first */
+ if (fk_trx) {
+ ut_ad(fk_trx != trx);
+ innobase_undo_add_fk(ctx, fk_table);
+ trx_rollback_for_mysql(fk_trx);
+ }
+
+ dict_table_close(ctx->indexed_table, TRUE, FALSE);
+
+#ifdef UNIV_DDL_DEBUG
+ /* Nobody should have initialized the stats of the
+ newly created table yet. When this is the case, we
+ know that it has not been added for background stats
+ gathering. */
+ ut_a(!ctx->indexed_table->stat_initialized);
+#endif /* UNIV_DDL_DEBUG */
+
+ row_merge_drop_table(trx, ctx->indexed_table);
+ ctx->indexed_table = NULL;
+ goto trx_commit;
+ }
+ } else if (ctx) {
+ dberr_t error;
+
+ /* We altered the table in place. */
+ /* Lose the TEMP_INDEX_PREFIX. */
+ for (ulint i = 0; i < ctx->num_to_add; i++) {
+ dict_index_t* index = ctx->add[i];
+ DBUG_ASSERT(dict_index_get_online_status(index)
+ == ONLINE_INDEX_COMPLETE);
+ DBUG_ASSERT(*index->name
+ == TEMP_INDEX_PREFIX);
+ index->name++;
+ error = row_merge_rename_index_to_add(
+ trx, prebuilt->table->id,
+ index->id);
+ if (error != DB_SUCCESS) {
+ sql_print_error(
+ "InnoDB: rename index to add: %lu\n",
+ (ulong) error);
+ DBUG_ASSERT(0);
+ }
+ }
+
+ /* Drop any indexes that were requested to be dropped.
+ Rename them to TEMP_INDEX_PREFIX in the data
+ dictionary first. We do not bother to rename
+ index->name in the dictionary cache, because the index
+ is about to be freed after row_merge_drop_indexes_dict(). */
+
+ for (ulint i = 0; i < ctx->num_to_drop; i++) {
+ dict_index_t* index = ctx->drop[i];
+ DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
+ DBUG_ASSERT(index->table == prebuilt->table);
+ DBUG_ASSERT(index->to_be_dropped);
+
+ error = row_merge_rename_index_to_drop(
+ trx, index->table->id, index->id);
+ if (error != DB_SUCCESS) {
+ sql_print_error(
+ "InnoDB: rename index to drop: %lu\n",
+ (ulong) error);
+ DBUG_ASSERT(0);
+ }
+ }
+ }
+
+ if (err == 0
+ && (ha_alter_info->handler_flags
+ & Alter_inplace_info::DROP_FOREIGN_KEY)) {
+ DBUG_ASSERT(ctx->num_to_drop_fk > 0);
+ DBUG_ASSERT(ctx->num_to_drop_fk
+ == ha_alter_info->alter_info->drop_list.elements);
+ for (ulint i = 0; i < ctx->num_to_drop_fk; i++) {
+ DBUG_ASSERT(prebuilt->table
+ == ctx->drop_fk[i]->foreign_table);
+
+ if (innobase_drop_foreign(
+ table_share, trx, ctx->drop_fk[i])) {
+ err = -1;
+ }
+ }
+ }
+
+ if (err == 0 && !new_clustered
+ && (ha_alter_info->handler_flags
+ & Alter_inplace_info::ALTER_COLUMN_NAME)
+ && innobase_rename_columns(ha_alter_info, false, table,
+ table_share, prebuilt, trx)) {
+ err = -1;
}
- /* Drop indexes marked to be dropped */
+ if (err == 0) {
+ if (fk_trx && fk_trx != trx) {
+ /* This needs to be placed before "trx_commit" marker,
+ since anyone called "goto trx_commit" has committed
+ or rolled back fk_trx before jumping here */
+ trx_commit_for_mysql(fk_trx);
+ }
+trx_commit:
+ trx_commit_for_mysql(trx);
+ } else {
+trx_rollback:
+ /* undo the addition of foreign key */
+ if (fk_trx) {
+ innobase_undo_add_fk(ctx, fk_table);
- index = dict_table_get_first_index(prebuilt->table);
+ if (fk_trx != trx) {
+ trx_rollback_for_mysql(fk_trx);
+ }
+ }
- while (index) {
- dict_index_t* next_index;
+ trx_rollback_for_mysql(trx);
+
+ /* If there are newly added secondary indexes, above
+ rollback will revert the rename operation and put the
+ new indexes with the temp index prefix, we can drop
+ them here */
+ if (ctx && !new_clustered) {
+ ulint i;
+
+ /* Need to drop the in-memory dict_index_t first
+ to avoid dict_table_check_for_dup_indexes()
+ assertion in row_merge_drop_indexes() in the case
+ of add and drop the same index */
+ for (i = 0; i < ctx->num_to_add; i++) {
+ dict_index_t* index = ctx->add[i];
+ dict_index_remove_from_cache(
+ prebuilt->table, index);
+ }
- next_index = dict_table_get_next_index(index);
+ if (ctx->num_to_add) {
+ trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
+ row_merge_drop_indexes(trx, prebuilt->table,
+ FALSE);
+ trx_commit_for_mysql(trx);
+ }
- if (index->to_be_dropped) {
- row_merge_drop_index(index, prebuilt->table, trx);
+ for (i = 0; i < ctx->num_to_drop; i++) {
+ dict_index_t* index = ctx->drop[i];
+ index->to_be_dropped = false;
+ }
}
+ }
- index = next_index;
+ /* Flush the log to reduce probability that the .frm files and
+ the InnoDB data dictionary get out-of-sync if the user runs
+ with innodb_flush_log_at_trx_commit = 0 */
+
+ log_buffer_flush_to_disk();
+
+ if (new_clustered) {
+ innobase_online_rebuild_log_free(prebuilt->table);
}
- /* Check that all flagged indexes were dropped. */
- for (index = dict_table_get_first_index(prebuilt->table);
- index; index = dict_table_get_next_index(index)) {
- ut_a(!index->to_be_dropped);
+ if (err == 0 && ctx) {
+ /* The changes were successfully performed. */
+ bool add_fts = false;
+
+ /* Rebuild the index translation table.
+ This should only be needed when !new_clustered. */
+ share->idx_trans_tbl.index_count = 0;
+
+ /* Publish the created fulltext index, if any.
+ Note that a fulltext index can be created without
+ creating the clustered index, if there already exists
+ a suitable FTS_DOC_ID column. If not, one will be
+ created, implying new_clustered */
+ for (ulint i = 0; i < ctx->num_to_add; i++) {
+ dict_index_t* index = ctx->add[i];
+
+ if (index->type & DICT_FTS) {
+ DBUG_ASSERT(index->type == DICT_FTS);
+ fts_add_index(index, prebuilt->table);
+ add_fts = true;
+ }
+ }
+
+ if (!new_clustered && ha_alter_info->index_drop_count) {
+
+ /* Really drop the indexes that were dropped.
+ The transaction had to be committed first
+ (after renaming the indexes), so that in the
+ event of a crash, crash recovery will drop the
+ indexes, because it drops all indexes whose
+ names start with TEMP_INDEX_PREFIX. Once we
+ have started dropping an index tree, there is
+ no way to roll it back. */
+
+ trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
+
+ for (ulint i = 0; i < ctx->num_to_drop; i++) {
+ dict_index_t* index = ctx->drop[i];
+ DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
+ DBUG_ASSERT(index->table == prebuilt->table);
+ DBUG_ASSERT(index->to_be_dropped);
+
+ /* Replace the indexes in foreign key
+ constraints if needed. */
+
+ dict_foreign_replace_index(
+ prebuilt->table, index, prebuilt->trx);
+
+ /* Mark the index dropped
+ in the data dictionary cache. */
+ rw_lock_x_lock(dict_index_get_lock(index));
+ index->page = FIL_NULL;
+ rw_lock_x_unlock(dict_index_get_lock(index));
+ }
+
+ row_merge_drop_indexes_dict(trx, prebuilt->table->id);
+
+ for (ulint i = 0; i < ctx->num_to_drop; i++) {
+ dict_index_t* index = ctx->drop[i];
+ DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
+ DBUG_ASSERT(index->table == prebuilt->table);
+
+ if (index->type & DICT_FTS) {
+ DBUG_ASSERT(index->type == DICT_FTS
+ || (index->type
+ & DICT_CORRUPT));
+ DBUG_ASSERT(prebuilt->table->fts);
+ fts_drop_index(
+ prebuilt->table, index, trx);
+ }
+
+ dict_index_remove_from_cache(
+ prebuilt->table, index);
+ }
+
+ trx_commit_for_mysql(trx);
+ }
+
+ ut_d(dict_table_check_for_dup_indexes(
+ prebuilt->table, CHECK_ALL_COMPLETE));
+ DBUG_ASSERT(new_clustered == !prebuilt->trx);
+
+ if (add_fts) {
+ fts_optimize_add_table(prebuilt->table);
+ }
}
- /* We will need to rebuild index translation table. Set
- valid index entry count in the translation table to zero */
- share->idx_trans_tbl.index_count = 0;
+ if (!prebuilt->trx) {
+ /* We created a new clustered index and committed the
+ user transaction already, so that we were able to
+ drop the old table. */
+ update_thd();
+ prebuilt->trx->will_lock++;
-func_exit:
- ut_d(dict_table_check_for_dup_indexes(prebuilt->table, TRUE));
+ DBUG_EXECUTE_IF("ib_ddl_crash_after_user_trx_commit",
+ DBUG_SUICIDE(););
+
+ trx_start_if_not_started_xa(prebuilt->trx);
+ }
+ ut_d(dict_table_check_for_dup_indexes(
+ prebuilt->table, CHECK_ABORTED_OK));
ut_a(fts_check_cached_index(prebuilt->table));
+ row_mysql_unlock_data_dictionary(trx);
+ if (fk_trx && fk_trx != trx) {
+ fk_trx->dict_operation_lock_mode = 0;
+ trx_free_for_mysql(fk_trx);
+ }
+ trx_free_for_mysql(trx);
+
+ if (ctx && trx == ctx->trx) {
+ ctx->trx = NULL;
+ }
+
+ if (err == 0) {
+ /* Delete corresponding rows from the stats table. We update
+ the statistics in a separate transaction from trx, because
+ lock waits are not allowed in a data dictionary transaction.
+ (Lock waits are possible on the statistics table, because it
+ is directly accessible by users, not covered by the
+ dict_operation_lock.)
+
+ Because the data dictionary changes were already committed,
+ orphaned rows may be left in the statistics table if the
+ system crashes. */
+
+ for (uint i = 0; i < ha_alter_info->index_drop_count; i++) {
+ const KEY* key
+ = ha_alter_info->index_drop_buffer[i];
+ dberr_t ret;
+ char errstr[1024];
+
+ ret = dict_stats_drop_index(
+ prebuilt->table->name, key->name,
+ errstr, sizeof(errstr));
+
+ if (ret != DB_SUCCESS) {
+ push_warning(user_thd,
+ Sql_condition::WARN_LEVEL_WARN,
+ ER_LOCK_WAIT_TIMEOUT,
+ errstr);
+ }
+ }
+
+ if (ctx && !dict_table_is_discarded(prebuilt->table)) {
+ bool stats_init_called = false;
+
+ for (uint i = 0; i < ctx->num_to_add; i++) {
+ dict_index_t* index = ctx->add[i];
+
+ if (!(index->type & DICT_FTS)) {
+
+ if (!stats_init_called) {
+ innobase_copy_frm_flags_from_table_share(
+ index->table,
+ altered_table->s);
+
+ dict_stats_init(index->table);
+
+ stats_init_called = true;
+ }
+
+ dict_stats_update_for_index(index);
+ }
+ }
+ }
+ }
- trx_commit_for_mysql(trx);
trx_commit_for_mysql(prebuilt->trx);
- row_mysql_unlock_data_dictionary(trx);
/* Flush the log to reduce probability that the .frm files and
the InnoDB data dictionary get out-of-sync if the user runs
@@ -1912,12 +5213,106 @@ func_exit:
log_buffer_flush_to_disk();
- trx_free_for_mysql(trx);
-
/* Tell the InnoDB server that there might be work for
utility threads: */
srv_active_wake_master_thread();
- DBUG_RETURN(err);
+func_exit:
+
+ if (err == 0 && altered_table->found_next_number_field != 0) {
+ dict_table_autoinc_lock(prebuilt->table);
+ dict_table_autoinc_initialize(prebuilt->table, max_autoinc);
+ dict_table_autoinc_unlock(prebuilt->table);
+ }
+
+#ifndef DBUG_OFF
+ dict_index_t* clust_index = dict_table_get_first_index(
+ prebuilt->table);
+ DBUG_ASSERT(!clust_index->online_log);
+ DBUG_ASSERT(dict_index_get_online_status(clust_index)
+ == ONLINE_INDEX_COMPLETE);
+#endif /* !DBUG_OFF */
+
+#ifdef UNIV_DEBUG
+ for (dict_index_t* index = dict_table_get_first_index(
+ prebuilt->table);
+ index;
+ index = dict_table_get_next_index(index)) {
+ ut_ad(!index->to_be_dropped);
+ }
+#endif /* UNIV_DEBUG */
+
+ if (err == 0) {
+ MONITOR_ATOMIC_DEC(MONITOR_PENDING_ALTER_TABLE);
+
+#ifdef UNIV_DDL_DEBUG
+ /* Invoke CHECK TABLE atomically after a successful
+ ALTER TABLE. */
+ TABLE* old_table = table;
+ table = altered_table;
+ ut_a(check(user_thd, 0) == HA_ADMIN_OK);
+ table = old_table;
+#endif /* UNIV_DDL_DEBUG */
+ }
+
+ DBUG_RETURN(err != 0);
+}
+
+/**
+@param thd - the session
+@param start_value - the lower bound
+@param max_value - the upper bound (inclusive) */
+ib_sequence_t::ib_sequence_t(
+ THD* thd,
+ ulonglong start_value,
+ ulonglong max_value)
+ :
+ m_max_value(max_value),
+ m_increment(0),
+ m_offset(0),
+ m_next_value(start_value),
+ m_eof(false)
+{
+ if (thd != 0 && m_max_value > 0) {
+
+ thd_get_autoinc(thd, &m_offset, &m_increment);
+
+ if (m_increment > 1 || m_offset > 1) {
+
+ /* If there is an offset or increment specified
+ then we need to work out the exact next value. */
+
+ m_next_value = innobase_next_autoinc(
+ start_value, 1,
+ m_increment, m_offset, m_max_value);
+
+ } else if (start_value == 0) {
+ /* The next value can never be 0. */
+ m_next_value = 1;
+ }
+ } else {
+ m_eof = true;
+ }
+}
+
+/**
+Postfix increment
+@return the next value to insert */
+ulonglong
+ib_sequence_t::operator++(int) UNIV_NOTHROW
+{
+ ulonglong current = m_next_value;
+
+ ut_ad(!m_eof);
+ ut_ad(m_max_value > 0);
+
+ m_next_value = innobase_next_autoinc(
+ current, 1, m_increment, m_offset, m_max_value);
+
+ if (m_next_value == m_max_value && current == m_next_value) {
+ m_eof = true;
+ }
+
+ return(current);
}
diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc
index 882f5040a38..4f84f477b3a 100644
--- a/storage/innobase/handler/i_s.cc
+++ b/storage/innobase/handler/i_s.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -39,6 +39,7 @@ Created July 18, 2007 Vasil Dimov
#include "btr0types.h"
#include "buf0buddy.h" /* for i_s_cmpmem */
#include "buf0buf.h" /* for buf_pool */
+#include "dict0dict.h" /* for dict_table_stats_lock() */
#include "dict0load.h" /* for file sys_tables related info. */
#include "dict0mem.h"
#include "dict0types.h"
@@ -57,14 +58,12 @@ Created July 18, 2007 Vasil Dimov
/** structure associates a name string with a file page type and/or buffer
page state. */
-struct buffer_page_desc_str_struct{
+struct buf_page_desc_t{
const char* type_str; /*!< String explain the page
type/state */
ulint type_value; /*!< Page type or page state */
};
-typedef struct buffer_page_desc_str_struct buf_page_desc_str_t;
-
/** Any states greater than FIL_PAGE_TYPE_LAST would be treated as unknown. */
#define I_S_PAGE_TYPE_UNKNOWN (FIL_PAGE_TYPE_LAST + 1)
@@ -73,7 +72,7 @@ in i_s_page_type[] array */
#define I_S_PAGE_TYPE_INDEX 1
/** Name string for File Page Types */
-static buf_page_desc_str_t i_s_page_type[] = {
+static buf_page_desc_t i_s_page_type[] = {
{"ALLOCATED", FIL_PAGE_TYPE_ALLOCATED},
{"INDEX", FIL_PAGE_INDEX},
{"UNDO_LOG", FIL_PAGE_UNDO_LOG},
@@ -98,7 +97,7 @@ static buf_page_desc_str_t i_s_page_type[] = {
/** This structure defines information we will fetch from pages
currently cached in the buffer pool. It will be used to populate
table INFORMATION_SCHEMA.INNODB_BUFFER_PAGE */
-struct buffer_page_info_struct{
+struct buf_page_info_t{
ulint block_id; /*!< Buffer Pool block ID */
unsigned space_id:32; /*!< Tablespace ID */
unsigned page_num:32; /*!< Page number/offset */
@@ -131,8 +130,6 @@ struct buffer_page_info_struct{
index_id_t index_id; /*!< Index ID if a index page */
};
-typedef struct buffer_page_info_struct buf_page_info_t;
-
/** maximum number of buffer page info we would cache. */
#define MAX_BUF_INFO_CACHED 10000
@@ -282,6 +279,43 @@ field_store_string(
}
/*******************************************************************//**
+Store the name of an index in a MYSQL_TYPE_VARCHAR field.
+Handles the names of incomplete secondary indexes.
+@return 0 on success */
+static
+int
+field_store_index_name(
+/*===================*/
+ Field* field, /*!< in/out: target field for
+ storage */
+ const char* index_name) /*!< in: NUL-terminated utf-8
+ index name, possibly starting with
+ TEMP_INDEX_PREFIX */
+{
+ int ret;
+
+ ut_ad(index_name != NULL);
+ ut_ad(field->real_type() == MYSQL_TYPE_VARCHAR);
+
+ /* Since TEMP_INDEX_PREFIX is not a valid UTF8, we need to convert
+ it to something else. */
+ if (index_name[0] == TEMP_INDEX_PREFIX) {
+ char buf[NAME_LEN + 1];
+ buf[0] = '?';
+ memcpy(buf + 1, index_name + 1, strlen(index_name));
+ ret = field->store(buf, strlen(buf),
+ system_charset_info);
+ } else {
+ ret = field->store(index_name, strlen(index_name),
+ system_charset_info);
+ }
+
+ field->set_notnull();
+
+ return(ret);
+}
+
+/*******************************************************************//**
Auxiliary function to store ulint value in MYSQL_TYPE_LONGLONG field.
If the value is ULINT_UNDEFINED then the field it set to NULL.
@return 0 on success */
@@ -713,7 +747,7 @@ static struct st_mysql_information_schema i_s_info =
MYSQL_INFORMATION_SCHEMA_INTERFACE_VERSION
};
-UNIV_INTERN struct st_maria_plugin i_s_innodb_trx =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_trx =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -757,9 +791,13 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_trx =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_locks */
@@ -923,16 +961,9 @@ fill_innodb_locks_from_cache(
/* lock_index */
if (row->lock_index != NULL) {
-
- bufend = innobase_convert_name(buf, sizeof(buf),
- row->lock_index,
- strlen(row->lock_index),
- thd, FALSE);
- OK(fields[IDX_LOCK_INDEX]->store(buf, bufend - buf,
- system_charset_info));
- fields[IDX_LOCK_INDEX]->set_notnull();
+ OK(field_store_index_name(fields[IDX_LOCK_INDEX],
+ row->lock_index));
} else {
-
fields[IDX_LOCK_INDEX]->set_null();
}
@@ -979,7 +1010,7 @@ innodb_locks_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_locks =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_locks =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -1023,9 +1054,13 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_locks =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_lock_waits */
@@ -1162,7 +1197,7 @@ innodb_lock_waits_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_lock_waits =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_lock_waits =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -1206,9 +1241,13 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_lock_waits =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
/*******************************************************************//**
@@ -1495,7 +1534,7 @@ i_s_cmp_reset_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_cmp =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_cmp =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -1539,12 +1578,16 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_cmp =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
-UNIV_INTERN struct st_maria_plugin i_s_innodb_cmp_reset =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_cmp_reset =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -1589,9 +1632,371 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_cmp_reset =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
+};
+
+/* Fields of the dynamic tables
+information_schema.innodb_cmp_per_index and
+information_schema.innodb_cmp_per_index_reset. */
+static ST_FIELD_INFO i_s_cmp_per_index_fields_info[] =
+{
+#define IDX_DATABASE_NAME 0
+ {STRUCT_FLD(field_name, "database_name"),
+ STRUCT_FLD(field_length, 192),
+ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, 0),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define IDX_TABLE_NAME 1
+ {STRUCT_FLD(field_name, "table_name"),
+ STRUCT_FLD(field_length, 192),
+ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, 0),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define IDX_INDEX_NAME 2
+ {STRUCT_FLD(field_name, "index_name"),
+ STRUCT_FLD(field_length, 192),
+ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, 0),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define IDX_COMPRESS_OPS 3
+ {STRUCT_FLD(field_name, "compress_ops"),
+ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, 0),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define IDX_COMPRESS_OPS_OK 4
+ {STRUCT_FLD(field_name, "compress_ops_ok"),
+ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, 0),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define IDX_COMPRESS_TIME 5
+ {STRUCT_FLD(field_name, "compress_time"),
+ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, 0),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define IDX_UNCOMPRESS_OPS 6
+ {STRUCT_FLD(field_name, "uncompress_ops"),
+ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, 0),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define IDX_UNCOMPRESS_TIME 7
+ {STRUCT_FLD(field_name, "uncompress_time"),
+ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, 0),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+ END_OF_ST_FIELD_INFO
+};
+
+/*******************************************************************//**
+Fill the dynamic table
+information_schema.innodb_cmp_per_index or
+information_schema.innodb_cmp_per_index_reset.
+@return 0 on success, 1 on failure */
+static
+int
+i_s_cmp_per_index_fill_low(
+/*=======================*/
+ THD* thd, /*!< in: thread */
+ TABLE_LIST* tables, /*!< in/out: tables to fill */
+ Item* , /*!< in: condition (ignored) */
+ ibool reset) /*!< in: TRUE=reset cumulated counts */
+{
+ TABLE* table = tables->table;
+ Field** fields = table->field;
+ int status = 0;
+
+ DBUG_ENTER("i_s_cmp_per_index_fill_low");
+
+ /* deny access to non-superusers */
+ if (check_global_access(thd, PROCESS_ACL)) {
+
+ DBUG_RETURN(0);
+ }
+
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
+
+ /* Create a snapshot of the stats so we do not bump into lock
+ order violations with dict_sys->mutex below. */
+ mutex_enter(&page_zip_stat_per_index_mutex);
+ page_zip_stat_per_index_t snap (page_zip_stat_per_index);
+ mutex_exit(&page_zip_stat_per_index_mutex);
+
+ mutex_enter(&dict_sys->mutex);
+
+ page_zip_stat_per_index_t::iterator iter;
+ ulint i;
+
+ for (iter = snap.begin(), i = 0; iter != snap.end(); iter++, i++) {
+
+ char name[192];
+ dict_index_t* index = dict_index_find_on_id_low(iter->first);
+
+ if (index != NULL) {
+ char db_utf8[MAX_DB_UTF8_LEN];
+ char table_utf8[MAX_TABLE_UTF8_LEN];
+
+ dict_fs2utf8(index->table_name,
+ db_utf8, sizeof(db_utf8),
+ table_utf8, sizeof(table_utf8));
+
+ field_store_string(fields[IDX_DATABASE_NAME], db_utf8);
+ field_store_string(fields[IDX_TABLE_NAME], table_utf8);
+ field_store_index_name(fields[IDX_INDEX_NAME],
+ index->name);
+ } else {
+ /* index not found */
+ ut_snprintf(name, sizeof(name),
+ "index_id:" IB_ID_FMT, iter->first);
+ field_store_string(fields[IDX_DATABASE_NAME],
+ "unknown");
+ field_store_string(fields[IDX_TABLE_NAME],
+ "unknown");
+ field_store_string(fields[IDX_INDEX_NAME],
+ name);
+ }
+
+ fields[IDX_COMPRESS_OPS]->store(
+ iter->second.compressed);
+
+ fields[IDX_COMPRESS_OPS_OK]->store(
+ iter->second.compressed_ok);
+
+ fields[IDX_COMPRESS_TIME]->store(
+ (long) (iter->second.compressed_usec / 1000000));
+
+ fields[IDX_UNCOMPRESS_OPS]->store(
+ iter->second.decompressed);
+
+ fields[IDX_UNCOMPRESS_TIME]->store(
+ (long) (iter->second.decompressed_usec / 1000000));
+
+ if (schema_table_store_record(thd, table)) {
+ status = 1;
+ break;
+ }
+
+ /* Release and reacquire the dict mutex to allow other
+ threads to proceed. This could eventually result in the
+ contents of INFORMATION_SCHEMA.innodb_cmp_per_index being
+ inconsistent, but it is an acceptable compromise. */
+ if (i % 1000 == 0) {
+ mutex_exit(&dict_sys->mutex);
+ mutex_enter(&dict_sys->mutex);
+ }
+ }
+
+ mutex_exit(&dict_sys->mutex);
+
+ if (reset) {
+ page_zip_reset_stat_per_index();
+ }
+
+ DBUG_RETURN(status);
+}
+
+/*******************************************************************//**
+Fill the dynamic table information_schema.innodb_cmp_per_index.
+@return 0 on success, 1 on failure */
+static
+int
+i_s_cmp_per_index_fill(
+/*===================*/
+ THD* thd, /*!< in: thread */
+ TABLE_LIST* tables, /*!< in/out: tables to fill */
+ Item* cond) /*!< in: condition (ignored) */
+{
+ return(i_s_cmp_per_index_fill_low(thd, tables, cond, FALSE));
+}
+
+/*******************************************************************//**
+Fill the dynamic table information_schema.innodb_cmp_per_index_reset.
+@return 0 on success, 1 on failure */
+static
+int
+i_s_cmp_per_index_reset_fill(
+/*=========================*/
+ THD* thd, /*!< in: thread */
+ TABLE_LIST* tables, /*!< in/out: tables to fill */
+ Item* cond) /*!< in: condition (ignored) */
+{
+ return(i_s_cmp_per_index_fill_low(thd, tables, cond, TRUE));
+}
+
+/*******************************************************************//**
+Bind the dynamic table information_schema.innodb_cmp_per_index.
+@return 0 on success */
+static
+int
+i_s_cmp_per_index_init(
+/*===================*/
+ void* p) /*!< in/out: table schema object */
+{
+ DBUG_ENTER("i_s_cmp_init");
+ ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p;
+
+ schema->fields_info = i_s_cmp_per_index_fields_info;
+ schema->fill_table = i_s_cmp_per_index_fill;
+
+ DBUG_RETURN(0);
+}
+
+/*******************************************************************//**
+Bind the dynamic table information_schema.innodb_cmp_per_index_reset.
+@return 0 on success */
+static
+int
+i_s_cmp_per_index_reset_init(
+/*=========================*/
+ void* p) /*!< in/out: table schema object */
+{
+ DBUG_ENTER("i_s_cmp_reset_init");
+ ST_SCHEMA_TABLE* schema = (ST_SCHEMA_TABLE*) p;
+
+ schema->fields_info = i_s_cmp_per_index_fields_info;
+ schema->fill_table = i_s_cmp_per_index_reset_fill;
+
+ DBUG_RETURN(0);
+}
+
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_cmp_per_index =
+{
+ /* the plugin type (a MYSQL_XXX_PLUGIN value) */
+ /* int */
+ STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
+
+ /* pointer to type-specific plugin descriptor */
+ /* void* */
+ STRUCT_FLD(info, &i_s_info),
+
+ /* plugin name */
+ /* const char* */
+ STRUCT_FLD(name, "INNODB_CMP_PER_INDEX"),
+
+ /* plugin author (for SHOW PLUGINS) */
+ /* const char* */
+ STRUCT_FLD(author, plugin_author),
+
+ /* general descriptive text (for SHOW PLUGINS) */
+ /* const char* */
+ STRUCT_FLD(descr, "Statistics for the InnoDB compression (per index)"),
+
+ /* the plugin license (PLUGIN_LICENSE_XXX) */
+ /* int */
+ STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
+
+ /* the function to invoke when plugin is loaded */
+ /* int (*)(void*); */
+ STRUCT_FLD(init, i_s_cmp_per_index_init),
+
+ /* the function to invoke when plugin is unloaded */
+ /* int (*)(void*); */
+ STRUCT_FLD(deinit, i_s_common_deinit),
+
+ /* plugin version (for SHOW PLUGINS) */
+ /* unsigned int */
+ STRUCT_FLD(version, INNODB_VERSION_SHORT),
+
+ /* struct st_mysql_show_var* */
+ STRUCT_FLD(status_vars, NULL),
+
+ /* struct st_mysql_sys_var** */
+ STRUCT_FLD(system_vars, NULL),
+
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
+};
+
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_cmp_per_index_reset =
+{
+ /* the plugin type (a MYSQL_XXX_PLUGIN value) */
+ /* int */
+ STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
+
+ /* pointer to type-specific plugin descriptor */
+ /* void* */
+ STRUCT_FLD(info, &i_s_info),
+
+ /* plugin name */
+ /* const char* */
+ STRUCT_FLD(name, "INNODB_CMP_PER_INDEX_RESET"),
+
+ /* plugin author (for SHOW PLUGINS) */
+ /* const char* */
+ STRUCT_FLD(author, plugin_author),
+
+ /* general descriptive text (for SHOW PLUGINS) */
+ /* const char* */
+ STRUCT_FLD(descr, "Statistics for the InnoDB compression (per index);"
+ " reset cumulated counts"),
+
+ /* the plugin license (PLUGIN_LICENSE_XXX) */
+ /* int */
+ STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
+
+ /* the function to invoke when plugin is loaded */
+ /* int (*)(void*); */
+ STRUCT_FLD(init, i_s_cmp_per_index_reset_init),
+
+ /* the function to invoke when plugin is unloaded */
+ /* int (*)(void*); */
+ STRUCT_FLD(deinit, i_s_common_deinit),
+
+ /* plugin version (for SHOW PLUGINS) */
+ /* unsigned int */
+ STRUCT_FLD(version, INNODB_VERSION_SHORT),
+
+ /* struct st_mysql_show_var* */
+ STRUCT_FLD(status_vars, NULL),
+
+ /* struct st_mysql_sys_var** */
+ STRUCT_FLD(system_vars, NULL),
+
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
/* Fields of the dynamic table information_schema.innodb_cmpmem. */
@@ -1695,8 +2100,8 @@ i_s_cmpmem_fill_low(
table->field[3]->store(UNIV_LIKELY(x < BUF_BUDDY_SIZES)
? UT_LIST_GET_LEN(buf_pool->zip_free[x])
: 0);
- table->field[4]->store((longlong)
- buddy_stat->relocated, true);
+ table->field[4]->store(
+ (longlong) buddy_stat->relocated, true);
table->field[5]->store(
(ulong) (buddy_stat->relocated_usec / 1000000));
@@ -1786,7 +2191,7 @@ i_s_cmpmem_reset_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_cmpmem =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_cmpmem =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -1830,12 +2235,16 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_cmpmem =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
-UNIV_INTERN struct st_maria_plugin i_s_innodb_cmpmem_reset =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_cmpmem_reset =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -1880,9 +2289,13 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_cmpmem_reset =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_metrics */
@@ -1935,7 +2348,7 @@ static ST_FIELD_INFO innodb_metrics_fields_info[] =
#define METRIC_AVG_VALUE_START 5
{STRUCT_FLD(field_name, "AVG_COUNT"),
- STRUCT_FLD(field_length, 0),
+ STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH),
STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT),
STRUCT_FLD(value, 0),
STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
@@ -1971,7 +2384,7 @@ static ST_FIELD_INFO innodb_metrics_fields_info[] =
#define METRIC_AVG_VALUE_RESET 9
{STRUCT_FLD(field_name, "AVG_COUNT_RESET"),
- STRUCT_FLD(field_length, 0),
+ STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH),
STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT),
STRUCT_FLD(value, 0),
STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
@@ -2360,7 +2773,7 @@ innodb_metrics_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_metrics =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_metrics =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -2404,9 +2817,13 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_metrics =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_ft_default_stopword */
static ST_FIELD_INFO i_s_stopword_fields_info[] =
@@ -2473,7 +2890,7 @@ i_s_stopword_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_default_stopword =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_ft_default_stopword =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -2481,7 +2898,7 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_default_stopword =
/* pointer to type-specific plugin descriptor */
/* void* */
- STRUCT_FLD(info, &i_s_info),
+ STRUCT_FLD(info, &i_s_stopword_fields_info),
/* plugin name */
/* const char* */
@@ -2517,9 +2934,13 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_default_stopword =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_FT_DELETED
@@ -2571,8 +2992,8 @@ i_s_fts_deleted_generic_fill(
deleted = fts_doc_ids_create();
- user_table = dict_table_open_on_name_no_stats(
- fts_internal_tbl_name, FALSE, DICT_ERR_IGNORE_NONE);
+ user_table = dict_table_open_on_name(
+ fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
if (!user_table) {
DBUG_RETURN(0);
@@ -2603,7 +3024,7 @@ i_s_fts_deleted_generic_fill(
fts_doc_ids_free(deleted);
- dict_table_close(user_table, FALSE);
+ dict_table_close(user_table, FALSE, FALSE);
DBUG_RETURN(0);
}
@@ -2642,7 +3063,7 @@ i_s_fts_deleted_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_deleted =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_ft_deleted =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -2650,7 +3071,7 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_deleted =
/* pointer to type-specific plugin descriptor */
/* void* */
- STRUCT_FLD(info, &i_s_info),
+ STRUCT_FLD(info, &i_s_fts_doc_fields_info),
/* plugin name */
/* const char* */
@@ -2686,9 +3107,13 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_deleted =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
/*******************************************************************//**
@@ -2725,7 +3150,7 @@ i_s_fts_being_deleted_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_being_deleted =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_ft_being_deleted =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -2733,7 +3158,7 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_being_deleted =
/* pointer to type-specific plugin descriptor */
/* void* */
- STRUCT_FLD(info, &i_s_info),
+ STRUCT_FLD(info, &i_s_fts_doc_fields_info),
/* plugin name */
/* const char* */
@@ -2769,9 +3194,13 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_being_deleted =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
/*******************************************************************//**
@@ -2803,8 +3232,8 @@ i_s_fts_inserted_fill(
DBUG_RETURN(0);
}
- user_table = dict_table_open_on_name_no_stats(
- fts_internal_tbl_name, FALSE, DICT_ERR_IGNORE_NONE);
+ user_table = dict_table_open_on_name(
+ fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
if (!user_table) {
DBUG_RETURN(0);
@@ -2835,7 +3264,7 @@ i_s_fts_inserted_fill(
fts_doc_ids_free(inserted);
- dict_table_close(user_table, FALSE);
+ dict_table_close(user_table, FALSE, FALSE);
DBUG_RETURN(0);
}
@@ -2858,7 +3287,7 @@ i_s_fts_inserted_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_inserted =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_ft_inserted =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -2866,7 +3295,7 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_inserted =
/* pointer to type-specific plugin descriptor */
/* void* */
- STRUCT_FLD(info, &i_s_info),
+ STRUCT_FLD(info, &i_s_fts_doc_fields_info),
/* plugin name */
/* const char* */
@@ -2902,9 +3331,13 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_inserted =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_FT_INDEX_CACHED and
@@ -3078,8 +3511,8 @@ i_s_fts_index_cache_fill(
DBUG_RETURN(0);
}
- user_table = dict_table_open_on_name_no_stats(
- fts_internal_tbl_name, FALSE, DICT_ERR_IGNORE_NONE);
+ user_table = dict_table_open_on_name(
+ fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
if (!user_table) {
DBUG_RETURN(0);
@@ -3098,7 +3531,7 @@ i_s_fts_index_cache_fill(
i_s_fts_index_cache_fill_one_index(index_cache, thd, tables);
}
- dict_table_close(user_table, FALSE);
+ dict_table_close(user_table, FALSE, FALSE);
DBUG_RETURN(0);
}
@@ -3121,7 +3554,7 @@ i_s_fts_index_cache_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_index_cache =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_ft_index_cache =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -3129,7 +3562,7 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_index_cache =
/* pointer to type-specific plugin descriptor */
/* void* */
- STRUCT_FLD(info, &i_s_info),
+ STRUCT_FLD(info, &i_s_fts_index_fields_info),
/* plugin name */
/* const char* */
@@ -3165,9 +3598,13 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_index_cache =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
/*******************************************************************//**
@@ -3276,6 +3713,7 @@ i_s_fts_index_table_fill_one_index(
ulint num_row_fill;
DBUG_ENTER("i_s_fts_index_cache_fill_one_index");
+ DBUG_ASSERT(!dict_index_is_online_ddl(index));
heap = mem_heap_create(1024);
@@ -3384,8 +3822,8 @@ i_s_fts_index_table_fill(
DBUG_RETURN(0);
}
- user_table = dict_table_open_on_name_no_stats(
- fts_internal_tbl_name, FALSE, DICT_ERR_IGNORE_NONE);
+ user_table = dict_table_open_on_name(
+ fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
if (!user_table) {
DBUG_RETURN(0);
@@ -3398,7 +3836,7 @@ i_s_fts_index_table_fill(
}
}
- dict_table_close(user_table, FALSE);
+ dict_table_close(user_table, FALSE, FALSE);
DBUG_RETURN(0);
}
@@ -3421,7 +3859,7 @@ i_s_fts_index_table_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_index_table =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_ft_index_table =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -3429,7 +3867,7 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_index_table =
/* pointer to type-specific plugin descriptor */
/* void* */
- STRUCT_FLD(info, &i_s_info),
+ STRUCT_FLD(info, &i_s_fts_index_fields_info),
/* plugin name */
/* const char* */
@@ -3465,9 +3903,13 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_index_table =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_FT_CONFIG */
@@ -3541,8 +3983,8 @@ i_s_fts_config_fill(
fields = table->field;
- user_table = dict_table_open_on_name_no_stats(
- fts_internal_tbl_name, FALSE, DICT_ERR_IGNORE_NONE);
+ user_table = dict_table_open_on_name(
+ fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
if (!user_table) {
DBUG_RETURN(0);
@@ -3556,6 +3998,7 @@ i_s_fts_config_fill(
if (!ib_vector_is_empty(user_table->fts->indexes)) {
index = (dict_index_t*) ib_vector_getp_const(
user_table->fts->indexes, 0);
+ DBUG_ASSERT(!dict_index_is_online_ddl(index));
}
while (fts_config_key[i]) {
@@ -3567,10 +4010,10 @@ i_s_fts_config_fill(
value.f_str = str;
- if (strcmp(fts_config_key[i], FTS_TOTAL_WORD_COUNT) == 0
- && index) {
+ if (index
+ && strcmp(fts_config_key[i], FTS_TOTAL_WORD_COUNT) == 0) {
key_name = fts_config_create_index_param_name(
- fts_config_key[i], index);
+ fts_config_key[i], index);
allocated = TRUE;
} else {
key_name = (char*) fts_config_key[i];
@@ -3597,7 +4040,7 @@ i_s_fts_config_fill(
trx_free_for_background(trx);
- dict_table_close(user_table, FALSE);
+ dict_table_close(user_table, FALSE, FALSE);
DBUG_RETURN(0);
}
@@ -3620,7 +4063,7 @@ i_s_fts_config_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_config =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_ft_config =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -3628,7 +4071,7 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_config =
/* pointer to type-specific plugin descriptor */
/* void* */
- STRUCT_FLD(info, &i_s_info),
+ STRUCT_FLD(info, &i_s_fts_config_fields_info),
/* plugin name */
/* const char* */
@@ -3664,9 +4107,13 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_ft_config =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
/* Fields of the dynamic table INNODB_BUFFER_POOL_STATS. */
@@ -3782,7 +4229,7 @@ static ST_FIELD_INFO i_s_innodb_buffer_stats_fields_info[] =
#define IDX_BUF_STATS_PAGE_YOUNG_RATE 12
{STRUCT_FLD(field_name, "PAGES_MADE_YOUNG_RATE"),
- STRUCT_FLD(field_length, 0),
+ STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH),
STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT),
STRUCT_FLD(value, 0),
STRUCT_FLD(field_flags, 0),
@@ -3791,7 +4238,7 @@ static ST_FIELD_INFO i_s_innodb_buffer_stats_fields_info[] =
#define IDX_BUF_STATS_PAGE_NOT_YOUNG_RATE 13
{STRUCT_FLD(field_name, "PAGES_MADE_NOT_YOUNG_RATE"),
- STRUCT_FLD(field_length, 0),
+ STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH),
STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT),
STRUCT_FLD(value, 0),
STRUCT_FLD(field_flags, 0),
@@ -3827,7 +4274,7 @@ static ST_FIELD_INFO i_s_innodb_buffer_stats_fields_info[] =
#define IDX_BUF_STATS_PAGE_READ_RATE 17
{STRUCT_FLD(field_name, "PAGES_READ_RATE"),
- STRUCT_FLD(field_length, 0),
+ STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH),
STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT),
STRUCT_FLD(value, 0),
STRUCT_FLD(field_flags, 0),
@@ -3836,7 +4283,7 @@ static ST_FIELD_INFO i_s_innodb_buffer_stats_fields_info[] =
#define IDX_BUF_STATS_PAGE_CREATE_RATE 18
{STRUCT_FLD(field_name, "PAGES_CREATE_RATE"),
- STRUCT_FLD(field_length, 0),
+ STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH),
STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT),
STRUCT_FLD(value, 0),
STRUCT_FLD(field_flags, 0),
@@ -3845,7 +4292,7 @@ static ST_FIELD_INFO i_s_innodb_buffer_stats_fields_info[] =
#define IDX_BUF_STATS_PAGE_WRITTEN_RATE 19
{STRUCT_FLD(field_name, "PAGES_WRITTEN_RATE"),
- STRUCT_FLD(field_length, 0),
+ STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH),
STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT),
STRUCT_FLD(value, 0),
STRUCT_FLD(field_flags, 0),
@@ -3908,7 +4355,7 @@ static ST_FIELD_INFO i_s_innodb_buffer_stats_fields_info[] =
#define IDX_BUF_STATS_READ_AHEAD_RATE 26
{STRUCT_FLD(field_name, "READ_AHEAD_RATE"),
- STRUCT_FLD(field_length, 0),
+ STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH),
STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT),
STRUCT_FLD(value, 0),
STRUCT_FLD(field_flags, 0),
@@ -3917,7 +4364,7 @@ static ST_FIELD_INFO i_s_innodb_buffer_stats_fields_info[] =
#define IDX_BUF_STATS_READ_AHEAD_EVICT_RATE 27
{STRUCT_FLD(field_name, "READ_AHEAD_EVICTED_RATE"),
- STRUCT_FLD(field_length, 0),
+ STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH),
STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT),
STRUCT_FLD(value, 0),
STRUCT_FLD(field_flags, 0),
@@ -4023,11 +4470,13 @@ i_s_innodb_stats_fill(
OK(fields[IDX_BUF_STATS_PAGE_WRITTEN]->store(info->n_pages_written));
+ OK(fields[IDX_BUF_STATS_GET]->store(info->n_page_gets));
+
OK(fields[IDX_BUF_STATS_PAGE_READ_RATE]->store(info->pages_read_rate));
- OK(fields[IDX_BUF_STATS_PAGE_CREATED]->store(info->pages_created_rate));
+ OK(fields[IDX_BUF_STATS_PAGE_CREATE_RATE]->store(info->pages_created_rate));
- OK(fields[IDX_BUF_STATS_PAGE_WRITTEN]->store(info->pages_written_rate));
+ OK(fields[IDX_BUF_STATS_PAGE_WRITTEN_RATE]->store(info->pages_written_rate));
if (info->n_page_get_delta) {
OK(fields[IDX_BUF_STATS_HIT_RATE]->store(
@@ -4137,7 +4586,7 @@ i_s_innodb_buffer_pool_stats_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_buffer_stats =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_buffer_stats =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -4181,9 +4630,13 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_buffer_stats =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
/* Fields of the dynamic table INNODB_BUFFER_POOL_PAGE. */
@@ -4384,9 +4837,8 @@ i_s_innodb_buffer_page_fill(
TABLE_LIST* tables, /*!< in/out: tables to fill */
const buf_page_info_t* info_array, /*!< in: array cached page
info */
- ulint num_page, /*!< in: number of page info
- cached */
- mem_heap_t* heap) /*!< in: temp heap memory */
+ ulint num_page) /*!< in: number of page info
+ cached */
{
TABLE* table;
Field** fields;
@@ -4400,15 +4852,13 @@ i_s_innodb_buffer_page_fill(
/* Iterate through the cached array and fill the I_S table rows */
for (ulint i = 0; i < num_page; i++) {
const buf_page_info_t* page_info;
- const char* table_name;
- const char* index_name;
+ char table_name[MAX_FULL_NAME_LEN + 1];
+ const char* table_name_end = NULL;
const char* state_str;
enum buf_page_state state;
page_info = info_array + i;
- table_name = NULL;
- index_name = NULL;
state_str = NULL;
OK(fields[IDX_BUFFER_POOL_ID]->store(page_info->pool_id));
@@ -4446,6 +4896,10 @@ i_s_innodb_buffer_page_fill(
OK(fields[IDX_BUFFER_PAGE_ACCESS_TIME]->store(
page_info->access_time));
+ fields[IDX_BUFFER_PAGE_TABLE_NAME]->set_null();
+
+ fields[IDX_BUFFER_PAGE_INDEX_NAME]->set_null();
+
/* If this is an index page, fetch the index name
and table name */
if (page_info->page_type == I_S_PAGE_TYPE_INDEX) {
@@ -4455,32 +4909,28 @@ i_s_innodb_buffer_page_fill(
index = dict_index_get_if_in_cache_low(
page_info->index_id);
- /* Copy the index/table name under mutex. We
- do not want to hold the InnoDB mutex while
- filling the IS table */
if (index) {
- const char* name_ptr = index->name;
-
- if (name_ptr[0] == TEMP_INDEX_PREFIX) {
- name_ptr++;
- }
-
- index_name = mem_heap_strdup(heap, name_ptr);
-
- table_name = mem_heap_strdup(heap,
- index->table_name);
+ table_name_end = innobase_convert_name(
+ table_name, sizeof(table_name),
+ index->table_name,
+ strlen(index->table_name),
+ thd, TRUE);
+
+ OK(fields[IDX_BUFFER_PAGE_TABLE_NAME]->store(
+ table_name,
+ table_name_end - table_name,
+ system_charset_info));
+ fields[IDX_BUFFER_PAGE_TABLE_NAME]->set_notnull();
+
+ OK(field_store_index_name(
+ fields[IDX_BUFFER_PAGE_INDEX_NAME],
+ index->name));
}
mutex_exit(&dict_sys->mutex);
}
- OK(field_store_string(
- fields[IDX_BUFFER_PAGE_TABLE_NAME], table_name));
-
- OK(field_store_string(
- fields[IDX_BUFFER_PAGE_INDEX_NAME], index_name));
-
OK(fields[IDX_BUFFER_PAGE_NUM_RECS]->store(
page_info->num_recs));
@@ -4593,7 +5043,7 @@ i_s_innodb_set_page_type(
/* Encountered an unknown page type */
page_info->page_type = I_S_PAGE_TYPE_UNKNOWN;
} else {
- /* Make sure we get the righ index into the
+ /* Make sure we get the right index into the
i_s_page_type[] array */
ut_a(page_type == i_s_page_type[page_type].type_value);
@@ -4751,7 +5201,7 @@ i_s_innodb_fill_buffer_pool(
just collected from the buffer chunk scan */
status = i_s_innodb_buffer_page_fill(
thd, tables, info_buffer,
- num_page, heap);
+ num_page);
/* If something goes wrong, break and return */
if (status) {
@@ -4830,7 +5280,7 @@ i_s_innodb_buffer_page_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_buffer_page =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_buffer_page =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -4874,9 +5324,13 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_buffer_page =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
static ST_FIELD_INFO i_s_innodb_buf_page_lru_fields_info[] =
@@ -5094,13 +5548,11 @@ i_s_innodb_buf_page_lru_fill(
/* Iterate through the cached array and fill the I_S table rows */
for (ulint i = 0; i < num_page; i++) {
const buf_page_info_t* page_info;
- const char* table_name;
- const char* index_name;
+ char table_name[MAX_FULL_NAME_LEN + 1];
+ const char* table_name_end = NULL;
const char* state_str;
enum buf_page_state state;
- table_name = NULL;
- index_name = NULL;
state_str = NULL;
page_info = info_array + i;
@@ -5140,6 +5592,10 @@ i_s_innodb_buf_page_lru_fill(
OK(fields[IDX_BUF_LRU_PAGE_ACCESS_TIME]->store(
page_info->access_time));
+ fields[IDX_BUF_LRU_PAGE_TABLE_NAME]->set_null();
+
+ fields[IDX_BUF_LRU_PAGE_INDEX_NAME]->set_null();
+
/* If this is an index page, fetch the index name
and table name */
if (page_info->page_type == I_S_PAGE_TYPE_INDEX) {
@@ -5149,30 +5605,28 @@ i_s_innodb_buf_page_lru_fill(
index = dict_index_get_if_in_cache_low(
page_info->index_id);
- /* Copy the index/table name under mutex. We
- do not want to hold the InnoDB mutex while
- filling the IS table */
if (index) {
- const char* name_ptr = index->name;
-
- if (name_ptr[0] == TEMP_INDEX_PREFIX) {
- name_ptr++;
- }
-
- index_name = mem_heap_strdup(heap, name_ptr);
- table_name = mem_heap_strdup(heap,
- index->table_name);
+ table_name_end = innobase_convert_name(
+ table_name, sizeof(table_name),
+ index->table_name,
+ strlen(index->table_name),
+ thd, TRUE);
+
+ OK(fields[IDX_BUF_LRU_PAGE_TABLE_NAME]->store(
+ table_name,
+ table_name_end - table_name,
+ system_charset_info));
+ fields[IDX_BUF_LRU_PAGE_TABLE_NAME]->set_notnull();
+
+ OK(field_store_index_name(
+ fields[IDX_BUF_LRU_PAGE_INDEX_NAME],
+ index->name));
}
mutex_exit(&dict_sys->mutex);
}
- OK(field_store_string(
- fields[IDX_BUF_LRU_PAGE_TABLE_NAME], table_name));
-
- OK(field_store_string(
- fields[IDX_BUF_LRU_PAGE_INDEX_NAME], index_name));
OK(fields[IDX_BUF_LRU_PAGE_NUM_RECS]->store(
page_info->num_recs));
@@ -5372,7 +5826,7 @@ i_s_innodb_buffer_page_lru_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_buffer_page_lru =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_buffer_page_lru =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -5416,9 +5870,13 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_buffer_page_lru =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
/*******************************************************************//**
@@ -5437,10 +5895,11 @@ i_s_common_deinit(
DBUG_RETURN(0);
}
+/** SYS_TABLES ***************************************************/
/* Fields of the dynamic table INFORMATION_SCHEMA.SYS_TABLES */
static ST_FIELD_INFO innodb_sys_tables_fields_info[] =
{
-#define SYS_TABLE_ID 0
+#define SYS_TABLES_ID 0
{STRUCT_FLD(field_name, "TABLE_ID"),
STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
@@ -5449,7 +5908,7 @@ static ST_FIELD_INFO innodb_sys_tables_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define SYS_TABLE_NAME 1
+#define SYS_TABLES_NAME 1
{STRUCT_FLD(field_name, "NAME"),
STRUCT_FLD(field_length, MAX_FULL_NAME_LEN + 1),
STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
@@ -5458,7 +5917,7 @@ static ST_FIELD_INFO innodb_sys_tables_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define SYS_TABLE_FLAG 2
+#define SYS_TABLES_FLAG 2
{STRUCT_FLD(field_name, "FLAG"),
STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
@@ -5467,7 +5926,7 @@ static ST_FIELD_INFO innodb_sys_tables_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define SYS_TABLE_NUM_COLUMN 3
+#define SYS_TABLES_NUM_COLUMN 3
{STRUCT_FLD(field_name, "N_COLS"),
STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
@@ -5476,7 +5935,7 @@ static ST_FIELD_INFO innodb_sys_tables_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-#define SYS_TABLE_SPACE 4
+#define SYS_TABLES_SPACE 4
{STRUCT_FLD(field_name, "SPACE"),
STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
@@ -5485,6 +5944,33 @@ static ST_FIELD_INFO innodb_sys_tables_fields_info[] =
STRUCT_FLD(old_name, ""),
STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+#define SYS_TABLES_FILE_FORMAT 5
+ {STRUCT_FLD(field_name, "FILE_FORMAT"),
+ STRUCT_FLD(field_length, 10),
+ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define SYS_TABLES_ROW_FORMAT 6
+ {STRUCT_FLD(field_name, "ROW_FORMAT"),
+ STRUCT_FLD(field_length, 12),
+ STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
+#define SYS_TABLES_ZIP_PAGE_SIZE 7
+ {STRUCT_FLD(field_name, "ZIP_PAGE_SIZE"),
+ STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
+ STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
+ STRUCT_FLD(value, 0),
+ STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
+ STRUCT_FLD(old_name, ""),
+ STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
+
END_OF_ST_FIELD_INFO
};
@@ -5501,20 +5987,42 @@ i_s_dict_fill_sys_tables(
TABLE* table_to_fill) /*!< in/out: fill this table */
{
Field** fields;
+ ulint compact = DICT_TF_GET_COMPACT(table->flags);
+ ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(table->flags);
+ ulint zip_size = dict_tf_get_zip_size(table->flags);
+ const char* file_format;
+ const char* row_format;
+
+ file_format = trx_sys_file_format_id_to_name(atomic_blobs);
+ if (!compact) {
+ row_format = "Redundant";
+ } else if (!atomic_blobs) {
+ row_format = "Compact";
+ } else if DICT_TF_GET_ZIP_SSIZE(table->flags) {
+ row_format = "Compressed";
+ } else {
+ row_format = "Dynamic";
+ }
DBUG_ENTER("i_s_dict_fill_sys_tables");
fields = table_to_fill->field;
- OK(fields[SYS_TABLE_ID]->store(longlong(table->id), TRUE));
+ OK(fields[SYS_TABLES_ID]->store(longlong(table->id), TRUE));
+
+ OK(field_store_string(fields[SYS_TABLES_NAME], table->name));
+
+ OK(fields[SYS_TABLES_FLAG]->store(table->flags));
- OK(field_store_string(fields[SYS_TABLE_NAME], table->name));
+ OK(fields[SYS_TABLES_NUM_COLUMN]->store(table->n_cols));
- OK(fields[SYS_TABLE_FLAG]->store(table->flags));
+ OK(fields[SYS_TABLES_SPACE]->store(table->space));
- OK(fields[SYS_TABLE_NUM_COLUMN]->store(table->n_cols));
+ OK(field_store_string(fields[SYS_TABLES_FILE_FORMAT], file_format));
- OK(fields[SYS_TABLE_SPACE]->store(table->space));
+ OK(field_store_string(fields[SYS_TABLES_ROW_FORMAT], row_format));
+
+ OK(fields[SYS_TABLES_ZIP_PAGE_SIZE]->store(zip_size));
OK(schema_table_store_record(thd, table_to_fill));
@@ -5614,7 +6122,7 @@ innodb_sys_tables_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_tables =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_tables =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -5658,11 +6166,16 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_tables =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
+/** SYS_TABLESTATS ***********************************************/
/* Fields of the dynamic table INFORMATION_SCHEMA.SYS_TABLESTATS */
static ST_FIELD_INFO innodb_sys_tablestats_fields_info[] =
{
@@ -5772,24 +6285,37 @@ i_s_dict_fill_sys_tablestats(
OK(field_store_string(fields[SYS_TABLESTATS_NAME], table->name));
+ dict_table_stats_lock(table, RW_S_LATCH);
+
if (table->stat_initialized) {
OK(field_store_string(fields[SYS_TABLESTATS_INIT],
"Initialized"));
+
+ OK(fields[SYS_TABLESTATS_NROW]->store(table->stat_n_rows,
+ TRUE));
+
+ OK(fields[SYS_TABLESTATS_CLUST_SIZE]->store(
+ table->stat_clustered_index_size));
+
+ OK(fields[SYS_TABLESTATS_INDEX_SIZE]->store(
+ table->stat_sum_of_other_index_sizes));
+
+ OK(fields[SYS_TABLESTATS_MODIFIED]->store(
+ (ulint) table->stat_modified_counter));
} else {
OK(field_store_string(fields[SYS_TABLESTATS_INIT],
"Uninitialized"));
- }
- OK(fields[SYS_TABLESTATS_NROW]->store(table->stat_n_rows, TRUE));
+ OK(fields[SYS_TABLESTATS_NROW]->store(0, TRUE));
- OK(fields[SYS_TABLESTATS_CLUST_SIZE]->store(
- table->stat_clustered_index_size));
+ OK(fields[SYS_TABLESTATS_CLUST_SIZE]->store(0));
- OK(fields[SYS_TABLESTATS_INDEX_SIZE]->store(
- table->stat_sum_of_other_index_sizes));
+ OK(fields[SYS_TABLESTATS_INDEX_SIZE]->store(0));
- OK(fields[SYS_TABLESTATS_MODIFIED]->store(
- table->stat_modified_counter));
+ OK(fields[SYS_TABLESTATS_MODIFIED]->store(0));
+ }
+
+ dict_table_stats_unlock(table, RW_S_LATCH);
OK(fields[SYS_TABLESTATS_AUTONINC]->store(table->autoinc, TRUE));
@@ -5889,7 +6415,7 @@ innodb_sys_tablestats_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_tablestats =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_tablestats =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -5933,11 +6459,16 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_tablestats =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
+/** SYS_INDEXES **************************************************/
/* Fields of the dynamic table INFORMATION_SCHEMA.SYS_INDEXES */
static ST_FIELD_INFO innodb_sysindex_fields_info[] =
{
@@ -6022,17 +6553,12 @@ i_s_dict_fill_sys_indexes(
TABLE* table_to_fill) /*!< in/out: fill this table */
{
Field** fields;
- const char* name_ptr = index->name;
DBUG_ENTER("i_s_dict_fill_sys_indexes");
fields = table_to_fill->field;
- if (name_ptr[0] == TEMP_INDEX_PREFIX) {
- name_ptr++;
- }
-
- OK(field_store_string(fields[SYS_INDEX_NAME], name_ptr));
+ OK(field_store_index_name(fields[SYS_INDEX_NAME], index->name));
OK(fields[SYS_INDEX_ID]->store(longlong(index->id), TRUE));
@@ -6144,7 +6670,7 @@ innodb_sys_indexes_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_indexes =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_indexes =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -6188,12 +6714,17 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_indexes =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
-/* Fields of the dynamic table INFORMATION_SCHEMA.SYS_COLUMNS */
+/** SYS_COLUMNS **************************************************/
+/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_COLUMNS */
static ST_FIELD_INFO innodb_sys_columns_fields_info[] =
{
#define SYS_COLUMN_TABLE_ID 0
@@ -6379,7 +6910,7 @@ innodb_sys_columns_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_columns =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_columns =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -6423,11 +6954,17 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_columns =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
-/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_sys_fields */
+
+/** SYS_FIELDS ***************************************************/
+/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_FIELDS */
static ST_FIELD_INFO innodb_sys_fields_fields_info[] =
{
#define SYS_FIELD_INDEX_ID 0
@@ -6586,7 +7123,7 @@ innodb_sys_fields_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_fields =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_fields =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -6630,12 +7167,17 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_fields =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
-/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_sys_foreign */
+/** SYS_FOREIGN ********************************************/
+/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_FOREIGN */
static ST_FIELD_INFO innodb_sys_foreign_fields_info[] =
{
#define SYS_FOREIGN_ID 0
@@ -6720,6 +7262,7 @@ i_s_dict_fill_sys_foreign(
DBUG_RETURN(0);
}
+
/*******************************************************************//**
Function to populate INFORMATION_SCHEMA.innodb_sys_foreign table. Loop
through each record in SYS_FOREIGN, and extract the foreign key
@@ -6786,6 +7329,7 @@ i_s_sys_foreign_fill_table(
DBUG_RETURN(0);
}
+
/*******************************************************************//**
Bind the dynamic table INFORMATION_SCHEMA.innodb_sys_foreign
@return 0 on success */
@@ -6807,7 +7351,7 @@ innodb_sys_foreign_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_foreign =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_foreign =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -6851,11 +7395,17 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_foreign =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
};
-/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_sys_foreign_cols */
+
+/** SYS_FOREIGN_COLS ********************************************/
+/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_FOREIGN_COLS */
static ST_FIELD_INFO innodb_sys_foreign_cols_fields_info[] =
{
#define SYS_FOREIGN_COL_ID 0
@@ -7021,7 +7571,7 @@ innodb_sys_foreign_cols_init(
DBUG_RETURN(0);
}
-UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_foreign_cols =
+UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_foreign_cols =
{
/* the plugin type (a MYSQL_XXX_PLUGIN value) */
/* int */
@@ -7065,8 +7615,470 @@ UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_foreign_cols =
/* struct st_mysql_sys_var** */
STRUCT_FLD(system_vars, NULL),
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
+ /* reserved for dependency checking */
+ /* void* */
+ STRUCT_FLD(__reserved1, NULL),
+
+ /* Plugin flags */
+ /* unsigned long */
+ STRUCT_FLD(flags, 0UL),
+};
+
+/** SYS_TABLESPACES ********************************************/
+/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES.
+The SYS_TABLESPACES_* constants below are the column ordinals used by
+i_s_dict_fill_sys_tablespaces() when storing each row. */
+static ST_FIELD_INFO	innodb_sys_tablespaces_fields_info[] =
+{
+#define SYS_TABLESPACES_SPACE		0
+	{STRUCT_FLD(field_name,		"SPACE"),
+	 STRUCT_FLD(field_length,	MY_INT32_NUM_DECIMAL_DIGITS),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONG),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+#define SYS_TABLESPACES_NAME		1
+	{STRUCT_FLD(field_name,		"NAME"),
+	 STRUCT_FLD(field_length,	MAX_FULL_NAME_LEN + 1),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	0),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+#define SYS_TABLESPACES_FLAGS		2
+	{STRUCT_FLD(field_name,		"FLAG"),
+	 STRUCT_FLD(field_length,	MY_INT32_NUM_DECIMAL_DIGITS),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONG),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+#define SYS_TABLESPACES_FILE_FORMAT	3
+	{STRUCT_FLD(field_name,		"FILE_FORMAT"),
+	 STRUCT_FLD(field_length,	10),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	MY_I_S_MAYBE_NULL),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+#define SYS_TABLESPACES_ROW_FORMAT	4
+	{STRUCT_FLD(field_name,		"ROW_FORMAT"),
+	 STRUCT_FLD(field_length,	22),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	MY_I_S_MAYBE_NULL),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+#define SYS_TABLESPACES_PAGE_SIZE	5
+	{STRUCT_FLD(field_name,		"PAGE_SIZE"),
+	 STRUCT_FLD(field_length,	MY_INT32_NUM_DECIMAL_DIGITS),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONG),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+#define SYS_TABLESPACES_ZIP_PAGE_SIZE	6
+	{STRUCT_FLD(field_name,		"ZIP_PAGE_SIZE"),
+	 STRUCT_FLD(field_length,	MY_INT32_NUM_DECIMAL_DIGITS),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONG),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+	END_OF_ST_FIELD_INFO
+
+};
+
+/**********************************************************************//**
+Function to fill INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES with information
+collected by scanning the SYS_TABLESPACES table.
+@return 0 on success */
+static
+int
+i_s_dict_fill_sys_tablespaces(
+/*==========================*/
+	THD*		thd,		/*!< in: thread */
+	ulint		space,		/*!< in: space ID */
+	const char*	name,		/*!< in: tablespace name */
+	ulint		flags,		/*!< in: tablespace flags */
+	TABLE*		table_to_fill)	/*!< in/out: fill this table */
+{
+	Field**		fields;
+	ulint		atomic_blobs = FSP_FLAGS_HAS_ATOMIC_BLOBS(flags);
+	ulint		page_size = fsp_flags_get_page_size(flags);
+	ulint		zip_size = fsp_flags_get_zip_size(flags);
+	const char*	file_format;
+	const char*	row_format;
+
+	DBUG_ENTER("i_s_dict_fill_sys_tablespaces");
+
+	/* The file format is derived from the atomic-blobs flag:
+	only Barracuda-and-later formats support atomic BLOBs. */
+	file_format = trx_sys_file_format_id_to_name(atomic_blobs);
+	if (!atomic_blobs) {
+		/* Pre-Barracuda tablespace flags cannot distinguish
+		between the Compact and Redundant row formats. */
+		row_format = "Compact or Redundant";
+	} else if (DICT_TF_GET_ZIP_SSIZE(flags)) {
+		/* NOTE(review): DICT_TF_GET_ZIP_SSIZE() is applied to
+		fsp flags here; confirm FSP_FLAGS_GET_ZIP_SSIZE() was
+		not intended. */
+		row_format = "Compressed";
+	} else {
+		row_format = "Dynamic";
+	}
+
+	fields = table_to_fill->field;
+
+	OK(fields[SYS_TABLESPACES_SPACE]->store(space));
+
+	OK(field_store_string(fields[SYS_TABLESPACES_NAME], name));
+
+	OK(fields[SYS_TABLESPACES_FLAGS]->store(flags));
+
+	OK(field_store_string(fields[SYS_TABLESPACES_FILE_FORMAT],
+			      file_format));
+
+	OK(field_store_string(fields[SYS_TABLESPACES_ROW_FORMAT],
+			      row_format));
+
+	OK(fields[SYS_TABLESPACES_PAGE_SIZE]->store(page_size));
+
+	OK(fields[SYS_TABLESPACES_ZIP_PAGE_SIZE]->store(zip_size));
+
+	/* Hand the completed row to the server layer. */
+	OK(schema_table_store_record(thd, table_to_fill));
+
+	DBUG_RETURN(0);
+}
+/*******************************************************************//**
+Function to populate INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES table.
+Loop through each record in SYS_TABLESPACES, and extract the column
+information and fill the INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES table.
+@return 0 on success */
+static
+int
+i_s_sys_tablespaces_fill_table(
+/*===========================*/
+	THD*		thd,	/*!< in: thread */
+	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
+	Item*		)	/*!< in: condition (not used) */
+{
+	btr_pcur_t	pcur;
+	const rec_t*	rec;
+	mem_heap_t*	heap;
+	mtr_t		mtr;
+
+	DBUG_ENTER("i_s_sys_tablespaces_fill_table");
+
+	/* deny access to user without PROCESS_ACL privilege */
+	if (check_global_access(thd, PROCESS_ACL)) {
+		DBUG_RETURN(0);
+	}
+
+	heap = mem_heap_create(1000);
+	mutex_enter(&dict_sys->mutex);
+	mtr_start(&mtr);
+
+	rec = dict_startscan_system(&pcur, &mtr, SYS_TABLESPACES);
+
+	while (rec) {
+		const char*	err_msg;
+		ulint		space;
+		const char*	name;
+		ulint		flags;
+
+		/* Extract necessary information from a SYS_TABLESPACES row */
+		err_msg = dict_process_sys_tablespaces(
+			heap, rec, &space, &name, &flags);
+
+		/* Release the mini-transaction and the dict mutex before
+		handing the row to the server layer; both are re-acquired
+		below before fetching the next record. */
+		mtr_commit(&mtr);
+		mutex_exit(&dict_sys->mutex);
+
+		if (!err_msg) {
+			i_s_dict_fill_sys_tablespaces(
+				thd, space, name, flags,
+				tables->table);
+		} else {
+			/* Report an unparsable row as a warning rather
+			than aborting the whole scan. */
+			push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+					    ER_CANT_FIND_SYSTEM_REC, "%s",
+					    err_msg);
+		}
+
+		/* Reuse the heap for the next row's copies. */
+		mem_heap_empty(heap);
+
+		/* Get the next record */
+		mutex_enter(&dict_sys->mutex);
+		mtr_start(&mtr);
+		rec = dict_getnext_system(&pcur, &mtr);
+	}
+
+	mtr_commit(&mtr);
+	mutex_exit(&dict_sys->mutex);
+	mem_heap_free(heap);
+
+	DBUG_RETURN(0);
+}
+/*******************************************************************//**
+Bind the dynamic table INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES
+@return 0 on success */
+static
+int
+innodb_sys_tablespaces_init(
+/*========================*/
+	void*	p)	/*!< in/out: table schema object
+			(an ST_SCHEMA_TABLE*) */
+{
+	ST_SCHEMA_TABLE*	schema;
+
+	DBUG_ENTER("innodb_sys_tablespaces_init");
+
+	schema = (ST_SCHEMA_TABLE*) p;
+
+	/* Install the column definitions and the fill callback. */
+	schema->fields_info = innodb_sys_tablespaces_fields_info;
+	schema->fill_table = i_s_sys_tablespaces_fill_table;
+
+	DBUG_RETURN(0);
+}
+
+/** Plugin descriptor for INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES;
+the initializers are positional, so their order follows st_mysql_plugin. */
+UNIV_INTERN struct st_mysql_plugin	i_s_innodb_sys_tablespaces =
+{
+	/* the plugin type (a MYSQL_XXX_PLUGIN value) */
+	/* int */
+	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
+
+	/* pointer to type-specific plugin descriptor */
+	/* void* */
+	STRUCT_FLD(info, &i_s_info),
+
+	/* plugin name */
+	/* const char* */
+	STRUCT_FLD(name, "INNODB_SYS_TABLESPACES"),
+
+	/* plugin author (for SHOW PLUGINS) */
+	/* const char* */
+	STRUCT_FLD(author, plugin_author),
+
+	/* general descriptive text (for SHOW PLUGINS) */
+	/* const char* */
+	STRUCT_FLD(descr, "InnoDB SYS_TABLESPACES"),
+
+	/* the plugin license (PLUGIN_LICENSE_XXX) */
+	/* int */
+	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
+
+	/* the function to invoke when plugin is loaded */
+	/* int (*)(void*); */
+	STRUCT_FLD(init, innodb_sys_tablespaces_init),
+
+	/* the function to invoke when plugin is unloaded */
+	/* int (*)(void*); */
+	STRUCT_FLD(deinit, i_s_common_deinit),
+
+	/* plugin version (for SHOW PLUGINS) */
+	/* unsigned int */
+	STRUCT_FLD(version, INNODB_VERSION_SHORT),
+
+	/* struct st_mysql_show_var* */
+	STRUCT_FLD(status_vars, NULL),
+
+	/* struct st_mysql_sys_var** */
+	STRUCT_FLD(system_vars, NULL),
+
+	/* reserved for dependency checking */
+	/* void* */
+	STRUCT_FLD(__reserved1, NULL),
+
+	/* Plugin flags */
+	/* unsigned long */
+	STRUCT_FLD(flags, 0UL),
+};
+/** SYS_DATAFILES ************************************************/
+/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_DATAFILES.
+The SYS_DATAFILES_* constants below are the column ordinals used by
+i_s_dict_fill_sys_datafiles() when storing each row. */
+static ST_FIELD_INFO	innodb_sys_datafiles_fields_info[] =
+{
+#define SYS_DATAFILES_SPACE		0
+	{STRUCT_FLD(field_name,		"SPACE"),
+	 STRUCT_FLD(field_length,	MY_INT32_NUM_DECIMAL_DIGITS),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_LONG),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	MY_I_S_UNSIGNED),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+#define SYS_DATAFILES_PATH		1
+	{STRUCT_FLD(field_name,		"PATH"),
+	 STRUCT_FLD(field_length,	OS_FILE_MAX_PATH),
+	 STRUCT_FLD(field_type,		MYSQL_TYPE_STRING),
+	 STRUCT_FLD(value,		0),
+	 STRUCT_FLD(field_flags,	0),
+	 STRUCT_FLD(old_name,		""),
+	 STRUCT_FLD(open_method,	SKIP_OPEN_TABLE)},
+
+	END_OF_ST_FIELD_INFO
+};
+
+/**********************************************************************//**
+Function to fill INFORMATION_SCHEMA.INNODB_SYS_DATAFILES with information
+collected by scanning the SYS_DATAFILES table.
+@return 0 on success */
+static
+int
+i_s_dict_fill_sys_datafiles(
+/*========================*/
+	THD*		thd,		/*!< in: thread */
+	ulint		space,		/*!< in: space ID */
+	const char*	path,		/*!< in: absolute path */
+	TABLE*		table_to_fill)	/*!< in/out: fill this table */
+{
+	Field**	fields;
+
+	DBUG_ENTER("i_s_dict_fill_sys_datafiles");
+
+	fields = table_to_fill->field;
+
+	OK(field_store_ulint(fields[SYS_DATAFILES_SPACE], space));
+
+	OK(field_store_string(fields[SYS_DATAFILES_PATH], path));
+
+	/* Hand the completed row to the server layer. */
+	OK(schema_table_store_record(thd, table_to_fill));
+
+	DBUG_RETURN(0);
+}
+/*******************************************************************//**
+Function to populate INFORMATION_SCHEMA.INNODB_SYS_DATAFILES table.
+Loop through each record in SYS_DATAFILES, and extract the column
+information and fill the INFORMATION_SCHEMA.INNODB_SYS_DATAFILES table.
+@return 0 on success */
+static
+int
+i_s_sys_datafiles_fill_table(
+/*=========================*/
+	THD*		thd,	/*!< in: thread */
+	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
+	Item*		)	/*!< in: condition (not used) */
+{
+	btr_pcur_t	pcur;
+	const rec_t*	rec;
+	mem_heap_t*	heap;
+	mtr_t		mtr;
+
+	DBUG_ENTER("i_s_sys_datafiles_fill_table");
+
+	/* deny access to user without PROCESS_ACL privilege */
+	if (check_global_access(thd, PROCESS_ACL)) {
+		DBUG_RETURN(0);
+	}
+
+	heap = mem_heap_create(1000);
+	mutex_enter(&dict_sys->mutex);
+	mtr_start(&mtr);
+
+	rec = dict_startscan_system(&pcur, &mtr, SYS_DATAFILES);
+
+	while (rec) {
+		const char*	err_msg;
+		ulint		space;
+		const char*	path;
+
+		/* Extract necessary information from a SYS_DATAFILES row */
+		err_msg = dict_process_sys_datafiles(
+			heap, rec, &space, &path);
+
+		/* Release the mini-transaction and the dict mutex before
+		handing the row to the server layer; both are re-acquired
+		below before fetching the next record. */
+		mtr_commit(&mtr);
+		mutex_exit(&dict_sys->mutex);
+
+		if (!err_msg) {
+			i_s_dict_fill_sys_datafiles(
+				thd, space, path, tables->table);
+		} else {
+			/* Report an unparsable row as a warning rather
+			than aborting the whole scan. */
+			push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
+					    ER_CANT_FIND_SYSTEM_REC, "%s",
+					    err_msg);
+		}
+
+		/* Reuse the heap for the next row's copies. */
+		mem_heap_empty(heap);
+
+		/* Get the next record */
+		mutex_enter(&dict_sys->mutex);
+		mtr_start(&mtr);
+		rec = dict_getnext_system(&pcur, &mtr);
+	}
+
+	mtr_commit(&mtr);
+	mutex_exit(&dict_sys->mutex);
+	mem_heap_free(heap);
+
+	DBUG_RETURN(0);
+}
+/*******************************************************************//**
+Bind the dynamic table INFORMATION_SCHEMA.INNODB_SYS_DATAFILES
+@return 0 on success */
+static
+int
+innodb_sys_datafiles_init(
+/*======================*/
+	void*	p)	/*!< in/out: table schema object
+			(an ST_SCHEMA_TABLE*) */
+{
+	ST_SCHEMA_TABLE*	schema;
+
+	DBUG_ENTER("innodb_sys_datafiles_init");
+
+	schema = (ST_SCHEMA_TABLE*) p;
+
+	/* Install the column definitions and the fill callback. */
+	schema->fields_info = innodb_sys_datafiles_fields_info;
+	schema->fill_table = i_s_sys_datafiles_fill_table;
+
+	DBUG_RETURN(0);
+}
+
+/** Plugin descriptor for INFORMATION_SCHEMA.INNODB_SYS_DATAFILES;
+the initializers are positional, so their order follows st_mysql_plugin. */
+UNIV_INTERN struct st_mysql_plugin	i_s_innodb_sys_datafiles =
+{
+	/* the plugin type (a MYSQL_XXX_PLUGIN value) */
+	/* int */
+	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
+
+	/* pointer to type-specific plugin descriptor */
+	/* void* */
+	STRUCT_FLD(info, &i_s_info),
+
+	/* plugin name */
+	/* const char* */
+	STRUCT_FLD(name, "INNODB_SYS_DATAFILES"),
+
+	/* plugin author (for SHOW PLUGINS) */
+	/* const char* */
+	STRUCT_FLD(author, plugin_author),
+
+	/* general descriptive text (for SHOW PLUGINS) */
+	/* const char* */
+	STRUCT_FLD(descr, "InnoDB SYS_DATAFILES"),
+
+	/* the plugin license (PLUGIN_LICENSE_XXX) */
+	/* int */
+	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
+
+	/* the function to invoke when plugin is loaded */
+	/* int (*)(void*); */
+	STRUCT_FLD(init, innodb_sys_datafiles_init),
+
+	/* the function to invoke when plugin is unloaded */
+	/* int (*)(void*); */
+	STRUCT_FLD(deinit, i_s_common_deinit),
+
+	/* plugin version (for SHOW PLUGINS) */
+	/* unsigned int */
+	STRUCT_FLD(version, INNODB_VERSION_SHORT),
+
+	/* struct st_mysql_show_var* */
+	STRUCT_FLD(status_vars, NULL),
+
+	/* struct st_mysql_sys_var** */
+	STRUCT_FLD(system_vars, NULL),
+
+	/* reserved for dependency checking */
+	/* void* */
+	STRUCT_FLD(__reserved1, NULL),
+
+	/* Plugin flags */
+	/* unsigned long */
+	STRUCT_FLD(flags, 0UL),
+};
diff --git a/storage/innobase/handler/i_s.h b/storage/innobase/handler/i_s.h
index 7fc7b091795..9e3e651706a 100644
--- a/storage/innobase/handler/i_s.h
+++ b/storage/innobase/handler/i_s.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -28,30 +28,34 @@ Created July 18, 2007 Vasil Dimov
const char plugin_author[] = "Oracle Corporation";
-extern struct st_maria_plugin i_s_innodb_trx;
-extern struct st_maria_plugin i_s_innodb_locks;
-extern struct st_maria_plugin i_s_innodb_lock_waits;
-extern struct st_maria_plugin i_s_innodb_cmp;
-extern struct st_maria_plugin i_s_innodb_cmp_reset;
-extern struct st_maria_plugin i_s_innodb_cmpmem;
-extern struct st_maria_plugin i_s_innodb_cmpmem_reset;
-extern struct st_maria_plugin i_s_innodb_metrics;
-extern struct st_maria_plugin i_s_innodb_ft_default_stopword;
-extern struct st_maria_plugin i_s_innodb_ft_inserted;
-extern struct st_maria_plugin i_s_innodb_ft_deleted;
-extern struct st_maria_plugin i_s_innodb_ft_being_deleted;
-extern struct st_maria_plugin i_s_innodb_ft_index_cache;
-extern struct st_maria_plugin i_s_innodb_ft_index_table;
-extern struct st_maria_plugin i_s_innodb_ft_config;
-extern struct st_maria_plugin i_s_innodb_buffer_page;
-extern struct st_maria_plugin i_s_innodb_buffer_page_lru;
-extern struct st_maria_plugin i_s_innodb_buffer_stats;
-extern struct st_maria_plugin i_s_innodb_sys_tables;
-extern struct st_maria_plugin i_s_innodb_sys_tablestats;
-extern struct st_maria_plugin i_s_innodb_sys_indexes;
-extern struct st_maria_plugin i_s_innodb_sys_columns;
-extern struct st_maria_plugin i_s_innodb_sys_fields;
-extern struct st_maria_plugin i_s_innodb_sys_foreign;
-extern struct st_maria_plugin i_s_innodb_sys_foreign_cols;
+extern struct st_mysql_plugin i_s_innodb_trx;
+extern struct st_mysql_plugin i_s_innodb_locks;
+extern struct st_mysql_plugin i_s_innodb_lock_waits;
+extern struct st_mysql_plugin i_s_innodb_cmp;
+extern struct st_mysql_plugin i_s_innodb_cmp_reset;
+extern struct st_mysql_plugin i_s_innodb_cmp_per_index;
+extern struct st_mysql_plugin i_s_innodb_cmp_per_index_reset;
+extern struct st_mysql_plugin i_s_innodb_cmpmem;
+extern struct st_mysql_plugin i_s_innodb_cmpmem_reset;
+extern struct st_mysql_plugin i_s_innodb_metrics;
+extern struct st_mysql_plugin i_s_innodb_ft_default_stopword;
+extern struct st_mysql_plugin i_s_innodb_ft_inserted;
+extern struct st_mysql_plugin i_s_innodb_ft_deleted;
+extern struct st_mysql_plugin i_s_innodb_ft_being_deleted;
+extern struct st_mysql_plugin i_s_innodb_ft_index_cache;
+extern struct st_mysql_plugin i_s_innodb_ft_index_table;
+extern struct st_mysql_plugin i_s_innodb_ft_config;
+extern struct st_mysql_plugin i_s_innodb_buffer_page;
+extern struct st_mysql_plugin i_s_innodb_buffer_page_lru;
+extern struct st_mysql_plugin i_s_innodb_buffer_stats;
+extern struct st_mysql_plugin i_s_innodb_sys_tables;
+extern struct st_mysql_plugin i_s_innodb_sys_tablestats;
+extern struct st_mysql_plugin i_s_innodb_sys_indexes;
+extern struct st_mysql_plugin i_s_innodb_sys_columns;
+extern struct st_mysql_plugin i_s_innodb_sys_fields;
+extern struct st_mysql_plugin i_s_innodb_sys_foreign;
+extern struct st_mysql_plugin i_s_innodb_sys_foreign_cols;
+extern struct st_mysql_plugin i_s_innodb_sys_tablespaces;
+extern struct st_mysql_plugin i_s_innodb_sys_datafiles;
#endif /* i_s_h */
diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc
index cd9de39f3c6..168da732bc0 100644
--- a/storage/innobase/ibuf/ibuf0ibuf.cc
+++ b/storage/innobase/ibuf/ibuf0ibuf.cc
@@ -25,6 +25,10 @@ Created 7/19/1997 Heikki Tuuri
#include "ibuf0ibuf.h"
+#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+UNIV_INTERN my_bool srv_ibuf_disable_background_merge;
+#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+
/** Number of bits describing a single page */
#define IBUF_BITS_PER_PAGE 4
#if IBUF_BITS_PER_PAGE % 2
@@ -56,6 +60,7 @@ Created 7/19/1997 Heikki Tuuri
#include "log0recv.h"
#include "que0que.h"
#include "srv0start.h" /* srv_shutdown_state */
+#include "ha_prototypes.h"
/* STRUCTURE OF AN INSERT BUFFER RECORD
@@ -284,16 +289,16 @@ type, counter, and some flags. */
/** The mutex used to block pessimistic inserts to ibuf trees */
-static mutex_t ibuf_pessimistic_insert_mutex;
+static ib_mutex_t ibuf_pessimistic_insert_mutex;
/** The mutex protecting the insert buffer structs */
-static mutex_t ibuf_mutex;
+static ib_mutex_t ibuf_mutex;
/** The mutex protecting the insert buffer bitmaps */
-static mutex_t ibuf_bitmap_mutex;
+static ib_mutex_t ibuf_bitmap_mutex;
/** The area in pages from which contract looks for page numbers for merge */
-#define IBUF_MERGE_AREA 8
+#define IBUF_MERGE_AREA 8UL
/** Inside the merge area, pages which have at most 1 per this number less
buffered entries compared to maximum volume that can buffered for a single
@@ -507,7 +512,7 @@ ibuf_init_at_db_start(void)
dict_index_t* index;
ulint n_used;
page_t* header_page;
- ulint error;
+ dberr_t error;
ibuf = static_cast<ibuf_t*>(mem_zalloc(sizeof(ibuf_t)));
@@ -2485,6 +2490,73 @@ ibuf_get_merge_page_nos_func(
return(sum_volumes);
}
+/*******************************************************************//**
+Get the matching records for space id.
+@return current rec or NULL */
+static __attribute__((nonnull, warn_unused_result))
+const rec_t*
+ibuf_get_user_rec(
+/*===============*/
+ btr_pcur_t* pcur, /*!< in: the current cursor */
+ mtr_t* mtr) /*!< in: mini transaction */
+{
+ do {
+ const rec_t* rec = btr_pcur_get_rec(pcur);
+
+ if (page_rec_is_user_rec(rec)) {
+ return(rec);
+ }
+ } while (btr_pcur_move_to_next(pcur, mtr));
+
+ return(NULL);
+}
+
+/*********************************************************************//**
+Reads page numbers for a space id from an ibuf tree.
+@return a lower limit for the combined volume of records which will be
+merged */
+static __attribute__((nonnull, warn_unused_result))
+ulint
+ibuf_get_merge_pages(
+/*=================*/
+ btr_pcur_t* pcur, /*!< in/out: cursor */
+ ulint space, /*!< in: space for which to merge */
+ ulint limit, /*!< in: max page numbers to read */
+ ulint* pages, /*!< out: pages read */
+ ulint* spaces, /*!< out: spaces read */
+ ib_int64_t* versions,/*!< out: space versions read */
+ ulint* n_pages,/*!< out: number of pages read */
+ mtr_t* mtr) /*!< in: mini transaction */
+{
+ const rec_t* rec;
+ ulint volume = 0;
+ ib_int64_t version = fil_space_get_version(space);
+
+ ut_a(space != ULINT_UNDEFINED);
+
+ *n_pages = 0;
+
+ while ((rec = ibuf_get_user_rec(pcur, mtr)) != 0
+ && ibuf_rec_get_space(mtr, rec) == space
+ && *n_pages < limit) {
+
+ ulint page_no = ibuf_rec_get_page_no(mtr, rec);
+
+ if (*n_pages == 0 || pages[*n_pages - 1] != page_no) {
+ spaces[*n_pages] = space;
+ pages[*n_pages] = page_no;
+ versions[*n_pages] = version;
+ ++*n_pages;
+ }
+
+ volume += ibuf_rec_get_volume(mtr, rec);
+
+ btr_pcur_move_to_next(pcur, mtr);
+ }
+
+ return(volume);
+}
+
/*********************************************************************//**
Contracts insert buffer trees by reading pages to the buffer pool.
@return a lower limit for the combined size in bytes of entries which
@@ -2492,32 +2564,22 @@ will be merged from ibuf trees to the pages read, 0 if ibuf is
empty */
static
ulint
-ibuf_contract_ext(
-/*==============*/
- ulint* n_pages,/*!< out: number of pages to which merged */
- ibool sync) /*!< in: TRUE if the caller wants to wait for the
- issued read with the highest tablespace address
- to complete */
+ibuf_merge_pages(
+/*=============*/
+ ulint* n_pages, /*!< out: number of pages to which merged */
+ bool sync) /*!< in: TRUE if the caller wants to wait for
+ the issued read with the highest tablespace
+ address to complete */
{
+ mtr_t mtr;
btr_pcur_t pcur;
+ ulint sum_sizes;
ulint page_nos[IBUF_MAX_N_PAGES_MERGED];
ulint space_ids[IBUF_MAX_N_PAGES_MERGED];
ib_int64_t space_versions[IBUF_MAX_N_PAGES_MERGED];
- ulint sum_sizes;
- mtr_t mtr;
*n_pages = 0;
- /* We perform a dirty read of ibuf->empty, without latching
- the insert buffer root page. We trust this dirty read except
- when a slow shutdown is being executed. During a slow
- shutdown, the insert buffer merge must be completed. */
-
- if (UNIV_UNLIKELY(ibuf->empty)
- && UNIV_LIKELY(!srv_shutdown_state)) {
- return(0);
- }
-
ibuf_mtr_start(&mtr);
/* Open a cursor to a randomly chosen leaf of the tree, at a random
@@ -2554,18 +2616,159 @@ ibuf_contract_ext(
ibuf_mtr_commit(&mtr);
btr_pcur_close(&pcur);
- buf_read_ibuf_merge_pages(sync, space_ids, space_versions, page_nos,
- *n_pages);
+ buf_read_ibuf_merge_pages(
+ sync, space_ids, space_versions, page_nos, *n_pages);
return(sum_sizes + 1);
}
/*********************************************************************//**
+Get the table instance from the table id.
+@return table instance */
+static __attribute__((warn_unused_result))
+dict_table_t*
+ibuf_get_table(
+/*===========*/
+ table_id_t table_id) /*!< in: valid table id */
+{
+ rw_lock_s_lock_func(&dict_operation_lock, 0, __FILE__, __LINE__);
+
+ dict_table_t* table = dict_table_open_on_id(table_id, FALSE, FALSE);
+
+ rw_lock_s_unlock_gen(&dict_operation_lock, 0);
+
+ return(table);
+}
+
+/*********************************************************************//**
Contracts insert buffer trees by reading pages to the buffer pool.
@return a lower limit for the combined size in bytes of entries which
will be merged from ibuf trees to the pages read, 0 if ibuf is
empty */
-UNIV_INTERN
+static
+ulint
+ibuf_merge_space(
+/*=============*/
+ ulint space, /*!< in: tablespace id to merge */
+ ulint* n_pages)/*!< out: number of pages to which merged */
+{
+ mtr_t mtr;
+ btr_pcur_t pcur;
+ mem_heap_t* heap = mem_heap_create(512);
+ dtuple_t* tuple = ibuf_search_tuple_build(space, 0, heap);
+
+ ibuf_mtr_start(&mtr);
+
+ /* Position the cursor on the first matching record. */
+
+ btr_pcur_open(
+ ibuf->index, tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur,
+ &mtr);
+
+ mem_heap_free(heap);
+
+ ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index));
+
+ ulint sum_sizes = 0;
+ ulint pages[IBUF_MAX_N_PAGES_MERGED];
+ ulint spaces[IBUF_MAX_N_PAGES_MERGED];
+ ib_int64_t versions[IBUF_MAX_N_PAGES_MERGED];
+
+ if (page_get_n_recs(btr_pcur_get_page(&pcur)) == 0) {
+ /* If a B-tree page is empty, it must be the root page
+ and the whole B-tree must be empty. InnoDB does not
+ allow empty B-tree pages other than the root. */
+ ut_ad(ibuf->empty);
+ ut_ad(page_get_space_id(btr_pcur_get_page(&pcur))
+ == IBUF_SPACE_ID);
+ ut_ad(page_get_page_no(btr_pcur_get_page(&pcur))
+ == FSP_IBUF_TREE_ROOT_PAGE_NO);
+
+ } else {
+
+ sum_sizes = ibuf_get_merge_pages(
+ &pcur, space, IBUF_MAX_N_PAGES_MERGED,
+ &pages[0], &spaces[0], &versions[0], n_pages,
+ &mtr);
+
+ ++sum_sizes;
+ }
+
+ ibuf_mtr_commit(&mtr);
+
+ btr_pcur_close(&pcur);
+
+ if (sum_sizes > 0) {
+
+ ut_a(*n_pages > 0 || sum_sizes == 1);
+
+#ifdef UNIV_DEBUG
+ ut_ad(*n_pages <= UT_ARR_SIZE(pages));
+
+ for (ulint i = 0; i < *n_pages; ++i) {
+ ut_ad(spaces[i] == space);
+ ut_ad(i == 0 || versions[i] == versions[i - 1]);
+ }
+#endif /* UNIV_DEBUG */
+
+ buf_read_ibuf_merge_pages(
+ TRUE, spaces, versions, pages, *n_pages);
+ }
+
+ return(sum_sizes);
+}
+
+/*********************************************************************//**
+Contracts insert buffer trees by reading pages to the buffer pool.
+@return a lower limit for the combined size in bytes of entries which
+will be merged from ibuf trees to the pages read, 0 if ibuf is
+empty */
+static __attribute__((nonnull, warn_unused_result))
+ulint
+ibuf_merge(
+/*=======*/
+ table_id_t table_id, /*!< in: if merge should be
+ done only for a specific
+ table, for all tables this
+ should be 0 */
+ ulint* n_pages, /*!< out: number of pages to
+ which merged */
+ bool sync) /*!< in: TRUE if the caller
+ wants to wait for the issued
+ read with the highest
+ tablespace address to complete */
+{
+ dict_table_t* table;
+
+ *n_pages = 0;
+
+ /* We perform a dirty read of ibuf->empty, without latching
+ the insert buffer root page. We trust this dirty read except
+ when a slow shutdown is being executed. During a slow
+ shutdown, the insert buffer merge must be completed. */
+
+ if (ibuf->empty && !srv_shutdown_state) {
+ return(0);
+ } else if (table_id == 0) {
+ return(ibuf_merge_pages(n_pages, sync));
+ } else if ((table = ibuf_get_table(table_id)) == 0) {
+ /* Table has been dropped. */
+ return(0);
+ }
+
+ ulint volume = ibuf_merge_space(table->space, n_pages);
+
+ dict_table_close(table, FALSE, FALSE);
+
+ return(volume);
+}
+
+/*********************************************************************//**
+Contracts insert buffer trees by reading pages to the buffer pool.
+@return a lower limit for the combined size in bytes of entries which
+will be merged from ibuf trees to the pages read, 0 if ibuf is
+empty */
+static
ulint
ibuf_contract(
/*==========*/
@@ -2575,7 +2778,7 @@ ibuf_contract(
{
ulint n_pages;
- return(ibuf_contract_ext(&n_pages, sync));
+ return(ibuf_merge(0, &n_pages, sync));
}
/*********************************************************************//**
@@ -2587,17 +2790,26 @@ UNIV_INTERN
ulint
ibuf_contract_in_background(
/*========================*/
- ibool full) /*!< in: TRUE if the caller wants to do a full
- contract based on PCT_IO(100). If FALSE then
- the size of contract batch is determined based
- on the current size of the ibuf tree. */
+ table_id_t table_id, /*!< in: if merge should be done only
+ for a specific table, for all tables
+ this should be 0 */
+ ibool full) /*!< in: TRUE if the caller wants to
+ do a full contract based on PCT_IO(100).
+ If FALSE then the size of contract
+ batch is determined based on the
+ current size of the ibuf tree. */
{
ulint sum_bytes = 0;
ulint sum_pages = 0;
- ulint n_bytes;
ulint n_pag2;
ulint n_pages;
+#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+ if (srv_ibuf_disable_background_merge && table_id == 0) {
+ return(0);
+ }
+#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+
if (full) {
/* Caller has requested a full batch */
n_pages = PCT_IO(100);
@@ -2620,7 +2832,9 @@ ibuf_contract_in_background(
}
while (sum_pages < n_pages) {
- n_bytes = ibuf_contract_ext(&n_pag2, FALSE);
+ ulint n_bytes;
+
+ n_bytes = ibuf_merge(table_id, &n_pag2, FALSE);
if (n_bytes == 0) {
return(sum_bytes);
@@ -3061,7 +3275,7 @@ ibuf_update_max_tablespace_id(void)
ibuf_mtr_start(&mtr);
btr_pcur_open_at_index_side(
- FALSE, ibuf->index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
+ false, ibuf->index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index));
@@ -3223,8 +3437,8 @@ ibuf_get_entry_counter_func(
Buffer an operation in the insert/delete buffer, instead of doing it
directly to the disk page, if this is possible.
@return DB_SUCCESS, DB_STRONG_FAIL or other error */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
ibuf_insert_low(
/*============*/
ulint mode, /*!< in: BTR_MODIFY_PREV or BTR_MODIFY_TREE */
@@ -3246,7 +3460,9 @@ ibuf_insert_low(
btr_pcur_t pcur;
btr_cur_t* cursor;
dtuple_t* ibuf_entry;
+ mem_heap_t* offsets_heap = NULL;
mem_heap_t* heap;
+ ulint* offsets = NULL;
ulint buffered;
lint min_n_recs;
rec_t* ins_rec;
@@ -3254,7 +3470,7 @@ ibuf_insert_low(
page_t* bitmap_page;
buf_block_t* block;
page_t* root;
- ulint err;
+ dberr_t err;
ibool do_merge;
ulint space_ids[IBUF_MAX_N_PAGES_MERGED];
ib_int64_t space_versions[IBUF_MAX_N_PAGES_MERGED];
@@ -3294,7 +3510,7 @@ ibuf_insert_low(
return(DB_STRONG_FAIL);
}
- heap = mem_heap_create(512);
+ heap = mem_heap_create(1024);
/* Build the entry which contains the space id and the page number
as the first fields and the type information for other fields, and
@@ -3464,9 +3680,11 @@ fail_exit:
cursor = btr_pcur_get_btr_cur(&pcur);
if (mode == BTR_MODIFY_PREV) {
- err = btr_cur_optimistic_insert(BTR_NO_LOCKING_FLAG, cursor,
- ibuf_entry, &ins_rec,
- &dummy_big_rec, 0, thr, &mtr);
+ err = btr_cur_optimistic_insert(
+ BTR_NO_LOCKING_FLAG,
+ cursor, &offsets, &offsets_heap,
+ ibuf_entry, &ins_rec,
+ &dummy_big_rec, 0, thr, &mtr);
block = btr_cur_get_block(cursor);
ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID);
@@ -3493,13 +3711,15 @@ fail_exit:
err = btr_cur_optimistic_insert(
BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG,
- cursor, ibuf_entry, &ins_rec,
+ cursor, &offsets, &offsets_heap,
+ ibuf_entry, &ins_rec,
&dummy_big_rec, 0, thr, &mtr);
if (err == DB_FAIL) {
err = btr_cur_pessimistic_insert(
BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG,
- cursor, ibuf_entry, &ins_rec,
+ cursor, &offsets, &offsets_heap,
+ ibuf_entry, &ins_rec,
&dummy_big_rec, 0, thr, &mtr);
}
@@ -3512,6 +3732,10 @@ fail_exit:
ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID);
}
+ if (offsets_heap) {
+ mem_heap_free(offsets_heap);
+ }
+
if (err == DB_SUCCESS && op != IBUF_OP_DELETE) {
/* Update the page max trx id field */
page_update_max_trx_id(block, NULL,
@@ -3568,7 +3792,7 @@ ibuf_insert(
ulint page_no,/*!< in: page number where to insert */
que_thr_t* thr) /*!< in: query thread */
{
- ulint err;
+ dberr_t err;
ulint entry_size;
ibool no_counter;
/* Read the settable global variable ibuf_use only once in
@@ -3699,7 +3923,7 @@ skip_watch:
/********************************************************************//**
During merge, inserts to an index page a secondary index entry extracted
from the insert buffer. */
-static
+static __attribute__((nonnull))
void
ibuf_insert_to_index_page_low(
/*==========================*/
@@ -3707,6 +3931,8 @@ ibuf_insert_to_index_page_low(
buf_block_t* block, /*!< in/out: index page where the buffered
entry should be placed */
dict_index_t* index, /*!< in: record descriptor */
+ ulint** offsets,/*!< out: offsets on *rec */
+ mem_heap_t* heap, /*!< in/out: memory heap */
mtr_t* mtr, /*!< in/out: mtr */
page_cur_t* page_cur)/*!< in/out: cursor positioned on the record
after which to insert the buffered entry */
@@ -3718,8 +3944,8 @@ ibuf_insert_to_index_page_low(
const page_t* bitmap_page;
ulint old_bits;
- if (UNIV_LIKELY
- (page_cur_tuple_insert(page_cur, entry, index, 0, mtr) != NULL)) {
+ if (page_cur_tuple_insert(
+ page_cur, entry, index, offsets, &heap, 0, mtr) != NULL) {
return;
}
@@ -3730,8 +3956,8 @@ ibuf_insert_to_index_page_low(
/* This time the record must fit */
- if (UNIV_LIKELY
- (page_cur_tuple_insert(page_cur, entry, index, 0, mtr) != NULL)) {
+ if (page_cur_tuple_insert(page_cur, entry, index,
+ offsets, &heap, 0, mtr) != NULL) {
return;
}
@@ -3785,6 +4011,8 @@ ibuf_insert_to_index_page(
ulint low_match;
page_t* page = buf_block_get_frame(block);
rec_t* rec;
+ ulint* offsets;
+ mem_heap_t* heap;
ut_ad(ibuf_inside(mtr));
ut_ad(dtuple_check_typed(entry));
@@ -3835,10 +4063,14 @@ dump:
low_match = page_cur_search(block, index, entry,
PAGE_CUR_LE, &page_cur);
+ heap = mem_heap_create(
+ sizeof(upd_t)
+ + REC_OFFS_HEADER_SIZE * sizeof(*offsets)
+ + dtuple_get_n_fields(entry)
+ * (sizeof(upd_field_t) + sizeof *offsets));
+
if (UNIV_UNLIKELY(low_match == dtuple_get_n_fields(entry))) {
- mem_heap_t* heap;
upd_t* update;
- ulint* offsets;
page_zip_des_t* page_zip;
rec = page_cur_get_rec(&page_cur);
@@ -3847,12 +4079,10 @@ dump:
row_ins_sec_index_entry_by_modify(BTR_MODIFY_LEAF). */
ut_ad(rec_get_deleted_flag(rec, page_is_comp(page)));
- heap = mem_heap_create(1024);
-
offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED,
&heap);
update = row_upd_build_sec_rec_difference_binary(
- index, entry, rec, NULL, heap);
+ rec, index, offsets, entry, heap);
page_zip = buf_block_get_page_zip(block);
@@ -3862,9 +4092,7 @@ dump:
Bug #56680 was fixed. */
btr_cur_set_deleted_flag_for_ibuf(
rec, page_zip, FALSE, mtr);
-updated_in_place:
- mem_heap_free(heap);
- return;
+ goto updated_in_place;
}
/* Copy the info bits. Clear the delete-mark. */
@@ -3908,15 +4136,20 @@ updated_in_place:
lock_rec_store_on_page_infimum(block, rec);
page_cur_delete_rec(&page_cur, index, offsets, mtr);
page_cur_move_to_prev(&page_cur);
- mem_heap_free(heap);
- ibuf_insert_to_index_page_low(entry, block, index, mtr,
+ ibuf_insert_to_index_page_low(entry, block, index,
+ &offsets, heap, mtr,
&page_cur);
lock_rec_restore_from_page_infimum(block, rec, block);
} else {
- ibuf_insert_to_index_page_low(entry, block, index, mtr,
+ offsets = NULL;
+ ibuf_insert_to_index_page_low(entry, block, index,
+ &offsets, heap, mtr,
&page_cur);
}
+
+updated_in_place:
+ mem_heap_free(heap);
}
/****************************************************************//**
@@ -3950,7 +4183,7 @@ ibuf_set_del_mark(
/* Delete mark the old index record. According to a
comment in row_upd_sec_index_entry(), it can already
have been delete marked if a lock wait occurred in
- row_ins_index_entry() in a previous invocation of
+ row_ins_sec_index_entry() in a previous invocation of
row_upd_sec_index_entry(). */
if (UNIV_LIKELY
@@ -4128,7 +4361,7 @@ ibuf_restore_pos(
ibuf_btr_pcur_commit_specify_mtr(pcur, mtr);
fputs("InnoDB: Validating insert buffer tree:\n", stderr);
- if (!btr_validate_index(ibuf->index, NULL)) {
+ if (!btr_validate_index(ibuf->index, 0)) {
ut_error;
}
@@ -4160,7 +4393,7 @@ ibuf_delete_rec(
{
ibool success;
page_t* root;
- ulint err;
+ dberr_t err;
ut_ad(ibuf_inside(mtr));
ut_ad(page_rec_is_user_rec(btr_pcur_get_rec(pcur)));
@@ -4183,7 +4416,8 @@ ibuf_delete_rec(
}
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
- success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur), mtr);
+ success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur),
+ 0, mtr);
if (success) {
if (UNIV_UNLIKELY(!page_get_n_recs(btr_pcur_get_page(pcur)))) {
@@ -4241,7 +4475,7 @@ ibuf_delete_rec(
root = ibuf_tree_root_get(mtr);
- btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur),
+ btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur), 0,
RB_NONE, mtr);
ut_a(err == DB_SUCCESS);
@@ -4829,4 +5063,109 @@ ibuf_print(
mutex_exit(&ibuf_mutex);
}
+
+/******************************************************************//**
+Checks the insert buffer bitmaps on IMPORT TABLESPACE.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+ibuf_check_bitmap_on_import(
+/*========================*/
+ const trx_t* trx, /*!< in: transaction */
+ ulint space_id) /*!< in: tablespace identifier */
+{
+ ulint zip_size;
+ ulint page_size;
+ ulint size;
+ ulint page_no;
+
+ ut_ad(space_id);
+ ut_ad(trx->mysql_thd);
+
+ zip_size = fil_space_get_zip_size(space_id);
+
+ if (zip_size == ULINT_UNDEFINED) {
+ return(DB_TABLE_NOT_FOUND);
+ }
+
+ size = fil_space_get_size(space_id);
+
+ if (size == 0) {
+ return(DB_TABLE_NOT_FOUND);
+ }
+
+ mutex_enter(&ibuf_mutex);
+
+ page_size = zip_size ? zip_size : UNIV_PAGE_SIZE;
+
+ for (page_no = 0; page_no < size; page_no += page_size) {
+ mtr_t mtr;
+ page_t* bitmap_page;
+ ulint i;
+
+ if (trx_is_interrupted(trx)) {
+ mutex_exit(&ibuf_mutex);
+ return(DB_INTERRUPTED);
+ }
+
+ mtr_start(&mtr);
+
+ mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
+
+ ibuf_enter(&mtr);
+
+ bitmap_page = ibuf_bitmap_get_map_page(
+ space_id, page_no, zip_size, &mtr);
+
+ for (i = FSP_IBUF_BITMAP_OFFSET + 1; i < page_size; i++) {
+ const ulint offset = page_no + i;
+
+ if (ibuf_bitmap_page_get_bits(
+ bitmap_page, offset, zip_size,
+ IBUF_BITMAP_IBUF, &mtr)) {
+
+ mutex_exit(&ibuf_mutex);
+ ibuf_exit(&mtr);
+ mtr_commit(&mtr);
+
+ ib_errf(trx->mysql_thd,
+ IB_LOG_LEVEL_ERROR,
+ ER_INNODB_INDEX_CORRUPT,
+ "Space %u page %u"
+ " is wrongly flagged to belong to the"
+ " insert buffer",
+ (unsigned) space_id,
+ (unsigned) offset);
+
+ return(DB_CORRUPTION);
+ }
+
+ if (ibuf_bitmap_page_get_bits(
+ bitmap_page, offset, zip_size,
+ IBUF_BITMAP_BUFFERED, &mtr)) {
+
+ ib_errf(trx->mysql_thd,
+ IB_LOG_LEVEL_WARN,
+ ER_INNODB_INDEX_CORRUPT,
+ "Buffered changes"
+ " for space %u page %u are lost",
+ (unsigned) space_id,
+ (unsigned) offset);
+
+ /* Tolerate this error, so that
+ slightly corrupted tables can be
+ imported and dumped. Clear the bit. */
+ ibuf_bitmap_page_set_bits(
+ bitmap_page, offset, zip_size,
+ IBUF_BITMAP_BUFFERED, FALSE, &mtr);
+ }
+ }
+
+ ibuf_exit(&mtr);
+ mtr_commit(&mtr);
+ }
+
+ mutex_exit(&ibuf_mutex);
+ return(DB_SUCCESS);
+}
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/api0api.h b/storage/innobase/include/api0api.h
new file mode 100644
index 00000000000..5b7bfdbdde5
--- /dev/null
+++ b/storage/innobase/include/api0api.h
@@ -0,0 +1,1282 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/api0api.h
+InnoDB Native API
+
+2008-08-01 Created by Sunny Bains.
+3/20/2011 Jimmy Yang extracted from Embedded InnoDB
+*******************************************************/
+
+#ifndef api0api_h
+#define api0api_h
+
+#include "db0err.h"
+#include <stdio.h>
+
+#ifdef _MSC_VER
+#define strncasecmp _strnicmp
+#define strcasecmp _stricmp
+#endif
+
+#if defined(__GNUC__) && (__GNUC__ > 2) && ! defined(__INTEL_COMPILER)
+#define UNIV_NO_IGNORE __attribute__ ((warn_unused_result))
+#else
+#define UNIV_NO_IGNORE
+#endif /* __GNUC__ && __GNUC__ > 2 && !__INTEL_COMPILER */
+
+/* See comment about ib_bool_t as to why the two macros are unsigned long. */
+/** The boolean value of "true" used internally within InnoDB */
+#define IB_TRUE 0x1UL
+/** The boolean value of "false" used internally within InnoDB */
+#define IB_FALSE 0x0UL
+
+/* Basic types used by the InnoDB API. */
+/** All InnoDB error codes are represented by ib_err_t */
+typedef enum dberr_t ib_err_t;
+/** Representation of a byte within InnoDB */
+typedef unsigned char ib_byte_t;
+/** Representation of an unsigned long int within InnoDB */
+typedef unsigned long int ib_ulint_t;
+
+/* We assume C99 support except when using VisualStudio. */
+#if !defined(_MSC_VER)
+#include <stdint.h>
+#endif /* _MSC_VER */
+
+/* Integer types used by the API. Microsft VS defines its own types
+and we use the Microsoft types when building with Visual Studio. */
+#if defined(_MSC_VER)
+/** A signed 8 bit integral type. */
+typedef __int8 ib_i8_t;
+#else
+/** A signed 8 bit integral type. */
+typedef int8_t ib_i8_t;
+#endif
+
+#if defined(_MSC_VER)
+/** An unsigned 8 bit integral type. */
+typedef unsigned __int8 ib_u8_t;
+#else
+/** An unsigned 8 bit integral type. */
+typedef uint8_t ib_u8_t;
+#endif
+
+#if defined(_MSC_VER)
+/** A signed 16 bit integral type. */
+typedef __int16 ib_i16_t;
+#else
+/** A signed 16 bit integral type. */
+typedef int16_t ib_i16_t;
+#endif
+
+#if defined(_MSC_VER)
+/** An unsigned 16 bit integral type. */
+typedef unsigned __int16 ib_u16_t;
+#else
+/** An unsigned 16 bit integral type. */
+typedef uint16_t ib_u16_t;
+#endif
+
+#if defined(_MSC_VER)
+/** A signed 32 bit integral type. */
+typedef __int32 ib_i32_t;
+#else
+/** A signed 32 bit integral type. */
+typedef int32_t ib_i32_t;
+#endif
+
+#if defined(_MSC_VER)
+/** An unsigned 32 bit integral type. */
+typedef unsigned __int32 ib_u32_t;
+#else
+/** An unsigned 32 bit integral type. */
+typedef uint32_t ib_u32_t;
+#endif
+
+#if defined(_MSC_VER)
+/** A signed 64 bit integral type. */
+typedef __int64 ib_i64_t;
+#else
+/** A signed 64 bit integral type. */
+typedef int64_t ib_i64_t;
+#endif
+
+#if defined(_MSC_VER)
+/** An unsigned 64 bit integral type. */
+typedef unsigned __int64 ib_u64_t;
+#else
+/** An unsigned 64 bit integral type. */
+typedef uint64_t ib_u64_t;
+#endif
+
+typedef void* ib_opaque_t;
+typedef ib_opaque_t ib_charset_t;
+typedef ib_ulint_t ib_bool_t;
+typedef ib_u64_t ib_id_u64_t;
+
+/** @enum ib_cfg_type_t Possible types for a configuration variable. */
+typedef enum {
+ IB_CFG_IBOOL, /*!< The configuration parameter is
+ of type ibool */
+
+ /* XXX Can we avoid having different types for ulint and ulong?
+ - On Win64 "unsigned long" is 32 bits
+ - ulong is always defined as "unsigned long"
+ - On Win64 ulint is defined as 64 bit integer
+ => On Win64 ulint != ulong.
+ If we typecast all ulong and ulint variables to the smaller type
+ ulong, then we will cut the range of the ulint variables.
+ This is not a problem for most ulint variables because their max
+ allowed values do not exceed 2^32-1 (e.g. log_groups is ulint
+ but its max allowed value is 10). BUT buffer_pool_size and
+ log_file_size allow up to 2^64-1. */
+
+ IB_CFG_ULINT, /*!< The configuration parameter is
+ of type ulint */
+
+ IB_CFG_ULONG, /*!< The configuration parameter is
+ of type ulong */
+
+ IB_CFG_TEXT, /*!< The configuration parameter is
+ of type char* */
+
+ IB_CFG_CB /*!< The configuration parameter is
+ a callback parameter */
+} ib_cfg_type_t;
+
+/** @enum ib_col_type_t column types that are supported. */
+typedef enum {
+ IB_VARCHAR = 1, /*!< Character varying length. The
+ column is not padded. */
+
+ IB_CHAR = 2, /*!< Fixed length character string. The
+ column is padded to the right. */
+
+ IB_BINARY = 3, /*!< Fixed length binary, similar to
+ IB_CHAR but the column is not padded
+ to the right. */
+
+ IB_VARBINARY = 4, /*!< Variable length binary */
+
+ IB_BLOB = 5, /*!< Binary large object, or
+ a TEXT type */
+
+ IB_INT = 6, /*!< Integer: can be any size
+ from 1 - 8 bytes. If the size is
+ 1, 2, 4 and 8 bytes then you can use
+ the typed read and write functions. For
+ other sizes you will need to use the
+ ib_col_get_value() function and do the
+ conversion yourself. */
+
+ IB_SYS = 8, /*!< System column, this column can
+ be one of DATA_TRX_ID, DATA_ROLL_PTR
+ or DATA_ROW_ID. */
+
+ IB_FLOAT = 9, /*!< C (float) floating point value. */
+
+ IB_DOUBLE = 10, /*!> C (double) floating point value. */
+
+ IB_DECIMAL = 11, /*!< Decimal stored as an ASCII
+ string */
+
+ IB_VARCHAR_ANYCHARSET = 12, /*!< Any charset, varying length */
+
+ IB_CHAR_ANYCHARSET = 13 /*!< Any charset, fixed length */
+
+} ib_col_type_t;
+
+/** @enum ib_tbl_fmt_t InnoDB table format types */
+typedef enum {
+ IB_TBL_REDUNDANT, /*!< Redundant row format, the column
+ type and length is stored in the row.*/
+
+ IB_TBL_COMPACT, /*!< Compact row format, the column
+ type is not stored in the row. The
+ length is stored in the row but the
+ storage format uses a compact format
+ to store the length of the column data
+ and record data storage format also
+ uses less storage. */
+
+ IB_TBL_DYNAMIC, /*!< Compact row format. BLOB prefixes
+ are not stored in the clustered index */
+
+ IB_TBL_COMPRESSED /*!< Similar to dynamic format but
+ with pages compressed */
+} ib_tbl_fmt_t;
+
+/** @enum ib_col_attr_t InnoDB column attributes */
+typedef enum {
+ IB_COL_NONE = 0, /*!< No special attributes. */
+
+ IB_COL_NOT_NULL = 1, /*!< Column data can't be NULL. */
+
+ IB_COL_UNSIGNED = 2, /*!< Column is IB_INT and unsigned. */
+
+ IB_COL_NOT_USED = 4, /*!< Future use, reserved. */
+
+ IB_COL_CUSTOM1 = 8, /*!< Custom precision type, this is
+ a bit that is ignored by InnoDB and so
+ can be set and queried by users. */
+
+ IB_COL_CUSTOM2 = 16, /*!< Custom precision type, this is
+ a bit that is ignored by InnoDB and so
+ can be set and queried by users. */
+
+ IB_COL_CUSTOM3 = 32 /*!< Custom precision type, this is
+ a bit that is ignored by InnoDB and so
+ can be set and queried by users. */
+} ib_col_attr_t;
+
+/* Note: must match lock0types.h */
+/** @enum ib_lck_mode_t InnoDB lock modes. */
+typedef enum {
+ IB_LOCK_IS = 0, /*!< Intention shared, an intention
+ lock should be used to lock tables */
+
+ IB_LOCK_IX, /*!< Intention exclusive, an intention
+ lock should be used to lock tables */
+
+ IB_LOCK_S, /*!< Shared locks should be used to
+ lock rows */
+
+ IB_LOCK_X, /*!< Exclusive locks should be used to
+ lock rows*/
+
+ IB_LOCK_TABLE_X, /*!< exclusive table lock */
+
+ IB_LOCK_NONE, /*!< This is used internally to note
+ consistent read */
+
+ IB_LOCK_NUM = IB_LOCK_NONE /*!< number of lock modes */
+} ib_lck_mode_t;
+
+typedef enum {
+ IB_CLUSTERED = 1, /*!< clustered index */
+ IB_UNIQUE = 2 /*!< unique index */
+} ib_index_type_t;
+
+/** @enum ib_srch_mode_t InnoDB cursor search modes for ib_cursor_moveto().
+Note: Values must match those found in page0cur.h */
+typedef enum {
+ IB_CUR_G = 1, /*!< If search key is not found then
+ position the cursor on the row that
+ is greater than the search key */
+
+ IB_CUR_GE = 2, /*!< If the search key not found then
+ position the cursor on the row that
+ is greater than or equal to the search
+ key */
+
+ IB_CUR_L = 3, /*!< If search key is not found then
+ position the cursor on the row that
+ is less than the search key */
+
+ IB_CUR_LE = 4 /*!< If search key is not found then
+ position the cursor on the row that
+ is less than or equal to the search
+ key */
+} ib_srch_mode_t;
+
+/** @enum ib_match_mode_t Various match modes used by ib_cursor_moveto() */
+typedef enum {
+ IB_CLOSEST_MATCH, /*!< Closest match possible */
+
+ IB_EXACT_MATCH, /*!< Search using a complete key
+ value */
+
+ IB_EXACT_PREFIX /*!< Search using a key prefix which
+ must match to rows: the prefix may
+ contain an incomplete field (the
+ last field in prefix may be just
+ a prefix of a fixed length column) */
+} ib_match_mode_t;
+
+/** @struct ib_col_meta_t InnoDB column meta data. */
+typedef struct {
+ ib_col_type_t type; /*!< Type of the column */
+
+ ib_col_attr_t attr; /*!< Column attributes */
+
+ ib_u32_t type_len; /*!< Length of type */
+
+ ib_u16_t client_type; /*!< 16 bits of data relevant only to
+ the client. InnoDB doesn't care */
+
+ ib_charset_t* charset; /*!< Column charset */
+} ib_col_meta_t;
+
+/* Note: Must be in sync with trx0trx.h */
+/** @enum ib_trx_state_t The transaction state can be queried using the
+ib_trx_state() function. The InnoDB deadlock monitor can roll back a
+transaction and users should be prepared for this, especially where there
+is high contention. The way to determine the state of the transaction is to
+query it's state and check. */
+typedef enum {
+ IB_TRX_NOT_STARTED, /*!< Has not started yet, the
+ transaction has not ben started yet.*/
+
+ IB_TRX_ACTIVE, /*!< The transaction is currently
+ active and needs to be either
+ committed or rolled back. */
+
+ IB_TRX_COMMITTED_IN_MEMORY, /*!< Not committed to disk yet */
+
+ IB_TRX_PREPARED /*!< Support for 2PC/XA */
+} ib_trx_state_t;
+
+/* Note: Must be in sync with trx0trx.h */
+/** @enum ib_trx_level_t Transaction isolation levels */
+typedef enum {
+ IB_TRX_READ_UNCOMMITTED = 0, /*!< Dirty read: non-locking SELECTs are
+ performed so that we do not look at a
+ possible earlier version of a record;
+ thus they are not 'consistent' reads
+ under this isolation level; otherwise
+ like level 2 */
+
+ IB_TRX_READ_COMMITTED = 1, /*!< Somewhat Oracle-like isolation,
+ except that in range UPDATE and DELETE
+ we must block phantom rows with
+ next-key locks; SELECT ... FOR UPDATE
+ and ... LOCK IN SHARE MODE only lock
+ the index records, NOT the gaps before
+ them, and thus allow free inserting;
+ each consistent read reads its own
+ snapshot */
+
+ IB_TRX_REPEATABLE_READ = 2, /*!< All consistent reads in the same
+ trx read the same snapshot; full
+ next-key locking used in locking reads
+ to block insertions into gaps */
+
+ IB_TRX_SERIALIZABLE = 3 /*!< All plain SELECTs are converted to
+ LOCK IN SHARE MODE reads */
+} ib_trx_level_t;
+
+/** Generic InnoDB callback prototype. */
+typedef void (*ib_cb_t)(void);
+
+#define IB_CFG_BINLOG_ENABLED 0x1
+#define IB_CFG_MDL_ENABLED 0x2
+#define IB_CFG_DISABLE_ROWLOCK 0x4
+
+/** The first argument to the InnoDB message logging function. By default
+it's set to stderr. You should treat ib_msg_stream_t as a void*, since
+it will probably change in the future. */
+typedef FILE* ib_msg_stream_t;
+
+/** All log messages are written to this function. It should have the same
+behavior as fprintf(3). */
+typedef int (*ib_msg_log_t)(ib_msg_stream_t, const char*, ...);
+
+/* Note: This is to make it easy for API users to have type
+checking for arguments to our functions. Making it ib_opaque_t
+by itself will result in pointer decay resulting in subverting
+of the compiler's type checking. */
+
+/** InnoDB tuple handle. This handle can refer to either a cluster index
+tuple or a secondary index tuple. There are two types of tuples for each
+type of index, making a total of four types of tuple handles. There
+is a tuple for reading the entire row contents and another for searching
+on the index key. */
+typedef struct ib_tuple_t* ib_tpl_t;
+
+/** InnoDB transaction handle, all database operations need to be covered
+by transactions. This handle represents a transaction. The handle can be
+created with ib_trx_begin(), you commit your changes with ib_trx_commit()
+and undo your changes using ib_trx_rollback(). If the InnoDB deadlock
+monitor rolls back the transaction then you need to free the transaction
+using the function ib_trx_release(). You can query the state of an InnoDB
+transaction by calling ib_trx_state(). */
+typedef struct trx_t* ib_trx_t;
+
+/** InnoDB cursor handle */
+typedef struct ib_cursor_t* ib_crsr_t;
+
+/*************************************************************//**
+This function is used to compare two data fields for which the data type
+is such that we must use the client code to compare them.
+
+@param col_meta column meta data
+@param p1 key
+@param p1_len key length
+@param p2 second key
+@param p2_len second key length
+@return 1, 0, -1, if a is greater, equal, less than b, respectively */
+
+typedef int (*ib_client_cmp_t)(
+ const ib_col_meta_t* col_meta,
+ const ib_byte_t* p1,
+ ib_ulint_t p1_len,
+ const ib_byte_t* p2,
+ ib_ulint_t p2_len);
+
+/* This should be the same as univ.i */
+/** Represents SQL_NULL length */
+#define IB_SQL_NULL 0xFFFFFFFF
+/** The number of system columns in a row. */
+#define IB_N_SYS_COLS 3
+
+/** The maximum length of a text column. */
+#define MAX_TEXT_LEN 4096
+
+/* MySQL uses 3 byte UTF-8 encoding. */
+/** The maximum length of a column name in a table schema. */
+#define IB_MAX_COL_NAME_LEN (64 * 3)
+
+/** The maximum length of a table name (plus database name).
+Fully parenthesized so the macro is safe inside any expression
+(e.g. division or shift); the value is unchanged: 384. */
+#define IB_MAX_TABLE_NAME_LEN ((64 * 3) * 2)
+
+/*****************************************************************//**
+Start a transaction that's been rolled back. This special function
+exists for the case when InnoDB's deadlock detector has rolled back
+a transaction. While the transaction has been rolled back the handle
+is still valid and can be reused by calling this function. If you
+don't want to reuse the transaction handle then you can free the handle
+by calling ib_trx_release().
+@return innobase txn handle */
+
+ib_err_t
+ib_trx_start(
+/*=========*/
+ ib_trx_t ib_trx, /*!< in: transaction to restart */
+ ib_trx_level_t ib_trx_level, /*!< in: trx isolation level */
+ void* thd); /*!< in: THD */
+
+/*****************************************************************//**
+Begin a transaction. This will allocate a new transaction handle and
+put the transaction in the active state.
+@return innobase txn handle */
+
+ib_trx_t
+ib_trx_begin(
+/*=========*/
+ ib_trx_level_t ib_trx_level); /*!< in: trx isolation level */
+
+/*****************************************************************//**
+Query the transaction's state. This function can be used to check for
+the state of the transaction in case it has been rolled back by the
+InnoDB deadlock detector. Note that when a transaction is selected as
+a victim for rollback, InnoDB will always return an appropriate error
+code indicating this. @see DB_DEADLOCK, @see DB_LOCK_TABLE_FULL and
+@see DB_LOCK_WAIT_TIMEOUT
+@return transaction state */
+
+ib_trx_state_t
+ib_trx_state(
+/*=========*/
+ ib_trx_t ib_trx); /*!< in: trx handle */
+
+/*****************************************************************//**
+Release the resources of the transaction. If the transaction was
+selected as a victim by InnoDB and rolled back then use this function
+to free the transaction handle.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_trx_release(
+/*===========*/
+ ib_trx_t ib_trx); /*!< in: trx handle */
+
+/*****************************************************************//**
+Commit a transaction. This function will release the schema latches too.
+It will also free the transaction handle.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_trx_commit(
+/*==========*/
+ ib_trx_t ib_trx); /*!< in: trx handle */
+
+/*****************************************************************//**
+Rollback a transaction. This function will release the schema latches too.
+It will also free the transaction handle.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_trx_rollback(
+/*============*/
+ ib_trx_t ib_trx); /*!< in: trx handle */
+
+/*****************************************************************//**
+Open an InnoDB table and return a cursor handle to it.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_open_table_using_id(
+/*==========================*/
+ ib_id_u64_t table_id, /*!< in: table id of table to open */
+ ib_trx_t ib_trx, /*!< in: Current transaction handle
+ can be NULL */
+ ib_crsr_t* ib_crsr); /*!< out,own: InnoDB cursor */
+
+/*****************************************************************//**
+Open an InnoDB index and return a cursor handle to it.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_open_index_using_id(
+/*==========================*/
+ ib_id_u64_t index_id, /*!< in: index id of index to open */
+ ib_trx_t ib_trx, /*!< in: Current transaction handle
+ can be NULL */
+ ib_crsr_t* ib_crsr); /*!< out: InnoDB cursor */
+
+/*****************************************************************//**
+Open an InnoDB secondary index cursor and return a cursor handle to it.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_open_index_using_name(
+/*============================*/
+ ib_crsr_t ib_open_crsr, /*!< in: open/active cursor */
+ const char* index_name, /*!< in: secondary index name */
+ ib_crsr_t* ib_crsr, /*!< out,own: InnoDB index cursor */
+ int* idx_type, /*!< out: index is cluster index */
+ ib_id_u64_t* idx_id); /*!< out: index id */
+
+/*****************************************************************//**
+Open an InnoDB table by name and return a cursor handle to it.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_open_table(
+/*=================*/
+ const char* name, /*!< in: table name */
+ ib_trx_t ib_trx, /*!< in: Current transaction handle
+ can be NULL */
+ ib_crsr_t* ib_crsr); /*!< out,own: InnoDB cursor */
+
+/*****************************************************************//**
+Reset the cursor.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_reset(
+/*============*/
+ ib_crsr_t ib_crsr); /*!< in/out: InnoDB cursor */
+
+
+/*****************************************************************//**
+set a cursor trx to NULL*/
+
+void
+ib_cursor_clear_trx(
+/*================*/
+ ib_crsr_t ib_crsr); /*!< in/out: InnoDB cursor */
+
+/*****************************************************************//**
+Close an InnoDB table and free the cursor.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_close(
+/*============*/
+ ib_crsr_t ib_crsr); /*!< in/out: InnoDB cursor */
+
+/*****************************************************************//**
+Close the table, decrement n_ref_count count.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_close_table(
+/*==================*/
+ ib_crsr_t ib_crsr); /*!< in/out: InnoDB cursor */
+
+/*****************************************************************//**
+update the cursor with new transactions and also reset the cursor
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_new_trx(
+/*==============*/
+ ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */
+ ib_trx_t ib_trx); /*!< in: transaction */
+
+/*****************************************************************//**
+Commit the transaction in a cursor
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_commit_trx(
+/*=================*/
+ ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */
+ ib_trx_t ib_trx); /*!< in: transaction */
+
+/********************************************************************//**
+Open a table using the table name, if found then increment table ref count.
+@return table instance if found */
+
+void*
+ib_open_table_by_name(
+/*==================*/
+ const char* name); /*!< in: table name to lookup */
+
+/*****************************************************************//**
+Insert a row to a table.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_insert_row(
+/*=================*/
+ ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor instance */
+ const ib_tpl_t ib_tpl); /*!< in: tuple to insert */
+
+/*****************************************************************//**
+Update a row in a table.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_update_row(
+/*=================*/
+ ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
+ const ib_tpl_t ib_old_tpl, /*!< in: Old tuple in table */
+ const ib_tpl_t ib_new_tpl); /*!< in: New tuple to update */
+
+/*****************************************************************//**
+Delete a row in a table.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_delete_row(
+/*=================*/
+ ib_crsr_t ib_crsr); /*!< in: cursor instance */
+
+/*****************************************************************//**
+Read current row.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_read_row(
+/*===============*/
+ ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
+ ib_tpl_t ib_tpl); /*!< out: read cols into this tuple */
+
+/*****************************************************************//**
+Move cursor to the first record in the table.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_first(
+/*============*/
+ ib_crsr_t ib_crsr); /*!< in: InnoDB cursor instance */
+
+/*****************************************************************//**
+Move cursor to the last record in the table.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_last(
+/*===========*/
+ ib_crsr_t ib_crsr); /*!< in: InnoDB cursor instance */
+
+/*****************************************************************//**
+Move cursor to the next record in the table.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_next(
+/*===========*/
+ ib_crsr_t ib_crsr); /*!< in: InnoDB cursor instance */
+
+/*****************************************************************//**
+Search for key.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_cursor_moveto(
+/*=============*/
+ ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
+ ib_tpl_t ib_tpl, /*!< in: Key to search for */
+ ib_srch_mode_t ib_srch_mode); /*!< in: search mode */
+
+/*****************************************************************//**
+Set the match mode for ib_cursor_moveto(). */
+
+void
+ib_cursor_set_match_mode(
+/*=====================*/
+ ib_crsr_t ib_crsr, /*!< in: Cursor instance */
+ ib_match_mode_t match_mode); /*!< in: ib_cursor_moveto match mode */
+
+/*****************************************************************//**
+Set a column of the tuple. Make a copy using the tuple's heap.
+@return DB_SUCCESS or error code */
+
+ib_err_t
+ib_col_set_value(
+/*=============*/
+ ib_tpl_t ib_tpl, /*!< in: tuple instance */
+ ib_ulint_t col_no, /*!< in: column index in tuple */
+ const void* src, /*!< in: data value */
+ ib_ulint_t len); /*!< in: data value len */
+
+/*****************************************************************//**
+Get the size of the data available in the column of the tuple.
+@return bytes avail or IB_SQL_NULL */
+
+ib_ulint_t
+ib_col_get_len(
+/*===========*/
+ ib_tpl_t ib_tpl, /*!< in: tuple instance */
+ ib_ulint_t i); /*!< in: column index in tuple */
+
+/*****************************************************************//**
+Copy a column value from the tuple.
+@return bytes copied or IB_SQL_NULL */
+
+ib_ulint_t
+ib_col_copy_value(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: tuple instance */
+ ib_ulint_t i, /*!< in: column index in tuple */
+ void* dst, /*!< out: copied data value */
+ ib_ulint_t len); /*!< in: max data value len to copy */
+
+/*************************************************************//**
+Read a signed int 8 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_i8(
+/*=============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_i8_t* ival); /*!< out: integer value */
+
+/*************************************************************//**
+Read an unsigned int 8 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_u8(
+/*=============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_u8_t* ival); /*!< out: integer value */
+
+/*************************************************************//**
+Read a signed int 16 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_i16(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_i16_t* ival); /*!< out: integer value */
+
+/*************************************************************//**
+Read an unsigned int 16 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_u16(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_u16_t* ival); /*!< out: integer value */
+
+/*************************************************************//**
+Read a signed int 32 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_i32(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_i32_t* ival); /*!< out: integer value */
+
+/*************************************************************//**
+Read an unsigned int 32 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_u32(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_u32_t* ival); /*!< out: integer value */
+
+/*************************************************************//**
+Read a signed int 64 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_i64(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_i64_t* ival); /*!< out: integer value */
+
+/*************************************************************//**
+Read an unsigned int 64 bit column from an InnoDB tuple.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_u64(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_u64_t* ival); /*!< out: integer value */
+
+/*****************************************************************//**
+Get a column value pointer from the tuple.
+@return NULL or pointer to buffer */
+
+const void*
+ib_col_get_value(
+/*=============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i); /*!< in: column number */
+
+/*****************************************************************//**
+Get a column type, length and attributes from the tuple.
+@return len of column data */
+
+ib_ulint_t
+ib_col_get_meta(
+/*============*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t i, /*!< in: column number */
+ ib_col_meta_t* ib_col_meta); /*!< out: column meta data */
+
+/*****************************************************************//**
+"Clear" or reset an InnoDB tuple. We free the heap and recreate the tuple.
+@return new tuple, or NULL */
+
+ib_tpl_t
+ib_tuple_clear(
+/*============*/
+ ib_tpl_t ib_tpl); /*!< in: InnoDB tuple */
+
+/*****************************************************************//**
+Create a new cluster key search tuple and copy the contents of the
+secondary index key tuple columns that refer to the cluster index record
+to the cluster key. It does a deep copy of the column data.
+@return DB_SUCCESS or error code */
+
+ib_err_t
+ib_tuple_get_cluster_key(
+/*=====================*/
+ ib_crsr_t ib_crsr, /*!< in: secondary index cursor */
+ ib_tpl_t* ib_dst_tpl, /*!< out,own: destination tuple */
+ const ib_tpl_t ib_src_tpl); /*!< in: source tuple */
+
+/*****************************************************************//**
+Copy the contents of source tuple to destination tuple. The tuples
+must be of the same type and belong to the same table/index.
+@return DB_SUCCESS or error code */
+
+ib_err_t
+ib_tuple_copy(
+/*==========*/
+ ib_tpl_t ib_dst_tpl, /*!< in: destination tuple */
+ const ib_tpl_t ib_src_tpl); /*!< in: source tuple */
+
+/*****************************************************************//**
+Create an InnoDB tuple used for index/table search.
+@return tuple for current index */
+
+ib_tpl_t
+ib_sec_search_tuple_create(
+/*=======================*/
+ ib_crsr_t ib_crsr); /*!< in: Cursor instance */
+
+/*****************************************************************//**
+Create an InnoDB tuple used for index/table search.
+@return tuple for current index */
+
+ib_tpl_t
+ib_sec_read_tuple_create(
+/*=====================*/
+ ib_crsr_t ib_crsr); /*!< in: Cursor instance */
+
+/*****************************************************************//**
+Create an InnoDB tuple used for table key operations.
+@return tuple for current table */
+
+ib_tpl_t
+ib_clust_search_tuple_create(
+/*=========================*/
+ ib_crsr_t ib_crsr); /*!< in: Cursor instance */
+
+/*****************************************************************//**
+Create an InnoDB tuple for table row operations.
+@return tuple for current table */
+
+ib_tpl_t
+ib_clust_read_tuple_create(
+/*=======================*/
+ ib_crsr_t ib_crsr); /*!< in: Cursor instance */
+
+/*****************************************************************//**
+Return the number of user columns in the tuple definition.
+@return number of user columns */
+
+ib_ulint_t
+ib_tuple_get_n_user_cols(
+/*=====================*/
+ const ib_tpl_t ib_tpl); /*!< in: Tuple for current table */
+
+/*****************************************************************//**
+Return the number of columns in the tuple definition.
+@return number of columns */
+
+ib_ulint_t
+ib_tuple_get_n_cols(
+/*================*/
+ const ib_tpl_t ib_tpl); /*!< in: Tuple for current table */
+
+/*****************************************************************//**
+Destroy an InnoDB tuple. */
+
+void
+ib_tuple_delete(
+/*============*/
+ ib_tpl_t ib_tpl); /*!< in,own: Tuple instance to delete */
+
+/*****************************************************************//**
+Truncate a table. The cursor handle will be closed and set to NULL
+on success.
+@return DB_SUCCESS or error code */
+
+ib_err_t
+ib_cursor_truncate(
+/*===============*/
+ ib_crsr_t* ib_crsr, /*!< in/out: cursor for table
+ to truncate */
+ ib_id_u64_t* table_id); /*!< out: new table id */
+
+/*****************************************************************//**
+Get a table id.
+@return DB_SUCCESS if found */
+
+ib_err_t
+ib_table_get_id(
+/*============*/
+ const char* table_name, /*!< in: table to find */
+ ib_id_u64_t* table_id); /*!< out: table id if found */
+
+/*****************************************************************//**
+Get an index id.
+@return DB_SUCCESS if found */
+
+ib_err_t
+ib_index_get_id(
+/*============*/
+ const char* table_name, /*!< in: find index for this table */
+ const char* index_name, /*!< in: index to find */
+ ib_id_u64_t* index_id); /*!< out: index id if found */
+
+/*****************************************************************//**
+Check if cursor is positioned.
+@return IB_TRUE if positioned */
+
+ib_bool_t
+ib_cursor_is_positioned(
+/*====================*/
+ const ib_crsr_t ib_crsr); /*!< in: InnoDB cursor instance */
+
+/*****************************************************************//**
+Checks if the data dictionary is latched in exclusive mode by a
+user transaction.
+@return TRUE if exclusive latch */
+
+ib_bool_t
+ib_schema_lock_is_exclusive(
+/*========================*/
+ const ib_trx_t ib_trx); /*!< in: transaction */
+
+/*****************************************************************//**
+Lock an InnoDB cursor/table.
+@return DB_SUCCESS or error code */
+
+ib_err_t
+ib_cursor_lock(
+/*===========*/
+ ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */
+ ib_lck_mode_t ib_lck_mode); /*!< in: InnoDB lock mode */
+
+/*****************************************************************//**
+Lock an InnoDB table using the table id.
+@return DB_SUCCESS or error code */
+
+ib_err_t
+ib_table_lock(
+/*===========*/
+ ib_trx_t ib_trx, /*!< in/out: transaction */
+ ib_id_u64_t table_id, /*!< in: table id */
+ ib_lck_mode_t ib_lck_mode); /*!< in: InnoDB lock mode */
+
+/*****************************************************************//**
+Set the lock mode of the cursor.
+@return DB_SUCCESS or error code */
+
+ib_err_t
+ib_cursor_set_lock_mode(
+/*====================*/
+ ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */
+ ib_lck_mode_t ib_lck_mode); /*!< in: InnoDB lock mode */
+
+/*****************************************************************//**
+Set need to access clustered index record flag. */
+
+void
+ib_cursor_set_cluster_access(
+/*=========================*/
+ ib_crsr_t ib_crsr); /*!< in/out: InnoDB cursor */
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_i8(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_i8_t val); /*!< in: value to write */
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_i16(
+/*=================*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_i16_t val); /*!< in: value to write */
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_i32(
+/*===============*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_i32_t val); /*!< in: value to write */
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_i64(
+/*===============*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_i64_t val); /*!< in: value to write */
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_u8(
+/*==============*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_u8_t val); /*!< in: value to write */
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_u16(
+/*===============*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_u16_t val); /*!< in: value to write */
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_u32(
+/*=================*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_u32_t val); /*!< in: value to write */
+
+/*****************************************************************//**
+Write an integer value to a column. Integers are stored in big-endian
+format and will need to be converted from the host format.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_u64(
+/*===============*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ ib_u64_t val); /*!< in: value to write */
+
+/*****************************************************************//**
+Inform the cursor that it's the start of an SQL statement. */
+
+void
+ib_cursor_stmt_begin(
+/*=================*/
+ ib_crsr_t ib_crsr); /*!< in: cursor */
+
+/*****************************************************************//**
+Write a double value to a column.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_double(
+/*==================*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ int col_no, /*!< in: column number */
+ double val); /*!< in: value to write */
+
+/*************************************************************//**
+Read a double column value from an InnoDB tuple.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_double(
+/*=================*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t col_no, /*!< in: column number */
+ double* dval); /*!< out: double value */
+
+/*****************************************************************//**
+Write a float value to a column.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_write_float(
+/*=================*/
+ ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
+ int col_no, /*!< in: column number */
+ float val); /*!< in: value to write */
+
+/*************************************************************//**
+Read a float value from an InnoDB tuple.
+@return DB_SUCCESS or error */
+
+ib_err_t
+ib_tuple_read_float(
+/*================*/
+ ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
+ ib_ulint_t col_no, /*!< in: column number */
+ float* fval); /*!< out: float value */
+
+/*****************************************************************//**
+Get the name of a column from the cursor's current index/table.
+@return name of the column */
+
+const char*
+ib_col_get_name(
+/*============*/
+ ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
+ ib_ulint_t i); /*!< in: column index in tuple */
+
+/*****************************************************************//**
+Get an index field name from the cursor.
+@return name of the field */
+
+const char*
+ib_get_idx_field_name(
+/*==================*/
+ ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
+ ib_ulint_t i); /*!< in: column index in tuple */
+
+/*****************************************************************//**
+Truncate a table.
+@return DB_SUCCESS or error code */
+
+ib_err_t
+ib_table_truncate(
+/*==============*/
+ const char* table_name, /*!< in: table name */
+ ib_id_u64_t* table_id); /*!< out: new table id */
+
+/*****************************************************************//**
+Frees a possible InnoDB trx object associated with the current THD.
+@return DB_SUCCESS or error number */
+
+ib_err_t
+ib_close_thd(
+/*=========*/
+ void* thd); /*!< in: handle to the MySQL
+ thread of the user whose resources
+ should be free'd */
+
+/*****************************************************************//**
+Get the generic configuration status flags (IB_CFG_BINLOG_ENABLED,
+IB_CFG_MDL_ENABLED, IB_CFG_DISABLE_ROWLOCK OR-ed together).
+@return configuration status */
+
+int
+ib_cfg_get_cfg(void);
+/*============*/
+
+/*****************************************************************//**
+Check whether the table name conforms to our requirements. Currently
+we only do a simple check for the presence of a '/'.
+@return DB_SUCCESS or err code */
+
+ib_err_t
+ib_table_name_check(
+/*================*/
+ const char* name); /*!< in: table name to check */
+
+/*****************************************************************//**
+Return the isolation level configured by "innodb_api_trx_level".
+NOTE(review): declared to return ib_trx_state_t although the value is
+semantically an isolation level (ib_trx_level_t) -- confirm with callers
+before changing the type.
+@return trx isolation level */
+
+ib_trx_state_t
+ib_cfg_trx_level(void);
+/*==============*/
+
+/*****************************************************************//**
+Return the configured background commit interval (in seconds).
+@return background commit interval (in seconds) */
+
+ib_ulint_t
+ib_cfg_bk_commit_interval(void);
+/*=======================*/
+
+/*****************************************************************//**
+Get a trx start time.
+@return trx start_time */
+
+ib_u64_t
+ib_trx_get_start_time(
+/*==================*/
+ ib_trx_t ib_trx); /*!< in: transaction */
+
+#endif /* api0api_h */
diff --git a/storage/innobase/include/api0misc.h b/storage/innobase/include/api0misc.h
new file mode 100644
index 00000000000..fcd748390d1
--- /dev/null
+++ b/storage/innobase/include/api0misc.h
@@ -0,0 +1,78 @@
+/*****************************************************************************
+
+Copyright (c) 2008, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/api0misc.h
+InnoDB Native API
+
+3/20/2011 Jimmy Yang extracted from Embedded InnoDB
+2008 Created by Sunny Bains
+*******************************************************/
+
+#ifndef api0misc_h
+#define api0misc_h
+
+#include "univ.i"
+#include "os0file.h"
+#include "que0que.h"
+#include "trx0trx.h"
+
+/** Whether binlog is enabled for applications using InnoDB APIs */
+extern my_bool ib_binlog_enabled;
+
+/** Whether MySQL MDL is enabled for applications using InnoDB APIs */
+extern my_bool ib_mdl_enabled;
+
+/** Whether InnoDB row lock is disabled for applications using InnoDB APIs */
+extern my_bool ib_disable_row_lock;
+
+/** configure value for transaction isolation level */
+extern ulong ib_trx_level_setting;
+
+/** configure value for background commit interval (in seconds) */
+extern ulong ib_bk_commit_interval;
+
+/********************************************************************
+Handles user errors and lock waits detected by the database engine.
+@return TRUE if it was a lock wait and we should continue running
+the query thread */
+UNIV_INTERN
+ibool
+ib_handle_errors(
+/*=============*/
+ dberr_t* new_err, /*!< out: possible new error
+ encountered in lock wait, or if
+ no new error, the value of
+ trx->error_state at the entry of this
+ function */
+ trx_t* trx, /*!< in: transaction */
+ que_thr_t* thr, /*!< in: query thread */
+ trx_savept_t* savept); /*!< in: savepoint or NULL */
+
+/*************************************************************************
+Sets a lock on a table.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+ib_trx_lock_table_with_retry(
+/*=========================*/
+ trx_t* trx, /*!< in/out: transaction */
+ dict_table_t* table, /*!< in: table to lock */
+ enum lock_mode mode); /*!< in: lock mode */
+
+#endif /* api0misc_h */
diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h
index 5592995d4b2..b99b0c0cd7b 100644
--- a/storage/innobase/include/btr0btr.h
+++ b/storage/innobase/include/btr0btr.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -92,6 +93,17 @@ insert/delete buffer when the record is not in the buffer pool. */
buffer when the record is not in the buffer pool. */
#define BTR_DELETE 8192
+/** In the case of BTR_SEARCH_LEAF or BTR_MODIFY_LEAF, the caller is
+already holding an S latch on the index tree */
+#define BTR_ALREADY_S_LATCHED 16384
+
+#define BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode) \
+ ((latch_mode) & ~(BTR_INSERT \
+ | BTR_DELETE_MARK \
+ | BTR_DELETE \
+ | BTR_ESTIMATE \
+ | BTR_IGNORE_SEC_UNIQUE \
+ | BTR_ALREADY_S_LATCHED))
#endif /* UNIV_HOTBACKUP */
/**************************************************************//**
@@ -118,7 +130,7 @@ btr_corruption_report(
#ifdef UNIV_BLOB_DEBUG
# include "ut0rbt.h"
/** An index->blobs entry for keeping track of off-page column references */
-struct btr_blob_dbg_struct
+struct btr_blob_dbg_t
{
unsigned blob_page_no:32; /*!< first BLOB page number */
unsigned ref_page_no:32; /*!< referring page number */
@@ -207,8 +219,32 @@ UNIV_INTERN
page_t*
btr_root_get(
/*=========*/
+ const dict_index_t* index, /*!< in: index tree */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
+
+/**************************************************************//**
+Checks and adjusts the root node of a tree during IMPORT TABLESPACE.
+@return error code, or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+btr_root_adjust_on_import(
+/*======================*/
+ const dict_index_t* index) /*!< in: index tree */
+ __attribute__((nonnull, warn_unused_result));
+
+/**************************************************************//**
+Gets the height of the B-tree (the level of the root, when the leaf
+level is assumed to be 0). The caller must hold an S or X latch on
+the index.
+@return tree height (level of the root) */
+UNIV_INTERN
+ulint
+btr_height_get(
+/*===========*/
dict_index_t* index, /*!< in: index tree */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull, warn_unused_result));
/**************************************************************//**
Gets a buffer page and declares its latching order level. */
UNIV_INLINE
@@ -269,7 +305,8 @@ UNIV_INLINE
index_id_t
btr_page_get_index_id(
/*==================*/
- const page_t* page); /*!< in: index page */
+ const page_t* page) /*!< in: index page */
+ __attribute__((nonnull, pure, warn_unused_result));
#ifndef UNIV_HOTBACKUP
/********************************************************//**
Gets the node level field in an index page.
@@ -278,16 +315,9 @@ UNIV_INLINE
ulint
btr_page_get_level_low(
/*===================*/
- const page_t* page); /*!< in: index page */
-/********************************************************//**
-Gets the node level field in an index page.
-@return level, leaf level == 0 */
-UNIV_INLINE
-ulint
-btr_page_get_level(
-/*===============*/
- const page_t* page, /*!< in: index page */
- mtr_t* mtr); /*!< in: mini-transaction handle */
+ const page_t* page) /*!< in: index page */
+ __attribute__((nonnull, pure, warn_unused_result));
+#define btr_page_get_level(page, mtr) btr_page_get_level_low(page)
/********************************************************//**
Gets the next index page number.
@return next page number */
@@ -296,7 +326,8 @@ ulint
btr_page_get_next(
/*==============*/
const page_t* page, /*!< in: index page */
- mtr_t* mtr); /*!< in: mini-transaction handle */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************//**
Gets the previous index page number.
@return prev page number */
@@ -305,7 +336,8 @@ ulint
btr_page_get_prev(
/*==============*/
const page_t* page, /*!< in: index page */
- mtr_t* mtr); /*!< in: mini-transaction handle */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
+ __attribute__((nonnull, warn_unused_result));
/*************************************************************//**
Gets pointer to the previous user record in the tree. It is assumed
that the caller has appropriate latches on the page and its neighbor.
@@ -315,8 +347,9 @@ rec_t*
btr_get_prev_user_rec(
/*==================*/
rec_t* rec, /*!< in: record on leaf level */
- mtr_t* mtr); /*!< in: mtr holding a latch on the page, and if
+ mtr_t* mtr) /*!< in: mtr holding a latch on the page, and if
needed, also to the previous page */
+ __attribute__((nonnull, warn_unused_result));
/*************************************************************//**
Gets pointer to the next user record in the tree. It is assumed
that the caller has appropriate latches on the page and its neighbor.
@@ -326,8 +359,9 @@ rec_t*
btr_get_next_user_rec(
/*==================*/
rec_t* rec, /*!< in: record on leaf level */
- mtr_t* mtr); /*!< in: mtr holding a latch on the page, and if
+ mtr_t* mtr) /*!< in: mtr holding a latch on the page, and if
needed, also to the next page */
+ __attribute__((nonnull, warn_unused_result));
/**************************************************************//**
Releases the latch on a leaf page and bufferunfixes it. */
UNIV_INLINE
@@ -337,7 +371,8 @@ btr_leaf_page_release(
buf_block_t* block, /*!< in: buffer block */
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or
BTR_MODIFY_LEAF */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
/**************************************************************//**
Gets the child node file address in a node pointer.
NOTE: the offsets array must contain all offsets for the record since
@@ -350,7 +385,8 @@ ulint
btr_node_ptr_get_child_page_no(
/*===========================*/
const rec_t* rec, /*!< in: node pointer record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull, pure, warn_unused_result));
/************************************************************//**
Creates the root node for a new index tree.
@return page number of the created root, FIL_NULL if did not succeed */
@@ -364,7 +400,8 @@ btr_create(
or 0 for uncompressed pages */
index_id_t index_id,/*!< in: index id */
dict_index_t* index, /*!< in: index */
- mtr_t* mtr); /*!< in: mini-transaction handle */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
+ __attribute__((nonnull));
/************************************************************//**
Frees a B-tree except the root page, which MUST be freed after this
by calling btr_free_root. */
@@ -386,7 +423,8 @@ btr_free_root(
ulint zip_size, /*!< in: compressed page size in bytes
or 0 for uncompressed pages */
ulint root_page_no, /*!< in: root page number */
- mtr_t* mtr); /*!< in/out: mini-transaction */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull));
/*************************************************************//**
Makes tree one level higher by splitting the root, and inserts
the tuple. It is assumed that mtr contains an x-latch on the tree.
@@ -398,13 +436,18 @@ UNIV_INTERN
rec_t*
btr_root_raise_and_insert(
/*======================*/
+ ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor at which to insert: must be
on the root page; when the function returns,
the cursor is positioned on the predecessor
of the inserted record */
+ ulint** offsets,/*!< out: offsets on inserted record */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap
+ that can be emptied, or NULL */
const dtuple_t* tuple, /*!< in: tuple to insert */
ulint n_ext, /*!< in: number of externally stored columns */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull, warn_unused_result));
/*************************************************************//**
Reorganizes an index page.
IMPORTANT: if btr_page_reorganize() is invoked on a compressed leaf
@@ -418,7 +461,8 @@ btr_page_reorganize(
/*================*/
buf_block_t* block, /*!< in: page to be reorganized */
dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
/*************************************************************//**
Decides if the page should be split at the convergence point of
inserts converging to left.
@@ -428,9 +472,10 @@ ibool
btr_page_get_split_rec_to_left(
/*===========================*/
btr_cur_t* cursor, /*!< in: cursor at which to insert */
- rec_t** split_rec);/*!< out: if split recommended,
+ rec_t** split_rec)/*!< out: if split recommended,
the first record on upper half page,
or NULL if tuple should be first */
+ __attribute__((nonnull, warn_unused_result));
/*************************************************************//**
Decides if the page should be split at the convergence point of
inserts converging to right.
@@ -440,9 +485,10 @@ ibool
btr_page_get_split_rec_to_right(
/*============================*/
btr_cur_t* cursor, /*!< in: cursor at which to insert */
- rec_t** split_rec);/*!< out: if split recommended,
+ rec_t** split_rec)/*!< out: if split recommended,
the first record on upper half page,
or NULL if tuple should be first */
+ __attribute__((nonnull, warn_unused_result));
/*************************************************************//**
Splits an index page to halves and inserts the tuple. It is assumed
that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is
@@ -456,12 +502,17 @@ UNIV_INTERN
rec_t*
btr_page_split_and_insert(
/*======================*/
+ ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor at which to insert; when the
function returns, the cursor is positioned
on the predecessor of the inserted record */
+ ulint** offsets,/*!< out: offsets on inserted record */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap
+ that can be emptied, or NULL */
const dtuple_t* tuple, /*!< in: tuple to insert */
ulint n_ext, /*!< in: number of externally stored columns */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************//**
Inserts a data tuple to a tree on a non-leaf level. It is assumed
that mtr holds an x-latch on the tree. */
@@ -469,14 +520,16 @@ UNIV_INTERN
void
btr_insert_on_non_leaf_level_func(
/*==============================*/
+ ulint flags, /*!< in: undo logging and locking flags */
dict_index_t* index, /*!< in: index */
ulint level, /*!< in: level, must be > 0 */
dtuple_t* tuple, /*!< in: the record to be inserted */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
- mtr_t* mtr); /*!< in: mtr */
-# define btr_insert_on_non_leaf_level(i,l,t,m) \
- btr_insert_on_non_leaf_level_func(i,l,t,__FILE__,__LINE__,m)
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
+# define btr_insert_on_non_leaf_level(f,i,l,t,m) \
+ btr_insert_on_non_leaf_level_func(f,i,l,t,__FILE__,__LINE__,m)
#endif /* !UNIV_HOTBACKUP */
/****************************************************************//**
Sets a record as the predefined minimum record. */
@@ -485,7 +538,8 @@ void
btr_set_min_rec_mark(
/*=================*/
rec_t* rec, /*!< in/out: record */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
#ifndef UNIV_HOTBACKUP
/*************************************************************//**
Deletes on the upper level the node pointer to a page. */
@@ -495,7 +549,8 @@ btr_node_ptr_delete(
/*================*/
dict_index_t* index, /*!< in: index tree */
buf_block_t* block, /*!< in: page whose node pointer is deleted */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
#ifdef UNIV_DEBUG
/************************************************************//**
Checks that the node pointer to a page is appropriate.
@@ -506,7 +561,8 @@ btr_check_node_ptr(
/*===============*/
dict_index_t* index, /*!< in: index tree */
buf_block_t* block, /*!< in: index page */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull, warn_unused_result));
#endif /* UNIV_DEBUG */
/*************************************************************//**
Tries to merge the page first to the left immediate brother if such a
@@ -540,7 +596,8 @@ btr_discard_page(
/*=============*/
btr_cur_t* cursor, /*!< in: cursor on the page to discard: not on
the root page */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
#endif /* !UNIV_HOTBACKUP */
/****************************************************************//**
Parses the redo log record for setting an index record as the predefined
@@ -554,7 +611,8 @@ btr_parse_set_min_rec_mark(
byte* end_ptr,/*!< in: buffer end */
ulint comp, /*!< in: nonzero=compact page format */
page_t* page, /*!< in: page or NULL */
- mtr_t* mtr); /*!< in: mtr or NULL */
+ mtr_t* mtr) /*!< in: mtr or NULL */
+ __attribute__((nonnull(1,2), warn_unused_result));
/***********************************************************//**
Parses a redo log record of reorganizing a page.
@return end of log record or NULL */
@@ -565,8 +623,10 @@ btr_parse_page_reorganize(
byte* ptr, /*!< in: buffer */
byte* end_ptr,/*!< in: buffer end */
dict_index_t* index, /*!< in: record descriptor */
+ bool compressed,/*!< in: true if compressed page */
buf_block_t* block, /*!< in: page to be reorganized, or NULL */
- mtr_t* mtr); /*!< in: mtr or NULL */
+ mtr_t* mtr) /*!< in: mtr or NULL */
+ __attribute__((nonnull(1,2,3), warn_unused_result));
#ifndef UNIV_HOTBACKUP
/**************************************************************//**
Gets the number of pages in a B-tree.
@@ -612,7 +672,8 @@ btr_page_free(
/*==========*/
dict_index_t* index, /*!< in: index tree */
buf_block_t* block, /*!< in: block to be freed, x-latched */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
/**************************************************************//**
Frees a file page used in an index tree. Can be used also to BLOB
external storage pages, because the page level 0 can be given as an
@@ -624,7 +685,8 @@ btr_page_free_low(
dict_index_t* index, /*!< in: index tree */
buf_block_t* block, /*!< in: block to be freed, x-latched */
ulint level, /*!< in: page level */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
#ifdef UNIV_BTR_PRINT
/*************************************************************//**
Prints size info of a B-tree. */
@@ -632,7 +694,8 @@ UNIV_INTERN
void
btr_print_size(
/*===========*/
- dict_index_t* index); /*!< in: index tree */
+ dict_index_t* index) /*!< in: index tree */
+ __attribute__((nonnull));
/**************************************************************//**
Prints directories and other info of all nodes in the index. */
UNIV_INTERN
@@ -640,8 +703,9 @@ void
btr_print_index(
/*============*/
dict_index_t* index, /*!< in: index */
- ulint width); /*!< in: print this many entries from start
+ ulint width) /*!< in: print this many entries from start
and end */
+ __attribute__((nonnull));
#endif /* UNIV_BTR_PRINT */
/************************************************************//**
Checks the size and number of fields in a record based on the definition of
@@ -653,18 +717,20 @@ btr_index_rec_validate(
/*===================*/
const rec_t* rec, /*!< in: index record */
const dict_index_t* index, /*!< in: index */
- ibool dump_on_error); /*!< in: TRUE if the function
+ ibool dump_on_error) /*!< in: TRUE if the function
should print hex dump of record
and page on error */
+ __attribute__((nonnull, warn_unused_result));
/**************************************************************//**
Checks the consistency of an index tree.
@return TRUE if ok */
UNIV_INTERN
-ibool
+bool
btr_validate_index(
/*===============*/
- dict_index_t* index, /*!< in: index */
- trx_t* trx); /*!< in: transaction or NULL */
+ dict_index_t* index, /*!< in: index */
+ const trx_t* trx) /*!< in: transaction or 0 */
+ __attribute__((nonnull(1), warn_unused_result));
#define BTR_N_LEAF_PAGES 1
#define BTR_TOTAL_SIZE 2
diff --git a/storage/innobase/include/btr0btr.ic b/storage/innobase/include/btr0btr.ic
index 6f7a66b12ac..00f50b5dcaf 100644
--- a/storage/innobase/include/btr0btr.ic
+++ b/storage/innobase/include/btr0btr.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -126,22 +126,6 @@ btr_page_get_level_low(
}
/********************************************************//**
-Gets the node level field in an index page.
-@return level, leaf level == 0 */
-UNIV_INLINE
-ulint
-btr_page_get_level(
-/*===============*/
- const page_t* page, /*!< in: index page */
- mtr_t* mtr __attribute__((unused)))
- /*!< in: mini-transaction handle */
-{
- ut_ad(page && mtr);
-
- return(btr_page_get_level_low(page));
-}
-
-/********************************************************//**
Sets the node level field in an index page. */
UNIV_INLINE
void
@@ -278,6 +262,7 @@ btr_node_ptr_get_child_page_no(
" in a node ptr record at offset %lu\n",
(ulong) page_offset(rec));
buf_page_print(page_align(rec), 0, 0);
+ ut_ad(0);
}
return(page_no);
diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h
index f437575579e..edba1d1d77f 100644
--- a/storage/innobase/include/btr0cur.h
+++ b/storage/innobase/include/btr0cur.h
@@ -31,14 +31,22 @@ Created 10/16/1994 Heikki Tuuri
#include "page0cur.h"
#include "btr0types.h"
-/* Mode flags for btr_cur operations; these can be ORed */
-#define BTR_NO_UNDO_LOG_FLAG 1 /* do no undo logging */
-#define BTR_NO_LOCKING_FLAG 2 /* do no record lock checking */
-#define BTR_KEEP_SYS_FLAG 4 /* sys fields will be found from the
- update vector or inserted entry */
-#define BTR_KEEP_POS_FLAG 8 /* btr_cur_pessimistic_update()
- must keep cursor position when
- moving columns to big_rec */
+/** Mode flags for btr_cur operations; these can be ORed */
+enum {
+ /** do no undo logging */
+ BTR_NO_UNDO_LOG_FLAG = 1,
+ /** do no record lock checking */
+ BTR_NO_LOCKING_FLAG = 2,
+ /** sys fields will be found in the update vector or inserted
+ entry */
+ BTR_KEEP_SYS_FLAG = 4,
+ /** btr_cur_pessimistic_update() must keep cursor position
+ when moving columns to big_rec */
+ BTR_KEEP_POS_FLAG = 8,
+ /** the caller is creating the index or wants to bypass the
+ index->info.online creation log */
+ BTR_CREATE_FLAG = 16
+};
#ifndef UNIV_HOTBACKUP
#include "que0types.h"
@@ -164,16 +172,19 @@ UNIV_INTERN
void
btr_cur_open_at_index_side_func(
/*============================*/
- ibool from_left, /*!< in: TRUE if open to the low end,
- FALSE if to the high end */
+ bool from_left, /*!< in: true if open to the low end,
+ false if to the high end */
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: latch mode */
- btr_cur_t* cursor, /*!< in: cursor */
+ btr_cur_t* cursor, /*!< in/out: cursor */
+ ulint level, /*!< in: level to search for
+ (0=leaf) */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
- mtr_t* mtr); /*!< in: mtr */
-#define btr_cur_open_at_index_side(f,i,l,c,m) \
- btr_cur_open_at_index_side_func(f,i,l,c,__FILE__,__LINE__,m)
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull));
+#define btr_cur_open_at_index_side(f,i,l,c,lv,m) \
+ btr_cur_open_at_index_side_func(f,i,l,c,lv,__FILE__,__LINE__,m)
/**********************************************************************//**
Positions a cursor at a randomly chosen position within a B-tree. */
UNIV_INTERN
@@ -196,7 +207,7 @@ one record on the page, the insert will always succeed; this is to
prevent trying to split a page with just one record.
@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_optimistic_insert(
/*======================*/
ulint flags, /*!< in: undo logging and locking flags: if not
@@ -204,6 +215,8 @@ btr_cur_optimistic_insert(
specified */
btr_cur_t* cursor, /*!< in: cursor on page after which to insert;
cursor stays valid */
+ ulint** offsets,/*!< out: offsets on *rec */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
dtuple_t* entry, /*!< in/out: entry to insert */
rec_t** rec, /*!< out: pointer to inserted record if
succeed */
@@ -212,11 +225,12 @@ btr_cur_optimistic_insert(
NULL */
ulint n_ext, /*!< in: number of externally stored columns */
que_thr_t* thr, /*!< in: query thread or NULL */
- mtr_t* mtr); /*!< in: mtr; if this function returns
+ mtr_t* mtr) /*!< in: mtr; if this function returns
DB_SUCCESS on a leaf page of a secondary
index in a compressed tablespace, the
mtr must be committed before latching
any further pages */
+ __attribute__((nonnull(2,3,4,5,6,7,10), warn_unused_result));
/*************************************************************//**
Performs an insert on a page of an index tree. It is assumed that mtr
holds an x-latch on the tree and on the cursor page. If the insert is
@@ -224,7 +238,7 @@ made on the leaf level, to avoid deadlocks, mtr must also own x-latches
to brothers of page, if those brothers exist.
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_pessimistic_insert(
/*=======================*/
ulint flags, /*!< in: undo logging and locking flags: if not
@@ -235,6 +249,9 @@ btr_cur_pessimistic_insert(
insertion will certainly succeed */
btr_cur_t* cursor, /*!< in: cursor after which to insert;
cursor stays valid */
+ ulint** offsets,/*!< out: offsets on *rec */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap
+ that can be emptied, or NULL */
dtuple_t* entry, /*!< in/out: entry to insert */
rec_t** rec, /*!< out: pointer to inserted record if
succeed */
@@ -243,7 +260,8 @@ btr_cur_pessimistic_insert(
NULL */
ulint n_ext, /*!< in: number of externally stored columns */
que_thr_t* thr, /*!< in: query thread or NULL */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull(2,3,4,5,6,7,10), warn_unused_result));
/*************************************************************//**
See if there is enough place in the page modification log to log
an update-in-place.
@@ -264,19 +282,23 @@ btr_cur_update_alloc_zip(
Updates a record when the update causes no size changes in its fields.
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_update_in_place(
/*====================*/
ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor on the record to update;
cursor stays valid and positioned on the
same record */
+ const ulint* offsets,/*!< in: offsets on cursor->page_cur.rec */
const upd_t* update, /*!< in: update vector */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr); /*!< in: mtr; must be committed before
+ que_thr_t* thr, /*!< in: query thread, or NULL if
+ appropriate flags are set */
+ trx_id_t trx_id, /*!< in: transaction id */
+ mtr_t* mtr) /*!< in: mtr; must be committed before
latching any further pages */
+ __attribute__((warn_unused_result, nonnull(2,3,4,8)));
/*************************************************************//**
Tries to update a record on a page in an index tree. It is assumed that mtr
holds an x-latch on the page. The operation does not succeed if there is too
@@ -286,20 +308,25 @@ so that tree compression is recommended.
DB_UNDERFLOW if the page would become too empty, or DB_ZIP_OVERFLOW if
there is not enough space left on the compressed page */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_optimistic_update(
/*======================*/
ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor on the record to update;
cursor stays valid and positioned on the
same record */
+ ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
const upd_t* update, /*!< in: update vector; this must also
contain trx id and roll ptr fields */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr); /*!< in: mtr; must be committed before
+ que_thr_t* thr, /*!< in: query thread, or NULL if
+ appropriate flags are set */
+ trx_id_t trx_id, /*!< in: transaction id */
+ mtr_t* mtr) /*!< in: mtr; must be committed before
latching any further pages */
+ __attribute__((warn_unused_result, nonnull(2,3,4,5,9)));
/*************************************************************//**
Performs an update of a record on a page of a tree. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. If the
@@ -307,7 +334,7 @@ update is made on the leaf level, to avoid deadlocks, mtr must also
own x-latches to brothers of page, if those brothers exist.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_pessimistic_update(
/*=======================*/
ulint flags, /*!< in: undo logging, locking, and rollback
@@ -315,7 +342,13 @@ btr_cur_pessimistic_update(
btr_cur_t* cursor, /*!< in/out: cursor on the record to update;
cursor may become invalid if *big_rec == NULL
|| !(flags & BTR_KEEP_POS_FLAG) */
- mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
+ ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */
+ mem_heap_t** offsets_heap,
+ /*!< in/out: pointer to memory heap
+ that can be emptied, or NULL */
+ mem_heap_t* entry_heap,
+ /*!< in/out: memory heap for allocating
+ big_rec and the index tuple */
big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
be stored externally by the caller, or NULL */
const upd_t* update, /*!< in: update vector; this is allowed also
@@ -323,9 +356,12 @@ btr_cur_pessimistic_update(
the values in update vector have no effect */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr); /*!< in: mtr; must be committed before
+ que_thr_t* thr, /*!< in: query thread, or NULL if
+ appropriate flags are set */
+ trx_id_t trx_id, /*!< in: transaction id */
+ mtr_t* mtr) /*!< in: mtr; must be committed before
latching any further pages */
+ __attribute__((warn_unused_result, nonnull(2,3,4,5,6,7,11)));
/***********************************************************//**
Marks a clustered index record deleted. Writes an undo log record to
undo log on this delete marking. Writes in the trx id field the id
@@ -333,15 +369,13 @@ of the deleting transaction, and in the roll ptr field pointer to the
undo log record created.
@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_del_mark_set_clust_rec(
/*===========================*/
- ulint flags, /*!< in: undo logging and locking flags */
buf_block_t* block, /*!< in/out: buffer block of the record */
rec_t* rec, /*!< in/out: record */
dict_index_t* index, /*!< in: clustered index of the record */
const ulint* offsets,/*!< in: rec_get_offsets(rec) */
- ibool val, /*!< in: value to set */
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr) /*!< in: mtr */
__attribute__((nonnull));
@@ -349,7 +383,7 @@ btr_cur_del_mark_set_clust_rec(
Sets a secondary index record delete mark to TRUE or FALSE.
@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
UNIV_INTERN
-ulint
+dberr_t
btr_cur_del_mark_set_sec_rec(
/*=========================*/
ulint flags, /*!< in: locking flag */
@@ -382,16 +416,27 @@ but no latch on the whole tree.
@return TRUE if success, i.e., the page did not become too empty */
UNIV_INTERN
ibool
-btr_cur_optimistic_delete(
-/*======================*/
+btr_cur_optimistic_delete_func(
+/*===========================*/
btr_cur_t* cursor, /*!< in: cursor on the record to delete;
cursor stays valid: if deletion succeeds,
on function exit it points to the successor
of the deleted record */
- mtr_t* mtr); /*!< in: mtr; if this function returns
+# ifdef UNIV_DEBUG
+ ulint flags, /*!< in: BTR_CREATE_FLAG or 0 */
+# endif /* UNIV_DEBUG */
+ mtr_t* mtr) /*!< in: mtr; if this function returns
TRUE on a leaf page of a secondary
index, the mtr must be committed
before latching any further pages */
+ __attribute__((nonnull, warn_unused_result));
+# ifdef UNIV_DEBUG
+# define btr_cur_optimistic_delete(cursor, flags, mtr) \
+ btr_cur_optimistic_delete_func(cursor, flags, mtr)
+# else /* UNIV_DEBUG */
+# define btr_cur_optimistic_delete(cursor, flags, mtr) \
+ btr_cur_optimistic_delete_func(cursor, mtr)
+# endif /* UNIV_DEBUG */
/*************************************************************//**
Removes the record on which the tree cursor is positioned. Tries
to compress the page if its fillfactor drops below a threshold
@@ -404,7 +449,7 @@ UNIV_INTERN
ibool
btr_cur_pessimistic_delete(
/*=======================*/
- ulint* err, /*!< out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
+ dberr_t* err, /*!< out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
the latter may occur because we may have
to update node pointers on upper levels,
and in the case of variable length keys
@@ -417,8 +462,10 @@ btr_cur_pessimistic_delete(
if compression does not occur, the cursor
stays valid: it points to successor of
deleted record on function exit */
+ ulint flags, /*!< in: BTR_CREATE_FLAG or 0 */
enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
#endif /* !UNIV_HOTBACKUP */
/***********************************************************//**
Parses a redo log record of updating a record in-place.
@@ -472,9 +519,10 @@ btr_estimate_n_rows_in_range(
ulint mode2); /*!< in: search mode for range end */
/*******************************************************************//**
Estimates the number of different key values in a given index, for
-each n-column prefix of the index where n <= dict_index_get_n_unique(index).
-The estimates are stored in the array index->stat_n_diff_key_vals[] and
-the number of pages that were sampled is saved in index->stat_n_sample_sizes[].
+each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index).
+The estimates are stored in the array index->stat_n_diff_key_vals[] (indexed
+0..n_uniq-1) and the number of pages that were sampled is saved in
+index->stat_n_sample_sizes[].
If innodb_stats_method is nulls_ignored, we also record the number of
non-null values for each prefix and stored the estimates in
array index->stat_n_non_null_key_vals. */
@@ -528,7 +576,7 @@ The fields are stored on pages allocated from leaf node
file segment of the index tree.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
UNIV_INTERN
-enum db_err
+dberr_t
btr_store_big_rec_extern_fields(
/*============================*/
dict_index_t* index, /*!< in: index of rec; the index tree
@@ -662,8 +710,7 @@ limit, merging it to a neighbor is tried */
/** A slot in the path array. We store here info on a search path down the
tree. Each slot contains data on a single level of the tree. */
-typedef struct btr_path_struct btr_path_t;
-struct btr_path_struct{
+struct btr_path_t{
ulint nth_rec; /*!< index of the record
where the page cursor stopped on
this level (index in alphabetical
@@ -700,7 +747,7 @@ enum btr_cur_method {
/** The tree cursor: the definition appears here only for the compiler
to know struct size! */
-struct btr_cur_struct {
+struct btr_cur_t {
dict_index_t* index; /*!< index where positioned */
page_cur_t page_cur; /*!< page cursor */
purge_node_t* purge_node; /*!< purge node, for BTR_DELETE */
@@ -737,7 +784,7 @@ struct btr_cur_struct {
for comparison to the adjacent user
record if that record is on a
different leaf page! (See the note in
- row_ins_duplicate_key.) */
+ row_ins_duplicate_error_in_clust.) */
ulint up_bytes; /*!< number of matched bytes to the
right at the time cursor positioned;
only used internally in searches: not
@@ -822,6 +869,11 @@ srv_printf_innodb_monitor(). */
extern ulint btr_cur_n_sea_old;
#endif /* !UNIV_HOTBACKUP */
+#ifdef UNIV_DEBUG
+/* Flag to limit optimistic insert records */
+extern uint btr_cur_limit_optimistic_insert_debug;
+#endif /* UNIV_DEBUG */
+
#ifndef UNIV_NONINL
#include "btr0cur.ic"
#endif
diff --git a/storage/innobase/include/btr0cur.ic b/storage/innobase/include/btr0cur.ic
index 540417e3062..080866c7465 100644
--- a/storage/innobase/include/btr0cur.ic
+++ b/storage/innobase/include/btr0cur.ic
@@ -27,6 +27,16 @@ Created 10/16/1994 Heikki Tuuri
#include "btr0btr.h"
#ifdef UNIV_DEBUG
+# define LIMIT_OPTIMISTIC_INSERT_DEBUG(NREC, CODE)\
+if (btr_cur_limit_optimistic_insert_debug\
+ && (NREC) >= (ulint)btr_cur_limit_optimistic_insert_debug) {\
+ CODE;\
+}
+#else
+# define LIMIT_OPTIMISTIC_INSERT_DEBUG(NREC, CODE)
+#endif /* UNIV_DEBUG */
+
+#ifdef UNIV_DEBUG
/*********************************************************//**
Returns the page cursor component of a tree cursor.
@return pointer to page cursor component */
@@ -135,6 +145,9 @@ btr_cur_compress_recommendation(
page = btr_cur_get_page(cursor);
+ LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page) * 2,
+ return(FALSE));
+
if ((page_get_data_size(page) < BTR_CUR_PAGE_COMPRESS_LIMIT)
|| ((btr_page_get_next(page, mtr) == FIL_NULL)
&& (btr_page_get_prev(page, mtr) == FIL_NULL))) {
diff --git a/storage/innobase/include/btr0pcur.h b/storage/innobase/include/btr0pcur.h
index a8eaac4690b..973fae382ab 100644
--- a/storage/innobase/include/btr0pcur.h
+++ b/storage/innobase/include/btr0pcur.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -146,13 +146,16 @@ UNIV_INLINE
void
btr_pcur_open_at_index_side(
/*========================*/
- ibool from_left, /*!< in: TRUE if open to the low end,
- FALSE if to the high end */
+ bool from_left, /*!< in: true if open to the low end,
+ false if to the high end */
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: latch mode */
- btr_pcur_t* pcur, /*!< in: cursor */
- ibool do_init, /*!< in: TRUE if should be initialized */
- mtr_t* mtr); /*!< in: mtr */
+ btr_pcur_t* pcur, /*!< in/out: cursor */
+ bool init_pcur, /*!< in: whether to initialize pcur */
+ ulint level, /*!< in: level to search for
+ (0=leaf) */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull));
/**************************************************************//**
Gets the up_match value for a pcur after a search.
@return number of matched fields at the cursor or to the right if
@@ -209,8 +212,17 @@ btr_pcur_open_at_rnd_pos_func(
#define btr_pcur_open_at_rnd_pos(i,l,c,m) \
btr_pcur_open_at_rnd_pos_func(i,l,c,__FILE__,__LINE__,m)
/**************************************************************//**
-Frees the possible old_rec_buf buffer of a persistent cursor and sets the
-latch mode of the persistent cursor to BTR_NO_LATCHES. */
+Frees the possible memory heap of a persistent cursor and sets the latch
+mode of the persistent cursor to BTR_NO_LATCHES.
+WARNING: this function does not release the latch on the page where the
+cursor is currently positioned. The latch is acquired by the
+"move to next/previous" family of functions. Since recursive shared locks
+are not allowed, you must take care (if using the cursor in S-mode) to
+manually release the latch by either calling
+btr_leaf_page_release(btr_pcur_get_block(&pcur), pcur.latch_mode, mtr)
+or by committing the mini-transaction right after btr_pcur_close().
+A subsequent attempt to crawl the same page in the same mtr would cause
+an assertion failure. */
UNIV_INLINE
void
btr_pcur_close(
@@ -452,14 +464,14 @@ btr_pcur_move_to_prev_on_page(
/* The persistent B-tree cursor structure. This is used mainly for SQL
selects, updates, and deletes. */
-struct btr_pcur_struct{
+struct btr_pcur_t{
btr_cur_t btr_cur; /*!< a B-tree cursor */
ulint latch_mode; /*!< see TODO note below!
BTR_SEARCH_LEAF, BTR_MODIFY_LEAF,
BTR_MODIFY_TREE, or BTR_NO_LATCHES,
depending on the latching state of
the page and tree where the cursor is
- positioned; the last value means that
+ positioned; BTR_NO_LATCHES means that
the cursor is not currently positioned:
we say then that the cursor is
detached; it can be restored to
diff --git a/storage/innobase/include/btr0pcur.ic b/storage/innobase/include/btr0pcur.ic
index a27033c4a7c..79afd7c322e 100644
--- a/storage/innobase/include/btr0pcur.ic
+++ b/storage/innobase/include/btr0pcur.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -429,7 +429,7 @@ btr_pcur_open_low(
btr_pcur_init(cursor);
- cursor->latch_mode = latch_mode;
+ cursor->latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode);
cursor->search_mode = mode;
/* Search with the tree cursor */
@@ -496,28 +496,26 @@ UNIV_INLINE
void
btr_pcur_open_at_index_side(
/*========================*/
- ibool from_left, /*!< in: TRUE if open to the low end,
- FALSE if to the high end */
+ bool from_left, /*!< in: true if open to the low end,
+ false if to the high end */
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: latch mode */
- btr_pcur_t* pcur, /*!< in: cursor */
- ibool do_init, /*!< in: TRUE if should be initialized */
- mtr_t* mtr) /*!< in: mtr */
+ btr_pcur_t* pcur, /*!< in/out: cursor */
+ bool init_pcur, /*!< in: whether to initialize pcur */
+ ulint level, /*!< in: level to search for
+ (0=leaf) */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
- pcur->latch_mode = latch_mode;
+ pcur->latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode);
- if (from_left) {
- pcur->search_mode = PAGE_CUR_G;
- } else {
- pcur->search_mode = PAGE_CUR_L;
- }
+ pcur->search_mode = from_left ? PAGE_CUR_G : PAGE_CUR_L;
- if (do_init) {
+ if (init_pcur) {
btr_pcur_init(pcur);
}
btr_cur_open_at_index_side(from_left, index, latch_mode,
- btr_pcur_get_btr_cur(pcur), mtr);
+ btr_pcur_get_btr_cur(pcur), level, mtr);
pcur->pos_state = BTR_PCUR_IS_POSITIONED;
pcur->old_stored = BTR_PCUR_OLD_NOT_STORED;
@@ -556,7 +554,16 @@ btr_pcur_open_at_rnd_pos_func(
/**************************************************************//**
Frees the possible memory heap of a persistent cursor and sets the latch
-mode of the persistent cursor to BTR_NO_LATCHES. */
+mode of the persistent cursor to BTR_NO_LATCHES.
+WARNING: this function does not release the latch on the page where the
+cursor is currently positioned. The latch is acquired by the
+"move to next/previous" family of functions. Since recursive shared locks
+are not allowed, you must take care (if using the cursor in S-mode) to
+manually release the latch by either calling
+btr_leaf_page_release(btr_pcur_get_block(&pcur), pcur.latch_mode, mtr)
+or by committing the mini-transaction right after btr_pcur_close().
+A subsequent attempt to crawl the same page in the same mtr would cause
+an assertion failure. */
UNIV_INLINE
void
btr_pcur_close(
diff --git a/storage/innobase/include/btr0sea.h b/storage/innobase/include/btr0sea.h
index 5316c3efd39..fea117d0aaf 100644
--- a/storage/innobase/include/btr0sea.h
+++ b/storage/innobase/include/btr0sea.h
@@ -68,7 +68,8 @@ UNIV_INLINE
btr_search_t*
btr_search_get_info(
/*================*/
- dict_index_t* index); /*!< in: index */
+ dict_index_t* index) /*!< in: index */
+ __attribute__((nonnull));
/*****************************************************************//**
Creates and initializes a search info struct.
@return own: search info struct */
@@ -193,7 +194,7 @@ btr_search_validate(void);
#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
/** The search info struct in an index */
-struct btr_search_struct{
+struct btr_search_t{
ulint ref_count; /*!< Number of blocks in this index tree
that have search index built
i.e. block->index points to this index.
@@ -242,16 +243,13 @@ struct btr_search_struct{
#endif /* UNIV_SEARCH_PERF_STAT */
#ifdef UNIV_DEBUG
ulint magic_n; /*!< magic number @see BTR_SEARCH_MAGIC_N */
-/** value of btr_search_struct::magic_n, used in assertions */
+/** value of btr_search_t::magic_n, used in assertions */
# define BTR_SEARCH_MAGIC_N 1112765
#endif /* UNIV_DEBUG */
};
/** The hash index system */
-typedef struct btr_search_sys_struct btr_search_sys_t;
-
-/** The hash index system */
-struct btr_search_sys_struct{
+struct btr_search_sys_t{
hash_table_t* hash_index; /*!< the adaptive hash index,
mapping dtuple_fold values
to rec_t pointers on index pages */
diff --git a/storage/innobase/include/btr0sea.ic b/storage/innobase/include/btr0sea.ic
index 49ba0fd3f0b..0bd869be136 100644
--- a/storage/innobase/include/btr0sea.ic
+++ b/storage/innobase/include/btr0sea.ic
@@ -45,8 +45,6 @@ btr_search_get_info(
/*================*/
dict_index_t* index) /*!< in: index */
{
- ut_ad(index);
-
return(index->search_info);
}
diff --git a/storage/innobase/include/btr0types.h b/storage/innobase/include/btr0types.h
index 09f97b3cabd..c1a4531f861 100644
--- a/storage/innobase/include/btr0types.h
+++ b/storage/innobase/include/btr0types.h
@@ -33,11 +33,11 @@ Created 2/17/1996 Heikki Tuuri
#include "sync0rw.h"
/** Persistent cursor */
-typedef struct btr_pcur_struct btr_pcur_t;
+struct btr_pcur_t;
/** B-tree cursor */
-typedef struct btr_cur_struct btr_cur_t;
+struct btr_cur_t;
/** B-tree search information for the adaptive hash index */
-typedef struct btr_search_struct btr_search_t;
+struct btr_search_t;
#ifndef UNIV_HOTBACKUP
@@ -68,7 +68,7 @@ extern char btr_search_enabled;
#ifdef UNIV_BLOB_DEBUG
# include "buf0types.h"
/** An index->blobs entry for keeping track of off-page column references */
-typedef struct btr_blob_dbg_struct btr_blob_dbg_t;
+struct btr_blob_dbg_t;
/** Insert to index->blobs a reference to an off-page column.
@param index the index tree
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index 2284f21e3ab..74a6e203808 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -89,8 +89,6 @@ extern ibool buf_debug_prints;/*!< If this is set TRUE, the program
prints info whenever read or flush
occurs */
#endif /* UNIV_DEBUG */
-extern ulint srv_buf_pool_write_requests; /*!< variable to count write request
- issued */
extern ulint srv_buf_pool_instances;
extern ulint srv_buf_pool_curr_size;
#else /* !UNIV_HOTBACKUP */
@@ -102,7 +100,7 @@ extern buf_block_t* back_block2; /*!< second block, for page reorganize */
#define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
/** @brief States of a control block
-@see buf_page_struct
+@see buf_page_t
The enumeration values must be 0..7. */
enum buf_page_state {
@@ -132,7 +130,7 @@ enum buf_page_state {
/** This structure defines information we will fetch from each buffer pool. It
will be used to print table IO stats */
-struct buf_pool_info_struct{
+struct buf_pool_info_t{
/* General buffer pool info */
ulint pool_unique_id; /*!< Buffer Pool ID */
ulint pool_size; /*!< Buffer Pool size in pages */
@@ -203,7 +201,12 @@ struct buf_pool_info_struct{
interval */
};
-typedef struct buf_pool_info_struct buf_pool_info_t;
+/** The occupied bytes of lists in all buffer pools */
+struct buf_pools_list_size_t {
+ ulint LRU_bytes; /*!< LRU size in bytes */
+ ulint unzip_LRU_bytes; /*!< unzip_LRU size in bytes */
+ ulint flush_list_bytes; /*!< flush_list size in bytes */
+};
#ifndef UNIV_HOTBACKUP
/********************************************************************//**
@@ -222,9 +225,9 @@ buf_pool_mutex_exit_all(void);
/********************************************************************//**
Creates the buffer pool.
-@return own: buf_pool object, NULL if not enough memory or error */
+@return DB_SUCCESS if success, DB_ERROR if not enough memory or error */
UNIV_INTERN
-ulint
+dberr_t
buf_pool_init(
/*=========*/
ulint size, /*!< in: Size of the total pool in bytes */
@@ -629,9 +632,12 @@ UNIV_INTERN
ibool
buf_page_is_corrupted(
/*==================*/
+ bool check_lsn, /*!< in: true if we need to check the
+ and complain about the LSN */
const byte* read_buf, /*!< in: a database page */
- ulint zip_size); /*!< in: size of compressed page;
+ ulint zip_size) /*!< in: size of compressed page;
0 for uncompressed pages */
+ __attribute__((nonnull, warn_unused_result));
#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Gets the space id, page offset, and byte offset within page of a
@@ -881,7 +887,7 @@ buf_page_belongs_to_unzip_LRU(
Gets the mutex of a block.
@return pointer to mutex protecting bpage */
UNIV_INLINE
-mutex_t*
+ib_mutex_t*
buf_page_get_mutex(
/*===============*/
const buf_page_t* bpage) /*!< in: pointer to control block */
@@ -1010,8 +1016,7 @@ UNIV_INLINE
void
buf_page_set_accessed(
/*==================*/
- buf_page_t* bpage, /*!< in/out: control block */
- ulint time_ms) /*!< in: ut_time_ms() */
+ buf_page_t* bpage) /*!< in/out: control block */
__attribute__((nonnull));
/*********************************************************************//**
Gets the buf_block_t handle of a buffered file block if an uncompressed
@@ -1152,7 +1157,7 @@ UNIV_INTERN
buf_page_t*
buf_page_init_for_read(
/*===================*/
- ulint* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
+ dberr_t* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ... */
ulint space, /*!< in: space id */
ulint zip_size,/*!< in: compressed page size, or 0 */
@@ -1164,9 +1169,9 @@ buf_page_init_for_read(
/********************************************************************//**
Completes an asynchronous read or write request of a file page to or from
the buffer pool.
-@return TRUE if successful */
+@return true if successful */
UNIV_INTERN
-ibool
+bool
buf_page_io_complete(
/*=================*/
buf_page_t* bpage); /*!< in: pointer to the block in question */
@@ -1368,6 +1373,14 @@ buf_get_total_list_len(
ulint* free_len, /*!< out: length of all free lists */
ulint* flush_list_len);/*!< out: length of all flush lists */
/********************************************************************//**
+Get total list size in bytes from all buffer pools. */
+UNIV_INTERN
+void
+buf_get_total_list_size_in_bytes(
+/*=============================*/
+ buf_pools_list_size_t* buf_pools_list_size); /*!< out: list sizes
+ in all buffer pools */
+/********************************************************************//**
Get total buffer pool statistics. */
UNIV_INTERN
void
@@ -1385,6 +1398,16 @@ buf_get_nth_chunk_block(
ulint n, /*!< in: nth chunk in the buffer pool */
ulint* chunk_size); /*!< in: chunk size */
+/********************************************************************//**
+Calculate the checksum of a page from compressed table and update the page. */
+UNIV_INTERN
+void
+buf_flush_update_zip_checksum(
+/*==========================*/
+ buf_frame_t* page, /*!< in/out: Page to update */
+ ulint zip_size, /*!< in: Compressed page size */
+ lsn_t lsn); /*!< in: Lsn to stamp on the page */
+
#endif /* !UNIV_HOTBACKUP */
/** The common buffer control block structure
@@ -1393,10 +1416,10 @@ for compressed and uncompressed frames */
/** Number of bits used for buffer page states. */
#define BUF_PAGE_STATE_BITS 3
-struct buf_page_struct{
+struct buf_page_t{
/** @name General fields
None of these bit-fields must be modified without holding
- buf_page_get_mutex() [buf_block_struct::mutex or
+ buf_page_get_mutex() [buf_block_t::mutex or
buf_pool->zip_mutex], since they can be stored in the same
machine word. Some of these fields are additionally protected
by buf_pool->mutex. */
@@ -1527,7 +1550,7 @@ struct buf_page_struct{
/* @} */
/** @name LRU replacement algorithm fields
These fields are protected by buf_pool->mutex only (not
- buf_pool->zip_mutex or buf_block_struct::mutex). */
+ buf_pool->zip_mutex or buf_block_t::mutex). */
/* @{ */
UT_LIST_NODE_T(buf_page_t) LRU;
@@ -1547,23 +1570,24 @@ struct buf_page_struct{
to read this for heuristic
purposes without holding any
mutex or latch */
- unsigned access_time:32; /*!< time of first access, or
- 0 if the block was never accessed
- in the buffer pool */
/* @} */
+ unsigned access_time; /*!< time of first access, or
+ 0 if the block was never accessed
+ in the buffer pool. Protected by
+ block mutex */
# if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
ibool file_page_was_freed;
/*!< this is set to TRUE when
fsp frees a page in buffer pool;
protected by buf_pool->zip_mutex
- or buf_block_struct::mutex. */
+ or buf_block_t::mutex. */
# endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
#endif /* !UNIV_HOTBACKUP */
};
/** The buffer control block structure */
-struct buf_block_struct{
+struct buf_block_t{
/** @name General fields */
/* @{ */
@@ -1587,7 +1611,7 @@ struct buf_block_struct{
decompressed LRU list;
used in debugging */
#endif /* UNIV_DEBUG */
- mutex_t mutex; /*!< mutex protecting this block:
+ ib_mutex_t mutex; /*!< mutex protecting this block:
state (also protected by the buffer
pool mutex), io_fix, buf_fix_count,
and accessed; we introduce this new
@@ -1646,8 +1670,8 @@ struct buf_block_struct{
/** @name Hash search fields
These 5 fields may only be modified when we have
an x-latch on btr_search_latch AND
- - we are holding an s-latch or x-latch on buf_block_struct::lock or
- - we know that buf_block_struct::buf_fix_count == 0.
+ - we are holding an s-latch or x-latch on buf_block_t::lock or
+ - we know that buf_block_t::buf_fix_count == 0.
An exception to this is when we init or create a page
in the buffer pool in buf0buf.cc.
@@ -1706,7 +1730,7 @@ Compute the hash fold value for blocks in buf_pool->zip_hash. */
/* @} */
/** @brief The buffer pool statistics structure. */
-struct buf_pool_stat_struct{
+struct buf_pool_stat_t{
ulint n_page_gets; /*!< number of page gets performed;
also successful searches through
the adaptive hash index are
@@ -1730,10 +1754,12 @@ struct buf_pool_stat_struct{
young because the first access
was not long enough ago, in
buf_page_peek_if_too_old() */
+ ulint LRU_bytes; /*!< LRU size in bytes */
+ ulint flush_list_bytes;/*!< flush_list size in bytes */
};
/** Statistics of buddy blocks of a given size. */
-struct buf_buddy_stat_struct {
+struct buf_buddy_stat_t {
/** Number of blocks allocated from the buddy system. */
ulint used;
/** Number of blocks relocated by the buddy system. */
@@ -1747,13 +1773,13 @@ struct buf_buddy_stat_struct {
NOTE! The definition appears here only for other modules of this
directory (buf) to see it. Do not use from outside! */
-struct buf_pool_struct{
+struct buf_pool_t{
/** @name General fields */
/* @{ */
- mutex_t mutex; /*!< Buffer pool mutex of this
+ ib_mutex_t mutex; /*!< Buffer pool mutex of this
instance */
- mutex_t zip_mutex; /*!< Zip mutex of this buffer
+ ib_mutex_t zip_mutex; /*!< Zip mutex of this buffer
pool instance, protects compressed
only pages (of type buf_page_t, not
buf_block_t */
@@ -1807,7 +1833,7 @@ struct buf_pool_struct{
/* @{ */
- mutex_t flush_list_mutex;/*!< mutex protecting the
+ ib_mutex_t flush_list_mutex;/*!< mutex protecting the
flush list access. This mutex
protects flush_list, flush_rbt
and bpage::list pointers when
diff --git a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
index 88c29ab5603..b310efdf451 100644
--- a/storage/innobase/include/buf0buf.ic
+++ b/storage/innobase/include/buf0buf.ic
@@ -31,13 +31,13 @@ Created 11/5/1995 Heikki Tuuri
*******************************************************/
#include "mtr0mtr.h"
+#ifndef UNIV_HOTBACKUP
#include "buf0flu.h"
#include "buf0lru.h"
#include "buf0rea.h"
-#ifndef UNIV_HOTBACKUP
/** A chunk of buffers. The buffer pool is allocated in chunks. */
-struct buf_chunk_struct{
+struct buf_chunk_t{
ulint mem_size; /*!< allocated size of the chunk */
ulint size; /*!< size of frames[] and blocks[] */
void* mem; /*!< pointer to the memory area which
@@ -339,7 +339,7 @@ buf_page_belongs_to_unzip_LRU(
Gets the mutex of a block.
@return pointer to mutex protecting bpage */
UNIV_INLINE
-mutex_t*
+ib_mutex_t*
buf_page_get_mutex(
/*===============*/
const buf_page_t* bpage) /*!< in: pointer to control block */
@@ -419,6 +419,8 @@ buf_page_get_io_fix(
/*================*/
const buf_page_t* bpage) /*!< in: pointer to the control block */
{
+ ut_ad(bpage != NULL);
+
enum buf_io_fix io_fix = (enum buf_io_fix) bpage->io_fix;
#ifdef UNIV_DEBUG
switch (io_fix) {
@@ -614,18 +616,18 @@ UNIV_INLINE
void
buf_page_set_accessed(
/*==================*/
- buf_page_t* bpage, /*!< in/out: control block */
- ulint time_ms) /*!< in: ut_time_ms() */
+ buf_page_t* bpage) /*!< in/out: control block */
{
#ifdef UNIV_DEBUG
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_ad(!buf_pool_mutex_own(buf_pool));
+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
#endif
ut_a(buf_page_in_file(bpage));
if (!bpage->access_time) {
/* Make this the time of the first access. */
- bpage->access_time = time_ms;
+ bpage->access_time = ut_time_ms();
}
}
@@ -942,7 +944,7 @@ buf_page_get_newest_modification(
page frame */
{
lsn_t lsn;
- mutex_t* block_mutex = buf_page_get_mutex(bpage);
+ ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
mutex_enter(block_mutex);
diff --git a/storage/innobase/include/buf0dblwr.h b/storage/innobase/include/buf0dblwr.h
index fcc56d91405..357ba697f6a 100644
--- a/storage/innobase/include/buf0dblwr.h
+++ b/storage/innobase/include/buf0dblwr.h
@@ -29,7 +29,6 @@ Created 2011/12/19 Inaam Rana
#include "univ.i"
#include "ut0byte.h"
#include "log0log.h"
-#include "buf0types.h"
#ifndef UNIV_HOTBACKUP
@@ -113,8 +112,8 @@ buf_dblwr_write_single_page(
buf_page_t* bpage); /*!< in: buffer block to write */
/** Doublewrite control struct */
-struct buf_dblwr_struct{
- mutex_t mutex; /*!< mutex protecting the first_free field and
+struct buf_dblwr_t{
+ ib_mutex_t mutex; /*!< mutex protecting the first_free field and
write_buf */
ulint block1; /*!< the page number of the first
doublewrite block (64 pages) */
diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h
index faf577f718b..94f4e6dedd1 100644
--- a/storage/innobase/include/buf0flu.h
+++ b/storage/innobase/include/buf0flu.h
@@ -95,23 +95,27 @@ void
buf_flush_sync_datafiles(void);
/*==========================*/
/*******************************************************************//**
-This utility flushes dirty blocks from the end of the flush_list of
+This utility flushes dirty blocks from the end of the flush list of
all buffer pool instances.
NOTE: The calling thread is not allowed to own any latches on pages!
-@return number of blocks for which the write request was queued;
-ULINT_UNDEFINED if there was a flush of the same type already running */
+@return true if a batch was queued successfully for each buffer pool
+instance. false if another batch of same type was already running in
+at least one of the buffer pool instance */
UNIV_INTERN
-ulint
+bool
buf_flush_list(
-/*============*/
+/*===========*/
ulint min_n, /*!< in: wished minimum mumber of blocks
flushed (it is not guaranteed that the
actual number is that big, though) */
- lsn_t lsn_limit); /*!< in the case BUF_FLUSH_LIST all
+ lsn_t lsn_limit, /*!< in the case BUF_FLUSH_LIST all
blocks whose oldest_modification is
smaller than this should be flushed
(if their number does not exceed
min_n), otherwise ignored */
+ ulint* n_processed); /*!< out: the number of pages
+ which were processed is passed
+ back to caller. Ignored if NULL */
/******************************************************************//**
This function picks up a single dirty page from the tail of the LRU
list, flushes it, removes it from page_hash and LRU list and puts
@@ -176,31 +180,6 @@ buf_flush_ready_for_replace(
/*========================*/
buf_page_t* bpage); /*!< in: buffer control block, must be
buf_page_in_file(bpage) and in the LRU list */
-
-/** @brief Statistics for selecting flush rate based on redo log
-generation speed.
-
-These statistics are generated for heuristics used in estimating the
-rate at which we should flush the dirty blocks to avoid bursty IO
-activity. Note that the rate of flushing not only depends on how many
-dirty pages we have in the buffer pool but it is also a fucntion of
-how much redo the workload is generating and at what rate. */
-
-struct buf_flush_stat_struct
-{
- lsn_t redo; /**< amount of redo generated. */
- ulint n_flushed; /**< number of pages flushed. */
-};
-
-/** Statistics for selecting flush rate of dirty pages. */
-typedef struct buf_flush_stat_struct buf_flush_stat_t;
-/*********************************************************************
-Update the historical stats that we are collecting for flush rate
-heuristics at the end of each interval. */
-UNIV_INTERN
-void
-buf_flush_stat_update(void);
-/*=======================*/
/******************************************************************//**
page_cleaner thread tasked with flushing dirty pages from the buffer
pools. As of now we'll have only one instance of this thread.
@@ -211,6 +190,23 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)(
/*==========================================*/
void* arg); /*!< in: a dummy parameter required by
os_thread_create */
+/*********************************************************************//**
+Clears up tail of the LRU lists:
+* Put replaceable pages at the tail of LRU to the free list
+* Flush dirty pages at the tail of LRU to the disk
+The depth to which we scan each buffer pool is controlled by dynamic
+config parameter innodb_LRU_scan_depth.
+@return total pages flushed */
+UNIV_INTERN
+ulint
+buf_flush_LRU_tail(void);
+/*====================*/
+/*********************************************************************//**
+Wait for any possible LRU flushes that are in progress to end. */
+UNIV_INTERN
+void
+buf_flush_wait_LRU_batch_end(void);
+/*==============================*/
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/******************************************************************//**
@@ -238,6 +234,44 @@ UNIV_INTERN
void
buf_flush_free_flush_rbt(void);
/*==========================*/
+
+/********************************************************************//**
+Writes a flushable page asynchronously from the buffer pool to a file.
+NOTE: in simulated aio we must call
+os_aio_simulated_wake_handler_threads after we have posted a batch of
+writes! NOTE: buf_pool->mutex and buf_page_get_mutex(bpage) must be
+held upon entering this function, and they will be released by this
+function. */
+UNIV_INTERN
+void
+buf_flush_page(
+/*===========*/
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ buf_page_t* bpage, /*!< in: buffer control block */
+ buf_flush flush_type) /*!< in: type of flush */
+ __attribute__((nonnull));
+
+#ifdef UNIV_DEBUG
+/******************************************************************//**
+Check if there are any dirty pages that belong to a space id in the flush
+list in a particular buffer pool.
+@return number of dirty pages present in a single buffer pool */
+UNIV_INTERN
+ulint
+buf_pool_get_dirty_pages_count(
+/*===========================*/
+ buf_pool_t* buf_pool, /*!< in: buffer pool */
+ ulint id); /*!< in: space id to check */
+/******************************************************************//**
+Check if there are any dirty pages that belong to a space id in the flush list.
+@return count of dirty pages present in all the buffer pools */
+UNIV_INTERN
+ulint
+buf_flush_get_dirty_pages_count(
+/*============================*/
+ ulint id); /*!< in: space id to check */
+#endif /* UNIV_DEBUG */
+
#endif /* !UNIV_HOTBACKUP */
#ifndef UNIV_NONINL
diff --git a/storage/innobase/include/buf0flu.ic b/storage/innobase/include/buf0flu.ic
index 68a76c0b637..a763cd115fe 100644
--- a/storage/innobase/include/buf0flu.ic
+++ b/storage/innobase/include/buf0flu.ic
@@ -26,6 +26,7 @@ Created 11/5/1995 Heikki Tuuri
#ifndef UNIV_HOTBACKUP
#include "buf0buf.h"
#include "mtr0mtr.h"
+#include "srv0srv.h"
/********************************************************************//**
Inserts a modified block into the flush list. */
@@ -61,7 +62,7 @@ buf_flush_note_modification(
{
buf_pool_t* buf_pool = buf_pool_from_block(block);
- ut_ad(block);
+ ut_ad(!srv_read_only_mode);
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(block->page.buf_fix_count > 0);
#ifdef UNIV_SYNC_DEBUG
@@ -91,7 +92,7 @@ buf_flush_note_modification(
mutex_exit(&block->mutex);
- ++srv_buf_pool_write_requests;
+ srv_stats.buf_pool_write_requests.inc();
}
/********************************************************************//**
@@ -108,7 +109,7 @@ buf_flush_recv_note_modification(
{
buf_pool_t* buf_pool = buf_pool_from_block(block);
- ut_ad(block);
+ ut_ad(!srv_read_only_mode);
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(block->page.buf_fix_count > 0);
#ifdef UNIV_SYNC_DEBUG
diff --git a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
index 527852da758..f7a69e1c9e4 100644
--- a/storage/innobase/include/buf0lru.h
+++ b/storage/innobase/include/buf0lru.h
@@ -31,6 +31,9 @@ Created 11/5/1995 Heikki Tuuri
#include "ut0byte.h"
#include "buf0types.h"
+// Forward declaration
+struct trx_t;
+
/******************************************************************//**
Returns TRUE if less than 25 % of the buffer pool is available. This can be
used in heuristics to prevent huge transactions eating up the whole buffer
@@ -49,15 +52,19 @@ These are low-level functions
#define BUF_LRU_OLD_MIN_LEN 512 /* 8 megabytes of 16k pages */
/******************************************************************//**
-Invalidates all pages belonging to a given tablespace when we are deleting
-the data file(s) of that tablespace. A PROBLEM: if readahead is being started,
-what guarantees that it will not try to read in pages after this operation has
-completed? */
+Flushes all dirty pages or removes all pages belonging
+to a given tablespace. A PROBLEM: if readahead is being started, what
+guarantees that it will not try to read in pages after this operation
+has completed? */
UNIV_INTERN
void
-buf_LRU_invalidate_tablespace(
+buf_LRU_flush_or_remove_pages(
/*==========================*/
- ulint id); /*!< in: space id */
+ ulint id, /*!< in: space id */
+ buf_remove_t buf_remove, /*!< in: remove or flush strategy */
+ const trx_t* trx); /*!< to check if the operation must
+ be interrupted */
+
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/********************************************************************//**
Insert a compressed block into buf_pool->zip_clean in the LRU order. */
@@ -157,7 +164,10 @@ buf_LRU_block_free_non_file_page(
/*=============================*/
buf_block_t* block); /*!< in: block, must not contain a file page */
/******************************************************************//**
-Adds a block to the LRU list. */
+Adds a block to the LRU list. Please make sure that the zip_size is
+already set into the page zip when invoking the function, so that we
+can get correct zip_size from the buffer page when adding a block
+into LRU */
UNIV_INTERN
void
buf_LRU_add_block(
@@ -270,15 +280,12 @@ extern uint buf_LRU_old_threshold_ms;
These statistics are not 'of' LRU but 'for' LRU. We keep count of I/O
and page_zip_decompress() operations. Based on the statistics we decide
if we want to evict from buf_pool->unzip_LRU or buf_pool->LRU. */
-struct buf_LRU_stat_struct
+struct buf_LRU_stat_t
{
ulint io; /**< Counter of buffer pool I/O operations. */
ulint unzip; /**< Counter of page_zip_decompress operations. */
};
-/** Statistics for selecting the LRU list for eviction. */
-typedef struct buf_LRU_stat_struct buf_LRU_stat_t;
-
/** Current operation counters. Not protected by any mutex.
Cleared by buf_LRU_stat_update(). */
extern buf_LRU_stat_t buf_LRU_stat_cur;
diff --git a/storage/innobase/include/buf0types.h b/storage/innobase/include/buf0types.h
index ba54a8aeeea..5ed210d3b90 100644
--- a/storage/innobase/include/buf0types.h
+++ b/storage/innobase/include/buf0types.h
@@ -27,19 +27,19 @@ Created 11/17/1995 Heikki Tuuri
#define buf0types_h
/** Buffer page (uncompressed or compressed) */
-typedef struct buf_page_struct buf_page_t;
+struct buf_page_t;
/** Buffer block for which an uncompressed page exists */
-typedef struct buf_block_struct buf_block_t;
+struct buf_block_t;
/** Buffer pool chunk comprising buf_block_t */
-typedef struct buf_chunk_struct buf_chunk_t;
+struct buf_chunk_t;
/** Buffer pool comprising buf_chunk_t */
-typedef struct buf_pool_struct buf_pool_t;
+struct buf_pool_t;
/** Buffer pool statistics struct */
-typedef struct buf_pool_stat_struct buf_pool_stat_t;
+struct buf_pool_stat_t;
/** Buffer pool buddy statistics struct */
-typedef struct buf_buddy_stat_struct buf_buddy_stat_t;
+struct buf_buddy_stat_t;
/** Doublewrite memory struct */
-typedef struct buf_dblwr_struct buf_dblwr_t;
+struct buf_dblwr_t;
/** A buffer frame. @see page_t */
typedef byte buf_frame_t;
@@ -54,6 +54,17 @@ enum buf_flush {
BUF_FLUSH_N_TYPES /*!< index of last element + 1 */
};
+/** Algorithm to remove the pages for a tablespace from the buffer pool.
+See buf_LRU_flush_or_remove_pages(). */
+enum buf_remove_t {
+ BUF_REMOVE_ALL_NO_WRITE, /*!< Remove all pages from the buffer
+ pool, don't write or sync to disk */
+ BUF_REMOVE_FLUSH_NO_WRITE, /*!< Remove only, from the flush list,
+ don't write or sync to disk */
+	BUF_REMOVE_FLUSH_WRITE		/*!< Flush dirty pages to disk only,
+					don't remove from the buffer pool */
+};
+
/** Flags for io_fix types */
enum buf_io_fix {
BUF_IO_NONE = 0, /**< no pending I/O */
@@ -66,7 +77,7 @@ enum buf_io_fix {
/** Alternatives for srv_checksum_algorithm, which can be changed by
setting innodb_checksum_algorithm */
-enum srv_checksum_algorithm_enum {
+enum srv_checksum_algorithm_t {
SRV_CHECKSUM_ALGORITHM_CRC32, /*!< Write crc32, allow crc32,
innodb or none when reading */
SRV_CHECKSUM_ALGORITHM_STRICT_CRC32, /*!< Write crc32, allow crc32
@@ -81,8 +92,6 @@ enum srv_checksum_algorithm_enum {
when reading */
};
-typedef enum srv_checksum_algorithm_enum srv_checksum_algorithm_t;
-
/** Parameters of binary buddy system for compressed pages (buf0buddy.h) */
/* @{ */
/** Zip shift value for the smallest page size */
diff --git a/storage/innobase/include/data0data.h b/storage/innobase/include/data0data.h
index 37364e891f5..a548c7b89b3 100644
--- a/storage/innobase/include/data0data.h
+++ b/storage/innobase/include/data0data.h
@@ -35,7 +35,7 @@ Created 5/30/1994 Heikki Tuuri
/** Storage for overflow data in a big record, that is, a clustered
index record which needs external storage of data fields */
-typedef struct big_rec_struct big_rec_t;
+struct big_rec_t;
#ifdef UNIV_DEBUG
/*********************************************************************//**
@@ -45,7 +45,8 @@ UNIV_INLINE
dtype_t*
dfield_get_type(
/*============*/
- const dfield_t* field); /*!< in: SQL data field */
+ const dfield_t* field) /*!< in: SQL data field */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Gets pointer to the data in a field.
@return pointer to data */
@@ -53,7 +54,8 @@ UNIV_INLINE
void*
dfield_get_data(
/*============*/
- const dfield_t* field); /*!< in: field */
+ const dfield_t* field) /*!< in: field */
+ __attribute__((nonnull, warn_unused_result));
#else /* UNIV_DEBUG */
# define dfield_get_type(field) (&(field)->type)
# define dfield_get_data(field) ((field)->data)
@@ -65,7 +67,8 @@ void
dfield_set_type(
/*============*/
dfield_t* field, /*!< in: SQL data field */
- dtype_t* type); /*!< in: pointer to data type struct */
+ const dtype_t* type) /*!< in: pointer to data type struct */
+ __attribute__((nonnull));
/*********************************************************************//**
Gets length of field data.
@return length of data; UNIV_SQL_NULL if SQL null data */
@@ -73,7 +76,8 @@ UNIV_INLINE
ulint
dfield_get_len(
/*===========*/
- const dfield_t* field); /*!< in: field */
+ const dfield_t* field) /*!< in: field */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Sets length in a field. */
UNIV_INLINE
@@ -81,7 +85,8 @@ void
dfield_set_len(
/*===========*/
dfield_t* field, /*!< in: field */
- ulint len); /*!< in: length or UNIV_SQL_NULL */
+ ulint len) /*!< in: length or UNIV_SQL_NULL */
+ __attribute__((nonnull));
/*********************************************************************//**
Determines if a field is SQL NULL
@return nonzero if SQL null data */
@@ -89,7 +94,8 @@ UNIV_INLINE
ulint
dfield_is_null(
/*===========*/
- const dfield_t* field); /*!< in: field */
+ const dfield_t* field) /*!< in: field */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Determines if a field is externally stored
@return nonzero if externally stored */
@@ -97,14 +103,16 @@ UNIV_INLINE
ulint
dfield_is_ext(
/*==========*/
- const dfield_t* field); /*!< in: field */
+ const dfield_t* field) /*!< in: field */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Sets the "external storage" flag */
UNIV_INLINE
void
dfield_set_ext(
/*===========*/
- dfield_t* field); /*!< in/out: field */
+ dfield_t* field) /*!< in/out: field */
+ __attribute__((nonnull));
/*********************************************************************//**
Sets pointer to the data and length in a field. */
UNIV_INLINE
@@ -113,14 +121,16 @@ dfield_set_data(
/*============*/
dfield_t* field, /*!< in: field */
const void* data, /*!< in: data */
- ulint len); /*!< in: length or UNIV_SQL_NULL */
+ ulint len) /*!< in: length or UNIV_SQL_NULL */
+ __attribute__((nonnull(1)));
/*********************************************************************//**
Sets a data field to SQL NULL. */
UNIV_INLINE
void
dfield_set_null(
/*============*/
- dfield_t* field); /*!< in/out: field */
+ dfield_t* field) /*!< in/out: field */
+ __attribute__((nonnull));
/**********************************************************************//**
Writes an SQL null field full of zeros. */
UNIV_INLINE
@@ -128,7 +138,8 @@ void
data_write_sql_null(
/*================*/
byte* data, /*!< in: pointer to a buffer of size len */
- ulint len); /*!< in: SQL null size in bytes */
+ ulint len) /*!< in: SQL null size in bytes */
+ __attribute__((nonnull));
/*********************************************************************//**
Copies the data and len fields. */
UNIV_INLINE
@@ -136,7 +147,8 @@ void
dfield_copy_data(
/*=============*/
dfield_t* field1, /*!< out: field to copy to */
- const dfield_t* field2);/*!< in: field to copy from */
+ const dfield_t* field2) /*!< in: field to copy from */
+ __attribute__((nonnull));
/*********************************************************************//**
Copies a data field to another. */
UNIV_INLINE
@@ -144,7 +156,8 @@ void
dfield_copy(
/*========*/
dfield_t* field1, /*!< out: field to copy to */
- const dfield_t* field2);/*!< in: field to copy from */
+ const dfield_t* field2) /*!< in: field to copy from */
+ __attribute__((nonnull));
/*********************************************************************//**
Copies the data pointed to by a data field. */
UNIV_INLINE
@@ -152,7 +165,8 @@ void
dfield_dup(
/*=======*/
dfield_t* field, /*!< in/out: data field */
- mem_heap_t* heap); /*!< in: memory heap where allocated */
+ mem_heap_t* heap) /*!< in: memory heap where allocated */
+ __attribute__((nonnull));
#ifndef UNIV_HOTBACKUP
/*********************************************************************//**
Tests if two data fields are equal.
@@ -187,7 +201,8 @@ UNIV_INLINE
ulint
dtuple_get_n_fields(
/*================*/
- const dtuple_t* tuple); /*!< in: tuple */
+ const dtuple_t* tuple) /*!< in: tuple */
+ __attribute__((nonnull, warn_unused_result));
#ifdef UNIV_DEBUG
/*********************************************************************//**
Gets nth field of a tuple.
@@ -208,7 +223,8 @@ UNIV_INLINE
ulint
dtuple_get_info_bits(
/*=================*/
- const dtuple_t* tuple); /*!< in: tuple */
+ const dtuple_t* tuple) /*!< in: tuple */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Sets info bits in a data tuple. */
UNIV_INLINE
@@ -216,7 +232,8 @@ void
dtuple_set_info_bits(
/*=================*/
dtuple_t* tuple, /*!< in: tuple */
- ulint info_bits); /*!< in: info bits */
+ ulint info_bits) /*!< in: info bits */
+ __attribute__((nonnull));
/*********************************************************************//**
Gets number of fields used in record comparisons.
@return number of fields used in comparisons in rem0cmp.* */
@@ -224,7 +241,8 @@ UNIV_INLINE
ulint
dtuple_get_n_fields_cmp(
/*====================*/
- const dtuple_t* tuple); /*!< in: tuple */
+ const dtuple_t* tuple) /*!< in: tuple */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Gets number of fields used in record comparisons. */
UNIV_INLINE
@@ -232,8 +250,9 @@ void
dtuple_set_n_fields_cmp(
/*====================*/
dtuple_t* tuple, /*!< in: tuple */
- ulint n_fields_cmp); /*!< in: number of fields used in
+ ulint n_fields_cmp) /*!< in: number of fields used in
comparisons in rem0cmp.* */
+ __attribute__((nonnull));
/* Estimate the number of bytes that are going to be allocated when
creating a new dtuple_t object */
@@ -252,7 +271,8 @@ dtuple_create_from_mem(
/*===================*/
void* buf, /*!< in, out: buffer to use */
ulint buf_size, /*!< in: buffer size */
- ulint n_fields); /*!< in: number of fields */
+ ulint n_fields) /*!< in: number of fields */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************//**
Creates a data tuple to a memory heap. The default value for number
@@ -265,19 +285,8 @@ dtuple_create(
mem_heap_t* heap, /*!< in: memory heap where the tuple
is created, DTUPLE_EST_ALLOC(n_fields)
bytes will be allocated from this heap */
- ulint n_fields); /*!< in: number of fields */
-
-/**********************************************************//**
-Wrap data fields in a tuple. The default value for number
-of fields used in record comparisons for this tuple is n_fields.
-@return data tuple */
-UNIV_INLINE
-const dtuple_t*
-dtuple_from_fields(
-/*===============*/
- dtuple_t* tuple, /*!< in: storage for data tuple */
- const dfield_t* fields, /*!< in: fields */
- ulint n_fields); /*!< in: number of fields */
+ ulint n_fields)/*!< in: number of fields */
+ __attribute__((nonnull, malloc));
/*********************************************************************//**
Sets number of fields used in a tuple. Normally this is set in
@@ -287,7 +296,8 @@ void
dtuple_set_n_fields(
/*================*/
dtuple_t* tuple, /*!< in: tuple */
- ulint n_fields); /*!< in: number of fields */
+ ulint n_fields) /*!< in: number of fields */
+ __attribute__((nonnull));
/*********************************************************************//**
Copies a data tuple to another. This is a shallow copy; if a deep copy
is desired, dfield_dup() will have to be invoked on each field.
@@ -297,8 +307,9 @@ dtuple_t*
dtuple_copy(
/*========*/
const dtuple_t* tuple, /*!< in: tuple to copy from */
- mem_heap_t* heap); /*!< in: memory heap
+ mem_heap_t* heap) /*!< in: memory heap
where the tuple is created */
+ __attribute__((nonnull, malloc));
/**********************************************************//**
The following function returns the sum of data lengths of a tuple. The space
occupied by the field structs or the tuple struct is not counted.
@@ -308,7 +319,8 @@ ulint
dtuple_get_data_size(
/*=================*/
const dtuple_t* tuple, /*!< in: typed data tuple */
- ulint comp); /*!< in: nonzero=ROW_FORMAT=COMPACT */
+ ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
+ __attribute__((nonnull));
/*********************************************************************//**
Computes the number of externally stored fields in a data tuple.
@return number of fields */
@@ -316,7 +328,8 @@ UNIV_INLINE
ulint
dtuple_get_n_ext(
/*=============*/
- const dtuple_t* tuple); /*!< in: tuple */
+ const dtuple_t* tuple) /*!< in: tuple */
+ __attribute__((nonnull));
/************************************************************//**
Compare two data tuples, respecting the collation of character fields.
@return 1, 0 , -1 if tuple1 is greater, equal, less, respectively,
@@ -326,7 +339,8 @@ int
dtuple_coll_cmp(
/*============*/
const dtuple_t* tuple1, /*!< in: tuple 1 */
- const dtuple_t* tuple2);/*!< in: tuple 2 */
+ const dtuple_t* tuple2) /*!< in: tuple 2 */
+ __attribute__((nonnull, warn_unused_result));
/************************************************************//**
Folds a prefix given as the number of fields of a tuple.
@return the folded value */
@@ -339,7 +353,7 @@ dtuple_fold(
ulint n_bytes,/*!< in: number of bytes to fold in an
incomplete last field */
index_id_t tree_id)/*!< in: index tree id */
- __attribute__((pure));
+ __attribute__((nonnull, pure, warn_unused_result));
/*******************************************************************//**
Sets types of fields binary in a tuple. */
UNIV_INLINE
@@ -347,7 +361,8 @@ void
dtuple_set_types_binary(
/*====================*/
dtuple_t* tuple, /*!< in: data tuple */
- ulint n); /*!< in: number of fields to set */
+ ulint n) /*!< in: number of fields to set */
+ __attribute__((nonnull));
/**********************************************************************//**
Checks if a dtuple contains an SQL null value.
@return TRUE if some field is SQL null */
@@ -355,7 +370,8 @@ UNIV_INLINE
ibool
dtuple_contains_null(
/*=================*/
- const dtuple_t* tuple); /*!< in: dtuple */
+ const dtuple_t* tuple) /*!< in: dtuple */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************//**
Checks that a data field is typed. Asserts an error if not.
@return TRUE if ok */
@@ -363,7 +379,8 @@ UNIV_INTERN
ibool
dfield_check_typed(
/*===============*/
- const dfield_t* field); /*!< in: data field */
+ const dfield_t* field) /*!< in: data field */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************//**
Checks that a data tuple is typed. Asserts an error if not.
@return TRUE if ok */
@@ -371,7 +388,8 @@ UNIV_INTERN
ibool
dtuple_check_typed(
/*===============*/
- const dtuple_t* tuple); /*!< in: tuple */
+ const dtuple_t* tuple) /*!< in: tuple */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************//**
Checks that a data tuple is typed.
@return TRUE if ok */
@@ -379,7 +397,8 @@ UNIV_INTERN
ibool
dtuple_check_typed_no_assert(
/*=========================*/
- const dtuple_t* tuple); /*!< in: tuple */
+ const dtuple_t* tuple) /*!< in: tuple */
+ __attribute__((nonnull, warn_unused_result));
#ifdef UNIV_DEBUG
/**********************************************************//**
Validates the consistency of a tuple which must be complete, i.e,
@@ -389,7 +408,8 @@ UNIV_INTERN
ibool
dtuple_validate(
/*============*/
- const dtuple_t* tuple); /*!< in: tuple */
+ const dtuple_t* tuple) /*!< in: tuple */
+ __attribute__((nonnull, warn_unused_result));
#endif /* UNIV_DEBUG */
/*************************************************************//**
Pretty prints a dfield value according to its data type. */
@@ -397,7 +417,8 @@ UNIV_INTERN
void
dfield_print(
/*=========*/
- const dfield_t* dfield);/*!< in: dfield */
+ const dfield_t* dfield) /*!< in: dfield */
+ __attribute__((nonnull));
/*************************************************************//**
Pretty prints a dfield value according to its data type. Also the hex string
is printed if a string contains non-printable characters. */
@@ -405,7 +426,8 @@ UNIV_INTERN
void
dfield_print_also_hex(
/*==================*/
- const dfield_t* dfield); /*!< in: dfield */
+ const dfield_t* dfield) /*!< in: dfield */
+ __attribute__((nonnull));
/**********************************************************//**
The following function prints the contents of a tuple. */
UNIV_INTERN
@@ -413,7 +435,8 @@ void
dtuple_print(
/*=========*/
FILE* f, /*!< in: output stream */
- const dtuple_t* tuple); /*!< in: tuple */
+ const dtuple_t* tuple) /*!< in: tuple */
+ __attribute__((nonnull));
/**************************************************************//**
Moves parts of long fields in entry to the big record vector so that
the size of tuple drops below the maximum record size allowed in the
@@ -428,8 +451,9 @@ dtuple_convert_big_rec(
/*===================*/
dict_index_t* index, /*!< in: index */
dtuple_t* entry, /*!< in/out: index entry */
- ulint* n_ext); /*!< in/out: number of
+ ulint* n_ext) /*!< in/out: number of
externally stored columns */
+ __attribute__((nonnull, malloc, warn_unused_result));
/**************************************************************//**
Puts back to entry the data stored in vector. Note that to ensure the
fields in entry can accommodate the data, vector must have been created
@@ -440,21 +464,23 @@ dtuple_convert_back_big_rec(
/*========================*/
dict_index_t* index, /*!< in: index */
dtuple_t* entry, /*!< in: entry whose data was put to vector */
- big_rec_t* vector);/*!< in, own: big rec vector; it is
+ big_rec_t* vector) /*!< in, own: big rec vector; it is
freed in this function */
+ __attribute__((nonnull));
/**************************************************************//**
Frees the memory in a big rec vector. */
UNIV_INLINE
void
dtuple_big_rec_free(
/*================*/
- big_rec_t* vector); /*!< in, own: big rec vector; it is
+ big_rec_t* vector) /*!< in, own: big rec vector; it is
freed in this function */
+ __attribute__((nonnull));
/*######################################################################*/
/** Structure for an SQL data field */
-struct dfield_struct{
+struct dfield_t{
void* data; /*!< pointer to data */
unsigned ext:1; /*!< TRUE=externally stored, FALSE=local */
unsigned len:32; /*!< data length; UNIV_SQL_NULL if SQL null */
@@ -462,7 +488,7 @@ struct dfield_struct{
};
/** Structure for an SQL data tuple of fields (logical record) */
-struct dtuple_struct {
+struct dtuple_t {
ulint info_bits; /*!< info bits of an index record:
the default is 0; this field is used
if an index record is built from
@@ -482,15 +508,13 @@ struct dtuple_struct {
#ifdef UNIV_DEBUG
ulint magic_n; /*!< magic number, used in
debug assertions */
-/** Value of dtuple_struct::magic_n */
+/** Value of dtuple_t::magic_n */
# define DATA_TUPLE_MAGIC_N 65478679
#endif /* UNIV_DEBUG */
};
/** A slot for a field in a big rec vector */
-typedef struct big_rec_field_struct big_rec_field_t;
-/** A slot for a field in a big rec vector */
-struct big_rec_field_struct {
+struct big_rec_field_t {
ulint field_no; /*!< field number in record */
ulint len; /*!< stored data length, in bytes */
const void* data; /*!< stored data */
@@ -498,7 +522,7 @@ struct big_rec_field_struct {
/** Storage format for overflow data in a big record, that is, a
clustered index record which needs external storage of data fields */
-struct big_rec_struct {
+struct big_rec_t {
mem_heap_t* heap; /*!< memory heap from which
allocated */
ulint n_fields; /*!< number of stored fields */
diff --git a/storage/innobase/include/data0data.ic b/storage/innobase/include/data0data.ic
index da50e91e98d..6937d55d211 100644
--- a/storage/innobase/include/data0data.ic
+++ b/storage/innobase/include/data0data.ic
@@ -54,7 +54,7 @@ void
dfield_set_type(
/*============*/
dfield_t* field, /*!< in: SQL data field */
- dtype_t* type) /*!< in: pointer to data type struct */
+ const dtype_t* type) /*!< in: pointer to data type struct */
{
ut_ad(field && type);
@@ -407,6 +407,8 @@ dtuple_create_from_mem(
}
}
#endif
+ UNIV_MEM_ASSERT_W(tuple->fields, n_fields * sizeof *tuple->fields);
+ UNIV_MEM_INVALID(tuple->fields, n_fields * sizeof *tuple->fields);
return(tuple);
}
@@ -434,30 +436,6 @@ dtuple_create(
tuple = dtuple_create_from_mem(buf, buf_size, n_fields);
-#ifdef UNIV_DEBUG
- UNIV_MEM_INVALID(tuple->fields, n_fields * sizeof *tuple->fields);
-#endif
-
- return(tuple);
-}
-
-/**********************************************************//**
-Wrap data fields in a tuple. The default value for number
-of fields used in record comparisons for this tuple is n_fields.
-@return data tuple */
-UNIV_INLINE
-const dtuple_t*
-dtuple_from_fields(
-/*===============*/
- dtuple_t* tuple, /*!< in: storage for data tuple */
- const dfield_t* fields, /*!< in: fields */
- ulint n_fields) /*!< in: number of fields */
-{
- tuple->info_bits = 0;
- tuple->n_fields = tuple->n_fields_cmp = n_fields;
- tuple->fields = (dfield_t*) fields;
- ut_d(tuple->magic_n = DATA_TUPLE_MAGIC_N);
-
return(tuple);
}
diff --git a/storage/innobase/include/data0type.h b/storage/innobase/include/data0type.h
index c7fcf316f24..111664b0b52 100644
--- a/storage/innobase/include/data0type.h
+++ b/storage/innobase/include/data0type.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -33,20 +33,20 @@ extern ulint data_mysql_default_charset_coll;
#define DATA_MYSQL_BINARY_CHARSET_COLL 63
/* SQL data type struct */
-typedef struct dtype_struct dtype_t;
+struct dtype_t;
/* SQL Like operator comparison types */
-enum ib_like_enum {
+enum ib_like_t {
IB_LIKE_EXACT, /* e.g. STRING */
IB_LIKE_PREFIX, /* e.g., STRING% */
IB_LIKE_SUFFIX, /* e.g., %STRING */
IB_LIKE_SUBSTR, /* e.g., %STRING% */
IB_LIKE_REGEXP /* Future */
};
-typedef enum ib_like_enum ib_like_t;
/*-------------------------------------------*/
/* The 'MAIN TYPE' of a column */
+#define DATA_MISSING 0 /* missing column */
#define DATA_VARCHAR 1 /* character varying of the
latin1_swedish_ci charset-collation; note
that the MySQL format for this, DATA_BINARY,
@@ -508,7 +508,7 @@ dtype_read_for_order_and_null_size()
dtype_new_read_for_order_and_null_size()
sym_tab_add_null_lit() */
-struct dtype_struct{
+struct dtype_t{
unsigned prtype:32; /*!< precise type; MySQL data
type, charset code, flags to
indicate nullability,
diff --git a/storage/innobase/include/data0type.ic b/storage/innobase/include/data0type.ic
index a5e94a8edff..d489bef89a8 100644
--- a/storage/innobase/include/data0type.ic
+++ b/storage/innobase/include/data0type.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -556,35 +556,18 @@ dtype_get_fixed_size_low(
} else if (!comp) {
return(len);
} else {
- /* We play it safe here and ask MySQL for
- mbminlen and mbmaxlen. Although
- mbminlen and mbmaxlen are
- initialized if and only if prtype
- is (in one of the 3 functions in this file),
- it could be that none of these functions
- has been called. */
-
+#ifdef UNIV_DEBUG
ulint i_mbminlen, i_mbmaxlen;
innobase_get_cset_width(
dtype_get_charset_coll(prtype),
&i_mbminlen, &i_mbmaxlen);
- if (DATA_MBMINMAXLEN(i_mbminlen, i_mbmaxlen)
- != mbminmaxlen) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: "
- "mbminlen=%lu, "
- "mbmaxlen=%lu, "
- "type->mbminlen=%lu, "
- "type->mbmaxlen=%lu\n",
- (ulong) i_mbminlen,
- (ulong) i_mbmaxlen,
- (ulong) DATA_MBMINLEN(mbminmaxlen),
- (ulong) DATA_MBMAXLEN(mbminmaxlen));
- }
- if (i_mbminlen == i_mbmaxlen) {
+ ut_ad(DATA_MBMINMAXLEN(i_mbminlen, i_mbmaxlen)
+ == mbminmaxlen);
+#endif /* UNIV_DEBUG */
+ if (DATA_MBMINLEN(mbminmaxlen)
+ == DATA_MBMAXLEN(mbminmaxlen)) {
return(len);
}
}
diff --git a/storage/innobase/include/data0types.h b/storage/innobase/include/data0types.h
index 7d599ef2c8d..bd2bb577611 100644
--- a/storage/innobase/include/data0types.h
+++ b/storage/innobase/include/data0types.h
@@ -27,10 +27,10 @@ Created 9/21/2000 Heikki Tuuri
#define data0types_h
/* SQL data field struct */
-typedef struct dfield_struct dfield_t;
+struct dfield_t;
/* SQL data tuple struct */
-typedef struct dtuple_struct dtuple_t;
+struct dtuple_t;
#endif
diff --git a/storage/innobase/include/db0err.h b/storage/innobase/include/db0err.h
index 1a3499b09e0..12e9f543e94 100644
--- a/storage/innobase/include/db0err.h
+++ b/storage/innobase/include/db0err.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,7 +27,7 @@ Created 5/24/1996 Heikki Tuuri
#define db0err_h
-enum db_err {
+enum dberr_t {
DB_SUCCESS_LOCKED_REC = 9, /*!< like DB_SUCCESS, but a new
explicit record lock was created */
DB_SUCCESS = 10,
@@ -68,11 +68,14 @@ enum db_err {
from a table failed */
DB_NO_SAVEPOINT, /*!< no savepoint exists with the given
name */
- DB_TABLESPACE_ALREADY_EXISTS, /*!< we cannot create a new single-table
+ DB_TABLESPACE_EXISTS, /*!< we cannot create a new single-table
tablespace because a file of the same
name already exists */
- DB_TABLESPACE_DELETED, /*!< tablespace does not exist or is
+ DB_TABLESPACE_DELETED, /*!< tablespace was deleted or is
being dropped right now */
+	DB_TABLESPACE_NOT_FOUND,	/*!< Attempt to delete a tablespace
+					instance that was not found in the
+					tablespace hash table */
DB_LOCK_TABLE_FULL, /*!< lock structs have exhausted the
buffer pool (for big transactions,
InnoDB stores the lock structs in the
@@ -90,8 +93,8 @@ enum db_err {
work with e.g., FT indexes created by
a later version of the engine. */
- DB_PRIMARY_KEY_IS_NULL, /*!< a column in the PRIMARY KEY
- was found to be NULL */
+ DB_INVALID_NULL, /*!< a NOT NULL column was found to
+ be NULL during table rebuild */
DB_STATS_DO_NOT_EXIST, /*!< an operation that requires the
persistent storage, used for recording
@@ -115,6 +118,12 @@ enum db_err {
DB_READ_ONLY, /*!< Update operation attempted in
a read-only transaction */
DB_FTS_INVALID_DOCID, /* FTS Doc ID cannot be zero */
+ DB_TABLE_IN_FK_CHECK, /* table is being used in foreign
+ key check */
+ DB_ONLINE_LOG_TOO_BIG, /*!< Modification log grew too big
+ during online index creation */
+
+ DB_IO_ERROR, /*!< Generic IO error */
/* The following are partial failure codes */
DB_FAIL = 1000,
@@ -123,7 +132,23 @@ enum db_err {
DB_STRONG_FAIL,
DB_ZIP_OVERFLOW,
DB_RECORD_NOT_FOUND = 1500,
- DB_END_OF_INDEX
+ DB_END_OF_INDEX,
+ DB_DICT_CHANGED, /*!< Some part of table dictionary has
+ changed. Such as index dropped or
+ foreign key dropped */
+
+
+ /* The following are API only error codes. */
+ DB_DATA_MISMATCH = 2000, /*!< Column update or read failed
+ because the types mismatch */
+
+ DB_SCHEMA_NOT_LOCKED, /*!< If an API function expects the
+ schema to be locked in exclusive mode
+ and if it's not then that API function
+ will return this error code */
+
+ DB_NOT_FOUND /*!< Generic error code for "Not found"
+ type of errors */
};
#endif
diff --git a/storage/innobase/include/dict0boot.h b/storage/innobase/include/dict0boot.h
index 364aa746638..a994c9d8ff1 100644
--- a/storage/innobase/include/dict0boot.h
+++ b/storage/innobase/include/dict0boot.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2010, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -58,6 +58,13 @@ dict_hdr_get_new_id(
ulint* space_id); /*!< out: space id
(not assigned if NULL) */
/**********************************************************************//**
+Writes the current value of the row id counter to the dictionary header file
+page. */
+UNIV_INTERN
+void
+dict_hdr_flush_row_id(void);
+/*=======================*/
+/**********************************************************************//**
Returns a new row id.
@return the new id */
UNIV_INLINE
@@ -82,18 +89,32 @@ dict_sys_write_row_id(
row_id_t row_id);/*!< in: row id */
/*****************************************************************//**
Initializes the data dictionary memory structures when the database is
-started. This function is also called when the data dictionary is created. */
+started. This function is also called when the data dictionary is created.
+@return DB_SUCCESS or error code. */
UNIV_INTERN
-void
-dict_boot(void);
+dberr_t
+dict_boot(void)
/*===========*/
+ __attribute__((warn_unused_result));
+
/*****************************************************************//**
-Creates and initializes the data dictionary at the database creation. */
+Creates and initializes the data dictionary at the server bootstrap.
+@return DB_SUCCESS or error code. */
UNIV_INTERN
-void
-dict_create(void);
+dberr_t
+dict_create(void)
/*=============*/
+ __attribute__((warn_unused_result));
+/*********************************************************************//**
+Check if a table id belongs to system table.
+@return true if the table id belongs to a system table. */
+UNIV_INLINE
+bool
+dict_is_sys_table(
+/*==============*/
+ table_id_t id) /*!< in: table id to check */
+ __attribute__((warn_unused_result));
/* Space id and page no where the dictionary header resides */
#define DICT_HDR_SPACE 0 /* the SYSTEM tablespace */
@@ -273,6 +294,41 @@ enum dict_fld_sys_foreign_cols_enum {
DICT_FLD__SYS_FOREIGN_COLS__REF_COL_NAME = 5,
DICT_NUM_FIELDS__SYS_FOREIGN_COLS = 6
};
+/* The columns in SYS_TABLESPACES */
+enum dict_col_sys_tablespaces_enum {
+ DICT_COL__SYS_TABLESPACES__SPACE = 0,
+ DICT_COL__SYS_TABLESPACES__NAME = 1,
+ DICT_COL__SYS_TABLESPACES__FLAGS = 2,
+ DICT_NUM_COLS__SYS_TABLESPACES = 3
+};
+/* The field numbers in the SYS_TABLESPACES clustered index */
+enum dict_fld_sys_tablespaces_enum {
+ DICT_FLD__SYS_TABLESPACES__SPACE = 0,
+ DICT_FLD__SYS_TABLESPACES__DB_TRX_ID = 1,
+ DICT_FLD__SYS_TABLESPACES__DB_ROLL_PTR = 2,
+ DICT_FLD__SYS_TABLESPACES__NAME = 3,
+ DICT_FLD__SYS_TABLESPACES__FLAGS = 4,
+ DICT_NUM_FIELDS__SYS_TABLESPACES = 5
+};
+/* The columns in SYS_DATAFILES */
+enum dict_col_sys_datafiles_enum {
+ DICT_COL__SYS_DATAFILES__SPACE = 0,
+ DICT_COL__SYS_DATAFILES__PATH = 1,
+ DICT_NUM_COLS__SYS_DATAFILES = 2
+};
+/* The field numbers in the SYS_DATAFILES clustered index */
+enum dict_fld_sys_datafiles_enum {
+ DICT_FLD__SYS_DATAFILES__SPACE = 0,
+ DICT_FLD__SYS_DATAFILES__DB_TRX_ID = 1,
+ DICT_FLD__SYS_DATAFILES__DB_ROLL_PTR = 2,
+ DICT_FLD__SYS_DATAFILES__PATH = 3,
+ DICT_NUM_FIELDS__SYS_DATAFILES = 4
+};
+
+/* A number of the columns above occur in multiple tables. These are the
+lengths of those fields. */
+#define DICT_FLD_LEN_SPACE 4
+#define DICT_FLD_LEN_FLAGS 4
/* When a row id which is zero modulo this number (which must be a power of
two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is
diff --git a/storage/innobase/include/dict0boot.ic b/storage/innobase/include/dict0boot.ic
index 0f660ab7555..2b156a4f672 100644
--- a/storage/innobase/include/dict0boot.ic
+++ b/storage/innobase/include/dict0boot.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -24,15 +24,6 @@ Created 4/18/1996 Heikki Tuuri
*******************************************************/
/**********************************************************************//**
-Writes the current value of the row id counter to the dictionary header file
-page. */
-UNIV_INTERN
-void
-dict_hdr_flush_row_id(void);
-/*=======================*/
-
-
-/**********************************************************************//**
Returns a new row id.
@return the new id */
UNIV_INLINE
@@ -90,4 +81,16 @@ dict_sys_write_row_id(
mach_write_to_6(field, row_id);
}
+/*********************************************************************//**
+Check if a table id belongs to system table.
+@return true if the table id belongs to a system table. */
+UNIV_INLINE
+bool
+dict_is_sys_table(
+/*==============*/
+ table_id_t id) /*!< in: table id to check */
+{
+ return(id < DICT_HDR_FIRST_ID);
+}
+
diff --git a/storage/innobase/include/dict0crea.h b/storage/innobase/include/dict0crea.h
index 68fc9ba195a..217da0142ee 100644
--- a/storage/innobase/include/dict0crea.h
+++ b/storage/innobase/include/dict0crea.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -42,7 +42,9 @@ tab_create_graph_create(
/*====================*/
dict_table_t* table, /*!< in: table to create, built as a memory data
structure */
- mem_heap_t* heap); /*!< in: heap where created */
+ mem_heap_t* heap, /*!< in: heap where created */
+ bool commit);/*!< in: true if the commit node should be
+ added to the query graph */
/*********************************************************************//**
Creates an index create graph.
@return own: index create node */
@@ -52,7 +54,9 @@ ind_create_graph_create(
/*====================*/
dict_index_t* index, /*!< in: index to create, built as a memory data
structure */
- mem_heap_t* heap); /*!< in: heap where created */
+ mem_heap_t* heap, /*!< in: heap where created */
+ bool commit);/*!< in: true if the commit node should be
+ added to the query graph */
/***********************************************************//**
Creates a table. This is a high-level function used in SQL execution graphs.
@return query thread to run next or NULL */
@@ -99,11 +103,11 @@ dict_drop_index_tree(
mtr_t* mtr); /*!< in: mtr having the latch on the record page */
/****************************************************************//**
Creates the foreign key constraints system tables inside InnoDB
-at database creation or database start if they are not found or are
+at server bootstrap or server start if they are not found or are
not of the right form.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
dict_create_or_check_foreign_constraint_tables(void);
/*================================================*/
/********************************************************************//**
@@ -115,7 +119,7 @@ given locally for this table, that is, the number is not global, as in the
old format constraints < 4.0.18 it used to be.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
dict_create_add_foreigns_to_dictionary(
/*===================================*/
ulint start_id,/*!< in: if we are actually doing ALTER TABLE
@@ -127,11 +131,56 @@ dict_create_add_foreigns_to_dictionary(
so far has no constraints for which the name
was generated here */
dict_table_t* table, /*!< in: table */
- trx_t* trx); /*!< in: transaction */
+ trx_t* trx) /*!< in: transaction */
+ __attribute__((nonnull, warn_unused_result));
+/****************************************************************//**
+Creates the tablespaces and datafiles system tables inside InnoDB
+at server bootstrap or server start if they are not found or are
+not of the right form.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+dict_create_or_check_sys_tablespace(void);
+/*=====================================*/
+/********************************************************************//**
+Add a single tablespace definition to the data dictionary tables in the
+database.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_add_tablespace_to_dictionary(
+/*=====================================*/
+ ulint space, /*!< in: tablespace id */
+ const char* name, /*!< in: tablespace name */
+ ulint flags, /*!< in: tablespace flags */
+ const char* path, /*!< in: tablespace path */
+ trx_t* trx, /*!< in: transaction */
+ bool commit); /*!< in: if true then commit the
+ transaction */
+/********************************************************************//**
+Table create node structure */
-/* Table create node structure */
+/********************************************************************//**
+Add a single foreign key definition to the data dictionary tables in the
+database. We also generate names to constraints that were not named by the
+user. A generated constraint has a name of the format
+databasename/tablename_ibfk_NUMBER, where the numbers start from 1, and
+are given locally for this table, that is, the number is not global, as in
+the old format constraints < 4.0.18 it used to be.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+dict_create_add_foreign_to_dictionary(
+/*==================================*/
+ ulint* id_nr, /*!< in/out: number to use in id generation;
+ incremented if used */
+ dict_table_t* table, /*!< in: table */
+ dict_foreign_t* foreign,/*!< in: foreign */
+ trx_t* trx) /*!< in/out: dictionary transaction */
+ __attribute__((nonnull, warn_unused_result));
-struct tab_node_struct{
+/* Table create node structure */
+struct tab_node_t{
que_common_t common; /*!< node type: QUE_NODE_TABLE_CREATE */
dict_table_t* table; /*!< table to create, built as a memory data
structure with dict_mem_... functions */
@@ -160,7 +209,7 @@ struct tab_node_struct{
/* Index create node struct */
-struct ind_node_struct{
+struct ind_node_t{
que_common_t common; /*!< node type: QUE_NODE_INDEX_CREATE */
dict_index_t* index; /*!< index to create, built as a memory data
structure with dict_mem_... functions */
diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
index 68008f95c2f..af0a5b31cc4 100644
--- a/storage/innobase/include/dict0dict.h
+++ b/storage/innobase/include/dict0dict.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -40,6 +41,7 @@ Created 1/8/1996 Heikki Tuuri
#include "ut0rnd.h"
#include "ut0byte.h"
#include "trx0types.h"
+#include "row0types.h"
#ifndef UNIV_HOTBACKUP
# include "sync0sync.h"
@@ -50,7 +52,8 @@ UNIV_INTERN
void
dict_casedn_str(
/*============*/
- char* a); /*!< in/out: string to put in lower case */
+ char* a) /*!< in/out: string to put in lower case */
+ __attribute__((nonnull));
/********************************************************************//**
Get the database name length in a table name.
@return database name length */
@@ -58,17 +61,53 @@ UNIV_INTERN
ulint
dict_get_db_name_len(
/*=================*/
- const char* name); /*!< in: table name in the form
+ const char* name) /*!< in: table name in the form
dbname '/' tablename */
+ __attribute__((nonnull, warn_unused_result));
+/*********************************************************************//**
+Open a table from its database and table name, this is currently used by
+foreign constraint parser to get the referenced table.
+@return complete table name with database and table name, allocated from
+heap memory passed in */
+UNIV_INTERN
+char*
+dict_get_referenced_table(
+/*======================*/
+ const char* name, /*!< in: foreign key table name */
+ const char* database_name, /*!< in: table db name */
+ ulint database_name_len,/*!< in: db name length */
+ const char* table_name, /*!< in: table name */
+ ulint table_name_len, /*!< in: table name length */
+ dict_table_t** table, /*!< out: table object or NULL */
+ mem_heap_t* heap); /*!< in: heap memory */
+/*********************************************************************//**
+Frees a foreign key struct. */
+UNIV_INTERN
+void
+dict_foreign_free(
+/*==============*/
+ dict_foreign_t* foreign); /*!< in, own: foreign key struct */
+/*********************************************************************//**
+Finds the highest [number] for foreign key constraints of the table. Looks
+only at the >= 4.0.18-format id's, which are of the form
+databasename/tablename_ibfk_[number].
+@return highest number, 0 if table has no new format foreign key constraints */
+UNIV_INTERN
+ulint
+dict_table_get_highest_foreign_id(
+/*==============================*/
+ dict_table_t* table); /*!< in: table in the dictionary
+ memory cache */
/********************************************************************//**
Return the end of table name where we have removed dbname and '/'.
@return table name */
-
+UNIV_INTERN
const char*
dict_remove_db_name(
/*================*/
- const char* name); /*!< in: table name in the form
+ const char* name) /*!< in: table name in the form
dbname '/' tablename */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
Returns a table object based on table id.
@return table, NULL if does not exist */
@@ -77,7 +116,11 @@ dict_table_t*
dict_table_open_on_id(
/*==================*/
table_id_t table_id, /*!< in: table id */
- ibool dict_locked); /*!< in: TRUE=data dictionary locked */
+ ibool dict_locked, /*!< in: TRUE=data dictionary locked */
+ ibool try_drop) /*!< in: TRUE=try to drop any orphan
+ indexes after an aborted online
+ index creation */
+ __attribute__((warn_unused_result));
/********************************************************************//**
Decrements the count of open handles to a table. */
UNIV_INTERN
@@ -85,7 +128,11 @@ void
dict_table_close(
/*=============*/
dict_table_t* table, /*!< in/out: table */
- ibool dict_locked); /*!< in: TRUE=data dictionary locked */
+ ibool dict_locked, /*!< in: TRUE=data dictionary locked */
+ ibool try_drop) /*!< in: TRUE=try to drop any orphan
+ indexes after an aborted online
+ index creation */
+ __attribute__((nonnull));
/**********************************************************************//**
Inits the data dictionary module. */
UNIV_INTERN
@@ -109,7 +156,8 @@ UNIV_INLINE
ulint
dict_col_get_mbminlen(
/*==================*/
- const dict_col_t* col); /*!< in: column */
+ const dict_col_t* col) /*!< in: column */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Gets the maximum number of bytes per character.
@return maximum multi-byte char size, in bytes */
@@ -117,7 +165,8 @@ UNIV_INLINE
ulint
dict_col_get_mbmaxlen(
/*==================*/
- const dict_col_t* col); /*!< in: column */
+ const dict_col_t* col) /*!< in: column */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Sets the minimum and maximum number of bytes per character. */
UNIV_INLINE
@@ -127,8 +176,9 @@ dict_col_set_mbminmaxlen(
dict_col_t* col, /*!< in/out: column */
ulint mbminlen, /*!< in: minimum multi-byte
character size, in bytes */
- ulint mbmaxlen); /*!< in: minimum multi-byte
+ ulint mbmaxlen) /*!< in: minimum multi-byte
character size, in bytes */
+ __attribute__((nonnull));
/*********************************************************************//**
Gets the column data type. */
UNIV_INLINE
@@ -136,7 +186,8 @@ void
dict_col_copy_type(
/*===============*/
const dict_col_t* col, /*!< in: column */
- dtype_t* type); /*!< out: data type */
+ dtype_t* type) /*!< out: data type */
+ __attribute__((nonnull));
/**********************************************************************//**
Determine bytes of column prefix to be stored in the undo log. Please
note if the table format is UNIV_FORMAT_A (< UNIV_FORMAT_B), no prefix
@@ -147,9 +198,9 @@ ulint
dict_max_field_len_store_undo(
/*==========================*/
dict_table_t* table, /*!< in: table */
- const dict_col_t* col); /*!< in: column which index prefix
+ const dict_col_t* col) /*!< in: column which index prefix
is based on */
-
+ __attribute__((nonnull, warn_unused_result));
#endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_DEBUG
/*********************************************************************//**
@@ -160,7 +211,8 @@ ibool
dict_col_type_assert_equal(
/*=======================*/
const dict_col_t* col, /*!< in: column */
- const dtype_t* type); /*!< in: data type */
+ const dtype_t* type) /*!< in: data type */
+ __attribute__((nonnull, warn_unused_result));
#endif /* UNIV_DEBUG */
#ifndef UNIV_HOTBACKUP
/***********************************************************************//**
@@ -170,7 +222,8 @@ UNIV_INLINE
ulint
dict_col_get_min_size(
/*==================*/
- const dict_col_t* col); /*!< in: column */
+ const dict_col_t* col) /*!< in: column */
+ __attribute__((nonnull, warn_unused_result));
/***********************************************************************//**
Returns the maximum size of the column.
@return maximum size */
@@ -178,7 +231,8 @@ UNIV_INLINE
ulint
dict_col_get_max_size(
/*==================*/
- const dict_col_t* col); /*!< in: column */
+ const dict_col_t* col) /*!< in: column */
+ __attribute__((nonnull, warn_unused_result));
/***********************************************************************//**
Returns the size of a fixed size column, 0 if not a fixed size column.
@return fixed size, or 0 */
@@ -187,7 +241,8 @@ ulint
dict_col_get_fixed_size(
/*====================*/
const dict_col_t* col, /*!< in: column */
- ulint comp); /*!< in: nonzero=ROW_FORMAT=COMPACT */
+ ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
+ __attribute__((nonnull, warn_unused_result));
/***********************************************************************//**
Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a column.
For fixed length types it is the fixed length of the type, otherwise 0.
@@ -197,8 +252,8 @@ ulint
dict_col_get_sql_null_size(
/*=======================*/
const dict_col_t* col, /*!< in: column */
- ulint comp); /*!< in: nonzero=ROW_FORMAT=COMPACT */
-
+ ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Gets the column number.
@return col->ind, table column position (starting from 0) */
@@ -206,7 +261,8 @@ UNIV_INLINE
ulint
dict_col_get_no(
/*============*/
- const dict_col_t* col); /*!< in: column */
+ const dict_col_t* col) /*!< in: column */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Gets the column position in the clustered index. */
UNIV_INLINE
@@ -214,7 +270,8 @@ ulint
dict_col_get_clust_pos(
/*===================*/
const dict_col_t* col, /*!< in: table column */
- const dict_index_t* clust_index); /*!< in: clustered index */
+ const dict_index_t* clust_index) /*!< in: clustered index */
+ __attribute__((nonnull, warn_unused_result));
/****************************************************************//**
If the given column name is reserved for InnoDB system columns, return
TRUE.
@@ -223,14 +280,16 @@ UNIV_INTERN
ibool
dict_col_name_is_reserved(
/*======================*/
- const char* name); /*!< in: column name */
+ const char* name) /*!< in: column name */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Acquire the autoinc lock. */
UNIV_INTERN
void
dict_table_autoinc_lock(
/*====================*/
- dict_table_t* table); /*!< in/out: table */
+ dict_table_t* table) /*!< in/out: table */
+ __attribute__((nonnull));
/********************************************************************//**
Unconditionally set the autoinc counter. */
UNIV_INTERN
@@ -238,7 +297,8 @@ void
dict_table_autoinc_initialize(
/*==========================*/
dict_table_t* table, /*!< in/out: table */
- ib_uint64_t value); /*!< in: next value to assign to a row */
+ ib_uint64_t value) /*!< in: next value to assign to a row */
+ __attribute__((nonnull));
/********************************************************************//**
Reads the next autoinc value (== autoinc counter value), 0 if not yet
initialized.
@@ -247,7 +307,8 @@ UNIV_INTERN
ib_uint64_t
dict_table_autoinc_read(
/*====================*/
- const dict_table_t* table); /*!< in: table */
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Updates the autoinc counter if the value supplied is greater than the
current value. */
@@ -257,14 +318,16 @@ dict_table_autoinc_update_if_greater(
/*=================================*/
dict_table_t* table, /*!< in/out: table */
- ib_uint64_t value); /*!< in: value which was assigned to a row */
+ ib_uint64_t value) /*!< in: value which was assigned to a row */
+ __attribute__((nonnull));
/********************************************************************//**
Release the autoinc lock. */
UNIV_INTERN
void
dict_table_autoinc_unlock(
/*======================*/
- dict_table_t* table); /*!< in/out: table */
+ dict_table_t* table) /*!< in/out: table */
+ __attribute__((nonnull));
#endif /* !UNIV_HOTBACKUP */
/**********************************************************************//**
Adds system columns to a table object. */
@@ -273,7 +336,8 @@ void
dict_table_add_system_columns(
/*==========================*/
dict_table_t* table, /*!< in/out: table */
- mem_heap_t* heap); /*!< in: temporary heap */
+ mem_heap_t* heap) /*!< in: temporary heap */
+ __attribute__((nonnull));
#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Adds a table object to the dictionary cache. */
@@ -283,26 +347,30 @@ dict_table_add_to_cache(
/*====================*/
dict_table_t* table, /*!< in: table */
ibool can_be_evicted, /*!< in: TRUE if can be evicted*/
- mem_heap_t* heap); /*!< in: temporary heap */
+ mem_heap_t* heap) /*!< in: temporary heap */
+ __attribute__((nonnull));
/**********************************************************************//**
Removes a table object from the dictionary cache. */
UNIV_INTERN
void
dict_table_remove_from_cache(
/*=========================*/
- dict_table_t* table); /*!< in, own: table */
+ dict_table_t* table) /*!< in, own: table */
+ __attribute__((nonnull));
/**********************************************************************//**
Renames a table object.
@return TRUE if success */
UNIV_INTERN
-ibool
+dberr_t
dict_table_rename_in_cache(
/*=======================*/
dict_table_t* table, /*!< in/out: table */
const char* new_name, /*!< in: new name */
- ibool rename_also_foreigns);/*!< in: in ALTER TABLE we want
+ ibool rename_also_foreigns)
+ /*!< in: in ALTER TABLE we want
to preserve the original table name
in constraints which reference it */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
Removes an index from the dictionary cache. */
UNIV_INTERN
@@ -310,7 +378,8 @@ void
dict_index_remove_from_cache(
/*=========================*/
dict_table_t* table, /*!< in/out: table */
- dict_index_t* index); /*!< in, own: index */
+ dict_index_t* index) /*!< in, own: index */
+ __attribute__((nonnull));
/**********************************************************************//**
Change the id of a table object in the dictionary cache. This is used in
DISCARD TABLESPACE. */
@@ -319,7 +388,16 @@ void
dict_table_change_id_in_cache(
/*==========================*/
dict_table_t* table, /*!< in/out: table object already in cache */
- table_id_t new_id);/*!< in: new id to set */
+ table_id_t new_id) /*!< in: new id to set */
+ __attribute__((nonnull));
+/**********************************************************************//**
+Removes a foreign constraint struct from the dictionary cache. */
+UNIV_INTERN
+void
+dict_foreign_remove_from_cache(
+/*===========================*/
+ dict_foreign_t* foreign) /*!< in, own: foreign constraint */
+ __attribute__((nonnull));
/**********************************************************************//**
Adds a foreign key constraint object to the dictionary cache. May free
the object if there already is an object with the same identifier in.
@@ -327,12 +405,13 @@ At least one of foreign table or referenced table must already be in
the dictionary cache!
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
dict_foreign_add_to_cache(
/*======================*/
dict_foreign_t* foreign, /*!< in, own: foreign key constraint */
- ibool check_charsets);/*!< in: TRUE=check charset
+ ibool check_charsets) /*!< in: TRUE=check charset
compatibility */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Check if the index is referenced by a foreign key, if TRUE return the
matching instance NULL otherwise.
@@ -343,7 +422,8 @@ dict_foreign_t*
dict_table_get_referenced_constraint(
/*=================================*/
dict_table_t* table, /*!< in: InnoDB table */
- dict_index_t* index); /*!< in: InnoDB index */
+ dict_index_t* index) /*!< in: InnoDB index */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Checks if a table is referenced by foreign keys.
@return TRUE if table is referenced by a foreign key */
@@ -351,17 +431,19 @@ UNIV_INTERN
ibool
dict_table_is_referenced_by_foreign_key(
/*====================================*/
- const dict_table_t* table); /*!< in: InnoDB table */
+ const dict_table_t* table) /*!< in: InnoDB table */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
-Replace the index in the foreign key list that matches this index's
-definition with an equivalent index. */
+Replace the index passed in with another equivalent index in the
+foreign key lists of the table. */
UNIV_INTERN
void
-dict_table_replace_index_in_foreign_list(
-/*=====================================*/
- dict_table_t* table, /*!< in/out: table */
- dict_index_t* index, /*!< in: index to be replaced */
- const trx_t* trx); /*!< in: transaction handle */
+dict_foreign_replace_index(
+/*=======================*/
+ dict_table_t* table, /*!< in/out: table */
+ const dict_index_t* index, /*!< in: index to be replaced */
+ const trx_t* trx) /*!< in: transaction handle */
+ __attribute__((nonnull));
/**********************************************************************//**
Determines whether a string starts with the specified keyword.
@return TRUE if str starts with keyword */
@@ -369,9 +451,10 @@ UNIV_INTERN
ibool
dict_str_starts_with_keyword(
/*=========================*/
- void* mysql_thd, /*!< in: MySQL thread handle */
+ THD* thd, /*!< in: MySQL thread handle */
const char* str, /*!< in: string to scan for keyword */
- const char* keyword); /*!< in: keyword to look for */
+ const char* keyword) /*!< in: keyword to look for */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Checks if a index is defined for a foreign key constraint. Index is a part
of a foreign key constraint if the index is referenced by foreign key
@@ -383,7 +466,8 @@ dict_foreign_t*
dict_table_get_foreign_constraint(
/*==============================*/
dict_table_t* table, /*!< in: InnoDB table */
- dict_index_t* index); /*!< in: InnoDB index */
+ dict_index_t* index) /*!< in: InnoDB index */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Scans a table create SQL string and adds to the data dictionary
the foreign key constraints declared in the string. This function
@@ -393,7 +477,7 @@ bot participating tables. The indexes are allowed to contain more
fields than mentioned in the constraint.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
dict_create_foreign_constraints(
/*============================*/
trx_t* trx, /*!< in: transaction */
@@ -409,15 +493,16 @@ dict_create_foreign_constraints(
const char* name, /*!< in: table full name in the
normalized form
database_name/table_name */
- ibool reject_fks); /*!< in: if TRUE, fail with error
+ ibool reject_fks) /*!< in: if TRUE, fail with error
code DB_CANNOT_ADD_CONSTRAINT if
any foreign keys are found. */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement.
@return DB_SUCCESS or DB_CANNOT_DROP_CONSTRAINT if syntax error or the
constraint id does not match */
UNIV_INTERN
-ulint
+dberr_t
dict_foreign_parse_drop_constraints(
/*================================*/
mem_heap_t* heap, /*!< in: heap from which we can
@@ -426,8 +511,9 @@ dict_foreign_parse_drop_constraints(
dict_table_t* table, /*!< in: table */
ulint* n, /*!< out: number of constraints
to drop */
- const char*** constraints_to_drop); /*!< out: id's of the
+ const char*** constraints_to_drop) /*!< out: id's of the
constraints to drop */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
Returns a table object and increments its open handle count.
NOTE! This is a high-level function to be used mainly from outside the
@@ -439,43 +525,40 @@ dict_table_t*
dict_table_open_on_name(
/*====================*/
const char* table_name, /*!< in: table name */
- ibool dict_locked); /*!< in: TRUE=data dictionary locked */
-
-/**********************************************************************//**
-Returns a table object and increment its open handle count. Table
-statistics will not be updated if they are not initialized.
-Call this function when dropping a table.
-@return table, NULL if does not exist */
-UNIV_INTERN
-dict_table_t*
-dict_table_open_on_name_no_stats(
-/*=============================*/
- const char* table_name, /*!< in: table name */
ibool dict_locked, /*!< in: TRUE=data dictionary locked */
+ ibool try_drop, /*!< in: TRUE=try to drop any orphan
+ indexes after an aborted online
+ index creation */
dict_err_ignore_t
- ignore_err); /*!< in: error to be ignored when
+ ignore_err) /*!< in: error to be ignored when
loading the table */
-/**********************************************************************//**
-Find an index that is equivalent to the one passed in and is not marked
-for deletion.
-@return index equivalent to foreign->foreign_index, or NULL */
-UNIV_INTERN
-dict_index_t*
-dict_foreign_find_equiv_index(
-/*==========================*/
- dict_foreign_t* foreign);/*!< in: foreign key */
-/**********************************************************************//**
-Returns an index object by matching on the name and column names and
-if more than one index matches return the index with the max id
+ __attribute__((nonnull, warn_unused_result));
+
+/*********************************************************************//**
+Tries to find an index whose first fields are the columns in the array,
+in the same order and is not marked for deletion and is not the same
+as types_idx.
@return matching index, NULL if not found */
UNIV_INTERN
dict_index_t*
-dict_table_get_index_by_max_id(
-/*===========================*/
- dict_table_t* table, /*!< in: table */
- const char* name, /*!< in: the index name to find */
- const char** columns,/*!< in: array of column names */
- ulint n_cols);/*!< in: number of columns */
+dict_foreign_find_index(
+/*====================*/
+ const dict_table_t* table, /*!< in: table */
+ const char** columns,/*!< in: array of column names */
+ ulint n_cols, /*!< in: number of columns */
+ const dict_index_t* types_idx,
+ /*!< in: NULL or an index
+ whose types the column types
+ must match */
+ ibool check_charsets,
+ /*!< in: whether to check
+ charsets. only has an effect
+ if types_idx != NULL */
+ ulint check_null)
+ /*!< in: nonzero if none of
+ the columns must be declared
+ NOT NULL */
+ __attribute__((nonnull(1,2), warn_unused_result));
/**********************************************************************//**
Returns a column's name.
@return column name. NOTE: not guaranteed to stay valid if table is
@@ -485,29 +568,16 @@ const char*
dict_table_get_col_name(
/*====================*/
const dict_table_t* table, /*!< in: table */
- ulint col_nr);/*!< in: column number */
-
+ ulint col_nr) /*!< in: column number */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
-Prints a table definition. */
+Prints a table data. */
UNIV_INTERN
void
dict_table_print(
/*=============*/
- dict_table_t* table); /*!< in: table */
-/**********************************************************************//**
-Prints a table data. */
-UNIV_INTERN
-void
-dict_table_print_low(
-/*=================*/
- dict_table_t* table); /*!< in: table */
-/**********************************************************************//**
-Prints a table data when we know the table name. */
-UNIV_INTERN
-void
-dict_table_print_by_name(
-/*=====================*/
- const char* name); /*!< in: table name */
+ dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull));
/**********************************************************************//**
Outputs info on foreign keys of a table. */
UNIV_INTERN
@@ -520,7 +590,8 @@ dict_print_info_on_foreign_keys(
of SHOW TABLE STATUS */
FILE* file, /*!< in: file where to print */
trx_t* trx, /*!< in: transaction */
- dict_table_t* table); /*!< in: table */
+ dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull));
/**********************************************************************//**
Outputs info on a foreign key of a table in a format suitable for
CREATE TABLE. */
@@ -531,7 +602,8 @@ dict_print_info_on_foreign_key_in_create_format(
FILE* file, /*!< in: file where to print */
trx_t* trx, /*!< in: transaction */
dict_foreign_t* foreign, /*!< in: foreign key constraint */
- ibool add_newline); /*!< in: whether to add a newline */
+ ibool add_newline) /*!< in: whether to add a newline */
+ __attribute__((nonnull(1,3)));
/********************************************************************//**
Displays the names of the index and the table. */
UNIV_INTERN
@@ -539,8 +611,35 @@ void
dict_index_name_print(
/*==================*/
FILE* file, /*!< in: output stream */
- trx_t* trx, /*!< in: transaction */
- const dict_index_t* index); /*!< in: index to print */
+ const trx_t* trx, /*!< in: transaction */
+ const dict_index_t* index) /*!< in: index to print */
+ __attribute__((nonnull(1,3)));
+/*********************************************************************//**
+Checks if the index qualifies as a candidate for a foreign key: its first
+fields are the given columns, in the same order, it is not marked for
+deletion, and it is not the same index as types_idx.
+@return true if the index qualifies, false otherwise */
+UNIV_INTERN
+bool
+dict_foreign_qualify_index(
+/*==========================*/
+ const dict_table_t* table, /*!< in: table */
+ const char** columns,/*!< in: array of column names */
+ ulint n_cols, /*!< in: number of columns */
+ const dict_index_t* index, /*!< in: index to check */
+ const dict_index_t* types_idx,
+ /*!< in: NULL or an index
+ whose types the column types
+ must match */
+ ibool check_charsets,
+ /*!< in: whether to check
+ charsets. only has an effect
+ if types_idx != NULL */
+ ulint check_null)
+ /*!< in: nonzero if none of
+ the columns must be declared
+ NOT NULL */
+ __attribute__((nonnull(1,2), warn_unused_result));
#ifdef UNIV_DEBUG
/********************************************************************//**
Gets the first index on the table (the clustered index).
@@ -549,7 +648,17 @@ UNIV_INLINE
dict_index_t*
dict_table_get_first_index(
/*=======================*/
- const dict_table_t* table); /*!< in: table */
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Gets the last index on the table.
+@return index, NULL if none exists */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_last_index(
+/*=======================*/
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Gets the next index on the table.
@return index, NULL if none left */
@@ -557,9 +666,11 @@ UNIV_INLINE
dict_index_t*
dict_table_get_next_index(
/*======================*/
- const dict_index_t* index); /*!< in: index */
+ const dict_index_t* index) /*!< in: index */
+ __attribute__((nonnull, warn_unused_result));
#else /* UNIV_DEBUG */
# define dict_table_get_first_index(table) UT_LIST_GET_FIRST((table)->indexes)
+# define dict_table_get_last_index(table) UT_LIST_GET_LAST((table)->indexes)
# define dict_table_get_next_index(index) UT_LIST_GET_NEXT(indexes, index)
#endif /* UNIV_DEBUG */
#endif /* !UNIV_HOTBACKUP */
@@ -605,15 +716,6 @@ dict_index_is_ibuf(
const dict_index_t* index) /*!< in: index */
__attribute__((nonnull, pure, warn_unused_result));
/********************************************************************//**
-Check whether the index is an universal index tree.
-@return nonzero for universal tree, zero for other indexes */
-UNIV_INLINE
-ulint
-dict_index_is_univ(
-/*===============*/
- const dict_index_t* index) /*!< in: index */
- __attribute__((nonnull, pure, warn_unused_result));
-/********************************************************************//**
Check whether the index is a secondary index or the insert buffer tree.
@return nonzero for insert buffer, zero for other indexes */
UNIV_INLINE
@@ -626,13 +728,14 @@ dict_index_is_sec_or_ibuf(
/************************************************************************
Gets the all the FTS indexes for the table. NOTE: must not be called for
tables which do not have an FTS-index. */
-
+UNIV_INTERN
ulint
dict_table_get_all_fts_indexes(
/*===========================*/
/* out: number of indexes collected */
dict_table_t* table, /* in: table */
- ib_vector_t* indexes);/* out: vector for collecting FTS indexes */
+ ib_vector_t* indexes)/* out: vector for collecting FTS indexes */
+ __attribute__((nonnull));
/********************************************************************//**
Gets the number of user-defined columns in a table in the dictionary
cache.
@@ -662,6 +765,35 @@ dict_table_get_n_cols(
/*==================*/
const dict_table_t* table) /*!< in: table */
__attribute__((nonnull, pure, warn_unused_result));
+/********************************************************************//**
+Gets the approximately estimated number of rows in the table.
+@return estimated number of rows */
+UNIV_INLINE
+ib_uint64_t
+dict_table_get_n_rows(
+/*==================*/
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Increment the number of rows in the table by one.
+Notice that this operation is not protected by any latch, the number is
+approximate. */
+UNIV_INLINE
+void
+dict_table_n_rows_inc(
+/*==================*/
+ dict_table_t* table) /*!< in/out: table */
+ __attribute__((nonnull));
+/********************************************************************//**
+Decrement the number of rows in the table by one.
+Notice that this operation is not protected by any latch, the number is
+approximate. */
+UNIV_INLINE
+void
+dict_table_n_rows_dec(
+/*==================*/
+ dict_table_t* table) /*!< in/out: table */
+ __attribute__((nonnull));
#ifdef UNIV_DEBUG
/********************************************************************//**
Gets the nth column of a table.
@@ -671,7 +803,8 @@ dict_col_t*
dict_table_get_nth_col(
/*===================*/
const dict_table_t* table, /*!< in: table */
- ulint pos); /*!< in: position of column */
+ ulint pos) /*!< in: position of column */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Gets the given system column of a table.
@return pointer to column object */
@@ -680,7 +813,8 @@ dict_col_t*
dict_table_get_sys_col(
/*===================*/
const dict_table_t* table, /*!< in: table */
- ulint sys); /*!< in: DATA_ROW_ID, ... */
+ ulint sys) /*!< in: DATA_ROW_ID, ... */
+ __attribute__((nonnull, warn_unused_result));
#else /* UNIV_DEBUG */
#define dict_table_get_nth_col(table, pos) \
((table)->cols + (pos))
@@ -695,7 +829,8 @@ ulint
dict_table_get_sys_col_no(
/*======================*/
const dict_table_t* table, /*!< in: table */
- ulint sys); /*!< in: DATA_ROW_ID, ... */
+ ulint sys) /*!< in: DATA_ROW_ID, ... */
+ __attribute__((nonnull, warn_unused_result));
#ifndef UNIV_HOTBACKUP
/********************************************************************//**
Returns the minimum data size of an index record.
@@ -704,7 +839,8 @@ UNIV_INLINE
ulint
dict_index_get_min_size(
/*====================*/
- const dict_index_t* index); /*!< in: index */
+ const dict_index_t* index) /*!< in: index */
+ __attribute__((nonnull, warn_unused_result));
#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
Check whether the table uses the compact page format.
@@ -713,7 +849,8 @@ UNIV_INLINE
ibool
dict_table_is_comp(
/*===============*/
- const dict_table_t* table); /*!< in: table */
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Determine the file format of a table.
@return file format version */
@@ -721,7 +858,8 @@ UNIV_INLINE
ulint
dict_table_get_format(
/*==================*/
- const dict_table_t* table); /*!< in: table */
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Determine the file format from a dict_table_t::flags.
@return file format version */
@@ -729,7 +867,8 @@ UNIV_INLINE
ulint
dict_tf_get_format(
/*===============*/
- ulint flags); /*!< in: dict_table_t::flags */
+ ulint flags) /*!< in: dict_table_t::flags */
+ __attribute__((warn_unused_result));
/********************************************************************//**
Set the various values in a dict_table_t::flags pointer. */
UNIV_INLINE
@@ -738,7 +877,9 @@ dict_tf_set(
/*========*/
ulint* flags, /*!< in/out: table */
rec_format_t format, /*!< in: file format */
- ulint zip_ssize); /*!< in: zip shift size */
+ ulint zip_ssize, /*!< in: zip shift size */
+ bool remote_path) /*!< in: table uses DATA DIRECTORY */
+ __attribute__((nonnull));
/********************************************************************//**
Convert a 32 bit integer table flags to the 32 bit integer that is
written into the tablespace header at the offset FSP_SPACE_FLAGS and is
@@ -756,13 +897,6 @@ dict_tf_to_fsp_flags(
/*=================*/
ulint flags) /*!< in: dict_table_t::flags */
__attribute__((const));
-/********************************************************************/
-UNIV_INLINE
-ulint
-dict_tf_to_sys_tables_type(
-/*=======================*/
- ulint flags) /*!< in: dict_table_t::flags */
- __attribute__((const));
/********************************************************************//**
Extract the compressed page size from table flags.
@return compressed page size, or 0 if not compressed */
@@ -779,7 +913,8 @@ UNIV_INLINE
ulint
dict_table_zip_size(
/*================*/
- const dict_table_t* table); /*!< in: table */
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull, warn_unused_result));
#ifndef UNIV_HOTBACKUP
/*********************************************************************//**
Obtain exclusive locks on all index trees of the table. This is to prevent
@@ -789,15 +924,16 @@ UNIV_INLINE
void
dict_table_x_lock_indexes(
/*======================*/
- dict_table_t* table); /*!< in: table */
+ dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull));
/*********************************************************************//**
Release the exclusive locks on all index tree. */
UNIV_INLINE
void
dict_table_x_unlock_indexes(
/*========================*/
- dict_table_t* table); /*!< in: table */
-#endif /* !UNIV_HOTBACKUP */
+ dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull));
/********************************************************************//**
Checks if a column is in the ordering columns of the clustered index of a
table. Column prefixes are treated like whole columns.
@@ -807,7 +943,8 @@ ibool
dict_table_col_in_clustered_key(
/*============================*/
const dict_table_t* table, /*!< in: table */
- ulint n); /*!< in: column number */
+ ulint n) /*!< in: column number */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
Check if the table has an FTS index.
@return TRUE if table has an FTS index */
@@ -815,36 +952,8 @@ UNIV_INLINE
ibool
dict_table_has_fts_index(
/*=====================*/
- dict_table_t* table); /*!< in: table */
-/*******************************************************************//**
-Validate and return the table flags.
-@return Same as input after validating it as dict_table_t::flags.
-If there is an error, trigger assertion failure. */
-UNIV_INLINE
-ulint
-dict_tf_validate(
-/*=============*/
- ulint flags); /*!< in: table flags */
-/********************************************************************//**
-Validate a SYS_TABLES TYPE field and return it.
-@return Same as input after validating it as a SYS_TABLES TYPE field.
-If there is an error, return ULINT_UNDEFINED. */
-UNIV_INLINE
-ulint
-dict_sys_tables_type_validate(
-/*==========================*/
- ulint type, /*!< in: SYS_TABLES.TYPE */
- ulint n_cols); /*!< in: SYS_TABLES.N_COLS */
-/********************************************************************//**
-Determine the file format from dict_table_t::flags
-The low order bit will be zero for REDUNDANT and 1 for COMPACT. For any
-other row_format, file_format is > 0 and DICT_TF_COMPACT will also be set.
-@return file format version */
-UNIV_INLINE
-rec_format_t
-dict_tf_get_rec_format(
-/*===================*/
- ulint flags); /*!< in: dict_table_t::flags */
+ dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
Copies types of columns contained in table to tuple and sets all
fields of the tuple to the SQL NULL value. This function should
@@ -854,18 +963,20 @@ void
dict_table_copy_types(
/*==================*/
dtuple_t* tuple, /*!< in/out: data tuple */
- const dict_table_t* table); /*!< in: table */
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull));
/********************************************************************
Wait until all the background threads of the given table have exited, i.e.,
bg_threads == 0. Note: bg_threads_mutex must be reserved when
calling this. */
-
+UNIV_INTERN
void
dict_table_wait_for_bg_threads_to_exit(
/*===================================*/
dict_table_t* table, /* in: table */
- ulint delay); /* in: time in microseconds to wait between
+ ulint delay) /* in: time in microseconds to wait between
checks of bg_threads. */
+ __attribute__((nonnull));
/**********************************************************************//**
Looks for an index with the given id. NOTE that we do not reserve
the dictionary mutex: this function is for emergency purposes like
@@ -875,7 +986,8 @@ UNIV_INTERN
dict_index_t*
dict_index_find_on_id_low(
/*======================*/
- index_id_t id); /*!< in: index id */
+ index_id_t id) /*!< in: index id */
+ __attribute__((warn_unused_result));
/**********************************************************************//**
Make room in the table cache by evicting an unused table. The unused table
should not be part of FK relationship and currently not used in any user
@@ -891,16 +1003,17 @@ dict_make_room_in_cache(
Adds an index to the dictionary cache.
@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */
UNIV_INTERN
-ulint
+dberr_t
dict_index_add_to_cache(
/*====================*/
dict_table_t* table, /*!< in: table on which the index is */
dict_index_t* index, /*!< in, own: index; NOTE! The index memory
object is freed in this function! */
ulint page_no,/*!< in: root page number of the index */
- ibool strict);/*!< in: TRUE=refuse to create the index
+ ibool strict) /*!< in: TRUE=refuse to create the index
if records could be too big to fit in
an B-tree page */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
Removes an index from the dictionary cache. */
UNIV_INTERN
@@ -908,8 +1021,9 @@ void
dict_index_remove_from_cache(
/*=========================*/
dict_table_t* table, /*!< in/out: table */
- dict_index_t* index); /*!< in, own: index */
-
+ dict_index_t* index) /*!< in, own: index */
+ __attribute__((nonnull));
+#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
Gets the number of fields in the internal representation of an index,
including fields added by the dictionary system.
@@ -918,9 +1032,10 @@ UNIV_INLINE
ulint
dict_index_get_n_fields(
/*====================*/
- const dict_index_t* index); /*!< in: an internal
+ const dict_index_t* index) /*!< in: an internal
representation of index (in
the dictionary cache) */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Gets the number of fields in the internal representation of an index
that uniquely determine the position of an index entry in the index, if
@@ -931,8 +1046,9 @@ UNIV_INLINE
ulint
dict_index_get_n_unique(
/*====================*/
- const dict_index_t* index); /*!< in: an internal representation
+ const dict_index_t* index) /*!< in: an internal representation
of index (in the dictionary cache) */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Gets the number of fields in the internal representation of an index
which uniquely determine the position of an index entry in the index, if
@@ -942,8 +1058,9 @@ UNIV_INLINE
ulint
dict_index_get_n_unique_in_tree(
/*============================*/
- const dict_index_t* index); /*!< in: an internal representation
+ const dict_index_t* index) /*!< in: an internal representation
of index (in the dictionary cache) */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Gets the number of user-defined ordering fields in the index. In the internal
representation we add the row id to the ordering fields to make all indexes
@@ -954,8 +1071,9 @@ UNIV_INLINE
ulint
dict_index_get_n_ordering_defined_by_user(
/*======================================*/
- const dict_index_t* index); /*!< in: an internal representation
+ const dict_index_t* index) /*!< in: an internal representation
of index (in the dictionary cache) */
+ __attribute__((nonnull, warn_unused_result));
#ifdef UNIV_DEBUG
/********************************************************************//**
Gets the nth field of an index.
@@ -965,7 +1083,8 @@ dict_field_t*
dict_index_get_nth_field(
/*=====================*/
const dict_index_t* index, /*!< in: index */
- ulint pos); /*!< in: position of field */
+ ulint pos) /*!< in: position of field */
+ __attribute__((nonnull, warn_unused_result));
#else /* UNIV_DEBUG */
# define dict_index_get_nth_field(index, pos) ((index)->fields + (pos))
#endif /* UNIV_DEBUG */
@@ -977,7 +1096,8 @@ const dict_col_t*
dict_index_get_nth_col(
/*===================*/
const dict_index_t* index, /*!< in: index */
- ulint pos); /*!< in: position of the field */
+ ulint pos) /*!< in: position of the field */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Gets the column number of the nth field in an index.
@return column number */
@@ -986,7 +1106,8 @@ ulint
dict_index_get_nth_col_no(
/*======================*/
const dict_index_t* index, /*!< in: index */
- ulint pos); /*!< in: position of the field */
+ ulint pos) /*!< in: position of the field */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Looks for column n in an index.
@return position in internal representation of the index;
@@ -996,7 +1117,8 @@ ulint
dict_index_get_nth_col_pos(
/*=======================*/
const dict_index_t* index, /*!< in: index */
- ulint n); /*!< in: column number */
+ ulint n) /*!< in: column number */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Looks for column n in an index.
@return position in internal representation of the index;
@@ -1007,8 +1129,9 @@ dict_index_get_nth_col_or_prefix_pos(
/*=================================*/
const dict_index_t* index, /*!< in: index */
ulint n, /*!< in: column number */
- ibool inc_prefix); /*!< in: TRUE=consider
+ ibool inc_prefix) /*!< in: TRUE=consider
column prefixes too */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Returns TRUE if the index contains a column or a prefix of that column.
@return TRUE if contains the column or its prefix */
@@ -1017,7 +1140,8 @@ ibool
dict_index_contains_col_or_prefix(
/*==============================*/
const dict_index_t* index, /*!< in: index */
- ulint n); /*!< in: column number */
+ ulint n) /*!< in: column number */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Looks for a matching field in an index. The column has to be the same. The
column in index must be complete, or must contain a prefix longer than the
@@ -1031,7 +1155,8 @@ dict_index_get_nth_field_pos(
/*=========================*/
const dict_index_t* index, /*!< in: index from which to search */
const dict_index_t* index2, /*!< in: index */
- ulint n); /*!< in: field number in index2 */
+ ulint n) /*!< in: field number in index2 */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Looks for column n position in the clustered index.
@return position in internal representation of the clustered index */
@@ -1040,7 +1165,8 @@ ulint
dict_table_get_nth_col_pos(
/*=======================*/
const dict_table_t* table, /*!< in: table */
- ulint n); /*!< in: column number */
+ ulint n) /*!< in: column number */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Returns the position of a system column in an index.
@return position, ULINT_UNDEFINED if not contained */
@@ -1049,7 +1175,8 @@ ulint
dict_index_get_sys_col_pos(
/*=======================*/
const dict_index_t* index, /*!< in: index */
- ulint type); /*!< in: DATA_ROW_ID, ... */
+ ulint type) /*!< in: DATA_ROW_ID, ... */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
Adds a column to index. */
UNIV_INTERN
@@ -1059,7 +1186,8 @@ dict_index_add_col(
dict_index_t* index, /*!< in/out: index */
const dict_table_t* table, /*!< in: table */
dict_col_t* col, /*!< in: column */
- ulint prefix_len); /*!< in: column prefix length */
+ ulint prefix_len) /*!< in: column prefix length */
+ __attribute__((nonnull));
#ifndef UNIV_HOTBACKUP
/*******************************************************************//**
Copies types of fields contained in index to tuple. */
@@ -1069,8 +1197,9 @@ dict_index_copy_types(
/*==================*/
dtuple_t* tuple, /*!< in/out: data tuple */
const dict_index_t* index, /*!< in: index */
- ulint n_fields); /*!< in: number of
+ ulint n_fields) /*!< in: number of
field types to copy */
+ __attribute__((nonnull));
#endif /* !UNIV_HOTBACKUP */
/*********************************************************************//**
Gets the field column.
@@ -1079,7 +1208,8 @@ UNIV_INLINE
const dict_col_t*
dict_field_get_col(
/*===============*/
- const dict_field_t* field); /*!< in: index field */
+ const dict_field_t* field) /*!< in: index field */
+ __attribute__((nonnull, warn_unused_result));
#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Returns an index object if it is found in the dictionary cache.
@@ -1089,7 +1219,8 @@ UNIV_INTERN
dict_index_t*
dict_index_get_if_in_cache_low(
/*===========================*/
- index_id_t index_id); /*!< in: index id */
+ index_id_t index_id) /*!< in: index id */
+ __attribute__((warn_unused_result));
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/**********************************************************************//**
Returns an index object if it is found in the dictionary cache.
@@ -1098,7 +1229,8 @@ UNIV_INTERN
dict_index_t*
dict_index_get_if_in_cache(
/*=======================*/
- index_id_t index_id); /*!< in: index id */
+ index_id_t index_id) /*!< in: index id */
+ __attribute__((warn_unused_result));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG
/**********************************************************************//**
@@ -1110,7 +1242,17 @@ ibool
dict_index_check_search_tuple(
/*==========================*/
const dict_index_t* index, /*!< in: index tree */
- const dtuple_t* tuple); /*!< in: tuple used in a search */
+ const dtuple_t* tuple) /*!< in: tuple used in a search */
+ __attribute__((nonnull, warn_unused_result));
+/** Whether and when to allow temporary index names */
+enum check_name {
+ /** Require all indexes to be complete. */
+ CHECK_ALL_COMPLETE,
+ /** Allow aborted online index creation. */
+ CHECK_ABORTED_OK,
+ /** Allow partial indexes to exist. */
+ CHECK_PARTIAL_OK
+};
/**********************************************************************//**
Check for duplicate index entries in a table [using the index name] */
UNIV_INTERN
@@ -1119,8 +1261,9 @@ dict_table_check_for_dup_indexes(
/*=============================*/
const dict_table_t* table, /*!< in: Check for dup indexes
in this table */
- ibool tmp_ok);/*!< in: TRUE=allow temporary
- index names */
+ enum check_name check) /*!< in: whether and when to allow
+ temporary index names */
+ __attribute__((nonnull));
#endif /* UNIV_DEBUG */
/**********************************************************************//**
Builds a node pointer out of a physical record and a page number.
@@ -1136,8 +1279,9 @@ dict_index_build_node_ptr(
pointer */
mem_heap_t* heap, /*!< in: memory heap where pointer
created */
- ulint level); /*!< in: level of rec in tree:
+ ulint level) /*!< in: level of rec in tree:
0 means leaf level */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
Copies an initial segment of a physical record, long enough to specify an
index entry uniquely.
@@ -1152,7 +1296,8 @@ dict_index_copy_rec_order_prefix(
ulint* n_fields,/*!< out: number of fields copied */
byte** buf, /*!< in/out: memory buffer for the
copied prefix, or NULL */
- ulint* buf_size);/*!< in/out: buffer size */
+ ulint* buf_size)/*!< in/out: buffer size */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
Builds a typed data tuple out of a physical record.
@return own: data tuple */
@@ -1163,7 +1308,8 @@ dict_index_build_data_tuple(
dict_index_t* index, /*!< in: index */
rec_t* rec, /*!< in: record for which to build data tuple */
ulint n_fields,/*!< in: number of data fields */
- mem_heap_t* heap); /*!< in: memory heap where tuple created */
+ mem_heap_t* heap) /*!< in: memory heap where tuple created */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Gets the space id of the root of the index tree.
@return space id */
@@ -1171,7 +1317,8 @@ UNIV_INLINE
ulint
dict_index_get_space(
/*=================*/
- const dict_index_t* index); /*!< in: index */
+ const dict_index_t* index) /*!< in: index */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Sets the space id of the root of the index tree. */
UNIV_INLINE
@@ -1179,7 +1326,8 @@ void
dict_index_set_space(
/*=================*/
dict_index_t* index, /*!< in/out: index */
- ulint space); /*!< in: space id */
+ ulint space) /*!< in: space id */
+ __attribute__((nonnull));
/*********************************************************************//**
Gets the page number of the root of the index tree.
@return page number */
@@ -1187,7 +1335,8 @@ UNIV_INLINE
ulint
dict_index_get_page(
/*================*/
- const dict_index_t* tree); /*!< in: index */
+ const dict_index_t* tree) /*!< in: index */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Gets the read-write lock of the index tree.
@return read-write lock */
@@ -1195,7 +1344,8 @@ UNIV_INLINE
rw_lock_t*
dict_index_get_lock(
/*================*/
- dict_index_t* index); /*!< in: index */
+ dict_index_t* index) /*!< in: index */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Returns free space reserved for future updates of records. This is
relevant only in the case of many consecutive inserts, as updates
@@ -1205,13 +1355,48 @@ UNIV_INLINE
ulint
dict_index_get_space_reserve(void);
/*==============================*/
+
+/* Online index creation @{ */
+/********************************************************************//**
+Gets the status of online index creation.
+@return the status */
+UNIV_INLINE
+enum online_index_status
+dict_index_get_online_status(
+/*=========================*/
+ const dict_index_t* index) /*!< in: secondary index */
+ __attribute__((nonnull, warn_unused_result));
+/********************************************************************//**
+Sets the status of online index creation. */
+UNIV_INLINE
+void
+dict_index_set_online_status(
+/*=========================*/
+ dict_index_t* index, /*!< in/out: index */
+ enum online_index_status status) /*!< in: status */
+ __attribute__((nonnull));
+/********************************************************************//**
+Determines if a secondary index is being or has been created online,
+or if the table is being rebuilt online, allowing concurrent modifications
+to the table.
+@retval true if the index is being or has been built online, or
+if this is a clustered index and the table is being or has been rebuilt online
+@retval false if the index has been created or the table has been
+rebuilt completely */
+UNIV_INLINE
+bool
+dict_index_is_online_ddl(
+/*=====================*/
+ const dict_index_t* index) /*!< in: index */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Calculates the minimum record length in an index. */
UNIV_INTERN
ulint
dict_index_calc_min_rec_len(
/*========================*/
- const dict_index_t* index); /*!< in: index */
+ const dict_index_t* index) /*!< in: index */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Reserves the dictionary system mutex for MySQL. */
UNIV_INTERN
@@ -1233,8 +1418,9 @@ void
dict_table_stats_lock(
/*==================*/
const dict_table_t* table, /*!< in: table */
- ulint latch_mode); /*!< in: RW_S_LATCH or
+ ulint latch_mode) /*!< in: RW_S_LATCH or
RW_X_LATCH */
+ __attribute__((nonnull));
/**********************************************************************//**
Unlock the latch that has been locked by dict_table_stats_lock() */
UNIV_INTERN
@@ -1242,8 +1428,9 @@ void
dict_table_stats_unlock(
/*====================*/
const dict_table_t* table, /*!< in: table */
- ulint latch_mode); /*!< in: RW_S_LATCH or
+ ulint latch_mode) /*!< in: RW_S_LATCH or
RW_X_LATCH */
+ __attribute__((nonnull));
/********************************************************************//**
Checks if the database name in two table names is the same.
@return TRUE if same db name */
@@ -1253,8 +1440,9 @@ dict_tables_have_same_db(
/*=====================*/
const char* name1, /*!< in: table name in the form
dbname '/' tablename */
- const char* name2); /*!< in: table name in the form
+ const char* name2) /*!< in: table name in the form
dbname '/' tablename */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Removes an index from the cache */
UNIV_INTERN
@@ -1262,7 +1450,8 @@ void
dict_index_remove_from_cache(
/*=========================*/
dict_table_t* table, /*!< in/out: table */
- dict_index_t* index); /*!< in, own: index */
+ dict_index_t* index) /*!< in, own: index */
+ __attribute__((nonnull));
/**********************************************************************//**
Get index by name
@return index, NULL if does not exist */
@@ -1271,7 +1460,8 @@ dict_index_t*
dict_table_get_index_on_name(
/*=========================*/
dict_table_t* table, /*!< in: table */
- const char* name); /*!< in: name of the index to find */
+ const char* name) /*!< in: name of the index to find */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
In case there is more than one index with the same name return the index
with the min(id).
@@ -1281,7 +1471,8 @@ dict_index_t*
dict_table_get_index_on_name_and_min_id(
/*====================================*/
dict_table_t* table, /*!< in: table */
- const char* name); /*!< in: name of the index to find */
+ const char* name) /*!< in: name of the index to find */
+ __attribute__((nonnull, warn_unused_result));
/***************************************************************
Check whether a column exists in an FTS index. */
UNIV_INLINE
@@ -1291,32 +1482,42 @@ dict_table_is_fts_column(
/* out: ULINT_UNDEFINED if no match else
the offset within the vector */
ib_vector_t* indexes,/* in: vector containing only FTS indexes */
- ulint col_no);/* in: col number to search for */
+ ulint col_no) /* in: col number to search for */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
Move a table to the non LRU end of the LRU list. */
UNIV_INTERN
void
dict_table_move_from_lru_to_non_lru(
/*================================*/
- dict_table_t* table); /*!< in: table to move from LRU to non-LRU */
+ dict_table_t* table) /*!< in: table to move from LRU to non-LRU */
+ __attribute__((nonnull));
/**********************************************************************//**
Move a table to the LRU list from the non-LRU list. */
UNIV_INTERN
void
dict_table_move_from_non_lru_to_lru(
/*================================*/
- dict_table_t* table); /*!< in: table to move from non-LRU to LRU */
+ dict_table_t* table) /*!< in: table to move from non-LRU to LRU */
+ __attribute__((nonnull));
/**********************************************************************//**
Move to the most recently used segment of the LRU list. */
UNIV_INTERN
void
dict_move_to_mru(
/*=============*/
- dict_table_t* table); /*!< in: table to move to MRU */
+ dict_table_t* table) /*!< in: table to move to MRU */
+ __attribute__((nonnull));
+
+/** Maximum number of columns in a foreign key constraint. Please Note MySQL
+has a much lower limit on the number of columns allowed in a foreign key
+constraint */
+#define MAX_NUM_FK_COLUMNS 500
+
/* Buffers for storing detailed information about the latest foreign key
and unique key errors */
extern FILE* dict_foreign_err_file;
-extern mutex_t dict_foreign_err_mutex; /* mutex protecting the buffers */
+extern ib_mutex_t dict_foreign_err_mutex; /* mutex protecting the buffers */
/** the dictionary system */
extern dict_sys_t* dict_sys;
@@ -1324,8 +1525,8 @@ extern dict_sys_t* dict_sys;
extern rw_lock_t dict_operation_lock;
/* Dictionary system struct */
-struct dict_sys_struct{
- mutex_t mutex; /*!< mutex protecting the data
+struct dict_sys_t{
+ ib_mutex_t mutex; /*!< mutex protecting the data
dictionary; protects also the
disk-based dictionary system tables;
this mutex serializes CREATE TABLE
@@ -1376,7 +1577,7 @@ dict_ind_init(void);
/* This struct is used to specify the name and type that a column must
have when checking a table's schema. */
-struct dict_col_meta_struct {
+struct dict_col_meta_t {
const char* name; /* column name */
ulint mtype; /* required column main type */
ulint prtype_mask; /* required column precise type mask;
@@ -1385,12 +1586,11 @@ struct dict_col_meta_struct {
in the column's prtype */
ulint len; /* required column length */
};
-typedef struct dict_col_meta_struct dict_col_meta_t;
/* This struct is used for checking whether a given table exists and
whether it has a predefined schema (number of columns and columns names
and types) */
-struct dict_table_schema_struct {
+struct dict_table_schema_t {
const char* table_name; /* the name of the table whose
structure we are checking */
ulint n_cols; /* the number of columns the
@@ -1398,8 +1598,15 @@ struct dict_table_schema_struct {
dict_col_meta_t* columns; /* metadata for the columns;
this array has n_cols
elements */
+ ulint n_foreign; /* number of foreign keys this
+ table has, pointing to other
+ tables (where this table is
+ FK child) */
+ ulint n_referenced; /* number of foreign keys other
+ tables have, pointing to this
+ table (where this table is
+ parent) */
};
-typedef struct dict_table_schema_struct dict_table_schema_t;
/* @} */
/*********************************************************************//**
@@ -1410,7 +1617,7 @@ The caller must own the dictionary mutex.
dict_table_schema_check() @{
@return DB_SUCCESS if the table exists and contains the necessary columns */
UNIV_INTERN
-enum db_err
+dberr_t
dict_table_schema_check(
/*====================*/
dict_table_schema_t* req_schema, /*!< in/out: required table
@@ -1419,9 +1626,27 @@ dict_table_schema_check(
message if != DB_SUCCESS and
!= DB_TABLE_NOT_FOUND is
returned */
- size_t errstr_sz); /*!< in: errstr size */
+ size_t errstr_sz) /*!< in: errstr size */
+ __attribute__((nonnull, warn_unused_result));
/* @} */
+/*********************************************************************//**
+Converts a database and table name from filesystem encoding
+(e.g. d@i1b/a@q1b@1Kc, same format as used in dict_table_t::name) in two
+strings in UTF8 encoding (e.g. dцb and aюbØc). The output buffers must be
+at least MAX_DB_UTF8_LEN and MAX_TABLE_UTF8_LEN bytes. */
+UNIV_INTERN
+void
+dict_fs2utf8(
+/*=========*/
+ const char* db_and_table, /*!< in: database and table names,
+ e.g. d@i1b/a@q1b@1Kc */
+ char* db_utf8, /*!< out: database name, e.g. dцb */
+ size_t db_utf8_size, /*!< in: dbname_utf8 size */
+ char* table_utf8, /*!< out: table name, e.g. aюbØc */
+ size_t table_utf8_size)/*!< in: table_utf8 size */
+ __attribute__((nonnull));
+
/**********************************************************************//**
Closes the data dictionary module. */
UNIV_INTERN
@@ -1437,7 +1662,7 @@ ulint
dict_table_is_corrupted(
/*====================*/
const dict_table_t* table) /*!< in: table */
- __attribute__((nonnull, pure, warn_unused_result));
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************************//**
Check whether the index is corrupted.
@@ -1447,7 +1672,7 @@ ulint
dict_index_is_corrupted(
/*====================*/
const dict_index_t* index) /*!< in: index */
- __attribute__((nonnull, pure, warn_unused_result));
+ __attribute__((nonnull, warn_unused_result));
#endif /* !UNIV_HOTBACKUP */
/**********************************************************************//**
@@ -1457,7 +1682,9 @@ UNIV_INTERN
void
dict_set_corrupted(
/*===============*/
- dict_index_t* index) /*!< in/out: index */
+ dict_index_t* index, /*!< in/out: index */
+ trx_t* trx, /*!< in/out: transaction */
+ const char* ctx) /*!< in: context */
UNIV_COLD __attribute__((nonnull));
/**********************************************************************//**
@@ -1469,7 +1696,8 @@ void
dict_set_corrupted_index_cache_only(
/*================================*/
dict_index_t* index, /*!< in/out: index */
- dict_table_t* table); /*!< in/out: table */
+ dict_table_t* table) /*!< in/out: table */
+ __attribute__((nonnull));
/**********************************************************************//**
Flags a table with specified space_id corrupted in the table dictionary
@@ -1481,6 +1709,76 @@ dict_set_corrupted_by_space(
/*========================*/
ulint space_id); /*!< in: space ID */
+/********************************************************************//**
+Validate the table flags.
+@return true if valid. */
+UNIV_INLINE
+bool
+dict_tf_is_valid(
+/*=============*/
+ ulint flags) /*!< in: table flags */
+ __attribute__((warn_unused_result));
+
+/********************************************************************//**
+Check if the tablespace for the table has been discarded.
+@return true if the tablespace has been discarded. */
+UNIV_INLINE
+bool
+dict_table_is_discarded(
+/*====================*/
+ const dict_table_t* table) /*!< in: table to check */
+ __attribute__((nonnull, pure, warn_unused_result));
+
+/********************************************************************//**
+Check if it is a temporary table.
+@return true if temporary table flag is set. */
+UNIV_INLINE
+bool
+dict_table_is_temporary(
+/*====================*/
+ const dict_table_t* table) /*!< in: table to check */
+ __attribute__((nonnull, pure, warn_unused_result));
+
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+This function should be called whenever a page is successfully
+compressed. Updates the compression padding information. */
+UNIV_INTERN
+void
+dict_index_zip_success(
+/*===================*/
+ dict_index_t* index) /*!< in/out: index to be updated. */
+ __attribute__((nonnull));
+/*********************************************************************//**
+This function should be called whenever a page compression attempt
+fails. Updates the compression padding information. */
+UNIV_INTERN
+void
+dict_index_zip_failure(
+/*===================*/
+ dict_index_t* index) /*!< in/out: index to be updated. */
+ __attribute__((nonnull));
+/*********************************************************************//**
+Return the optimal page size, for which page will likely compress.
+@return page size beyond which page may not compress*/
+UNIV_INTERN
+ulint
+dict_index_zip_pad_optimal_page_size(
+/*=================================*/
+ dict_index_t* index) /*!< in: index for which page size
+ is requested */
+ __attribute__((nonnull, warn_unused_result));
+/*************************************************************//**
+Convert table flag to row format string.
+@return row format name */
+UNIV_INTERN
+const char*
+dict_tf_to_row_format_string(
+/*=========================*/
+ ulint table_flag); /*!< in: row format setting */
+
+#endif /* !UNIV_HOTBACKUP */
+
#ifndef UNIV_NONINL
#include "dict0dict.ic"
#endif
diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic
index f6585ea8205..83953c9325a 100644
--- a/storage/innobase/include/dict0dict.ic
+++ b/storage/innobase/include/dict0dict.ic
@@ -29,6 +29,7 @@ Created 1/8/1996 Heikki Tuuri
#include "rem0types.h"
#include "fsp0fsp.h"
#include "srv0srv.h"
+#include "sync0rw.h" /* RW_S_LATCH */
/*********************************************************************//**
Gets the minimum number of bytes per character.
@@ -223,6 +224,22 @@ dict_table_get_first_index(
}
/********************************************************************//**
+Gets the last index on the table.
+@return index, NULL if none exists */
+UNIV_INLINE
+dict_index_t*
+dict_table_get_last_index(
+/*=======================*/
+ const dict_table_t* table) /*!< in: table */
+{
+ ut_ad(table);
+ ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+ return(UT_LIST_GET_LAST((const_cast<dict_table_t*>(table))
+ ->indexes));
+}
+
+/********************************************************************//**
Gets the next index on the table.
@return index, NULL if none left */
UNIV_INLINE
@@ -365,6 +382,56 @@ dict_table_get_n_cols(
return(table->n_cols);
}
+/********************************************************************//**
+Gets the approximately estimated number of rows in the table.
+@return estimated number of rows */
+UNIV_INLINE
+ib_uint64_t
+dict_table_get_n_rows(
+/*==================*/
+ const dict_table_t* table) /*!< in: table */
+{
+ ut_ad(table->stat_initialized);
+
+ return(table->stat_n_rows);
+}
+
+/********************************************************************//**
+Increment the number of rows in the table by one.
+Notice that this operation is not protected by any latch, the number is
+approximate. */
+UNIV_INLINE
+void
+dict_table_n_rows_inc(
+/*==================*/
+ dict_table_t* table) /*!< in/out: table */
+{
+ if (table->stat_initialized) {
+ ib_uint64_t n_rows = table->stat_n_rows;
+ if (n_rows < 0xFFFFFFFFFFFFFFFFULL) {
+ table->stat_n_rows = n_rows + 1;
+ }
+ }
+}
+
+/********************************************************************//**
+Decrement the number of rows in the table by one.
+Notice that this operation is not protected by any latch, the number is
+approximate. */
+UNIV_INLINE
+void
+dict_table_n_rows_dec(
+/*==================*/
+ dict_table_t* table) /*!< in/out: table */
+{
+ if (table->stat_initialized) {
+ ib_uint64_t n_rows = table->stat_n_rows;
+ if (n_rows > 0) {
+ table->stat_n_rows = n_rows - 1;
+ }
+ }
+}
+
#ifdef UNIV_DEBUG
/********************************************************************//**
Gets the nth column of a table.
@@ -458,12 +525,11 @@ dict_table_has_fts_index(
}
/********************************************************************//**
-Validate and return the table flags.
-@return Same as input after validating it as dict_table_t::flags.
-If there is an error, trigger assertion failure. */
+Validate the table flags.
+@return true if valid. */
UNIV_INLINE
-ulint
-dict_tf_validate(
+bool
+dict_tf_is_valid(
/*=============*/
ulint flags) /*!< in: table flags */
{
@@ -473,31 +539,43 @@ dict_tf_validate(
ulint unused = DICT_TF_GET_UNUSED(flags);
/* Make sure there are no bits that we do not know about. */
- ut_a(unused == 0);
+ if (unused != 0) {
- if (atomic_blobs) {
+ return(false);
+
+ } else if (atomic_blobs) {
/* Barracuda row formats COMPRESSED and DYNAMIC build on
the page structure introduced for the COMPACT row format
by allowing keys in secondary indexes to be made from
data stored off-page in the clustered index. */
- ut_a(compact);
- } else {
+
+ if (!compact) {
+ return(false);
+ }
+
+ } else if (zip_ssize) {
+
/* Antelope does not support COMPRESSED row format. */
- ut_a(!zip_ssize);
+ return(false);
}
if (zip_ssize) {
+
/* COMPRESSED row format must have compact and atomic_blobs
- bits set. */
- ut_a(compact);
- ut_a(atomic_blobs);
+ bits set and validate the number is within allowed range. */
- /* Validate the number is within allowed range. */
- ut_a(zip_ssize <= PAGE_ZIP_SSIZE_MAX);
+ if (!compact
+ || !atomic_blobs
+ || zip_ssize > PAGE_ZIP_SSIZE_MAX) {
+
+ return(false);
+ }
}
- /* Return the flags sent if we did not crash. */
- return(flags);
+ /* CREATE TABLE ... DATA DIRECTORY is supported for any row format,
+ so the DATA_DIR flag is compatible with all other table flags. */
+
+ return(true);
}
/********************************************************************//**
@@ -517,9 +595,7 @@ dict_sys_tables_type_validate(
ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(type);
ulint unused = DICT_TF_GET_UNUSED(type);
- /* If the format is UNIV_FORMAT_A, table->flags == 0, but
- SYS_TABLES.TYPE == 1, which is defined as SYS_TABLE_TYPE_ANTELOPE.
- The low order bit of SYS_TABLES.TYPE is always set to 1.
+ /* The low order bit of SYS_TABLES.TYPE is always set to 1.
If the format is UNIV_FORMAT_B or higher, this field is the same
as dict_table_t::flags. Zero is not allowed here. */
if (!low_order_bit) {
@@ -527,12 +603,9 @@ dict_sys_tables_type_validate(
}
if (redundant) {
- /* This is Redundant row format, only the first bit
- should be set in SYS_TABLES.TYPE */
- if (type != SYS_TABLE_TYPE_ANTELOPE) {
+ if (zip_ssize || atomic_blobs) {
return(ULINT_UNDEFINED);
}
- return(DICT_TF_REDUNDANT);
}
/* Make sure there are no bits that we do not know about. */
@@ -569,6 +642,11 @@ dict_sys_tables_type_validate(
}
}
+ /* There is nothing to validate for the data_dir field.
+ CREATE TABLE ... DATA DIRECTORY is supported for any row
+ format, so the DATA_DIR flag is compatible with any other
+ table flags. However, it is not used with TEMPORARY tables.*/
+
/* Return the validated SYS_TABLES.TYPE. */
return(type);
}
@@ -584,7 +662,7 @@ dict_tf_get_rec_format(
/*===================*/
ulint flags) /*!< in: dict_table_t::flags */
{
- dict_tf_validate(flags);
+ ut_a(dict_tf_is_valid(flags));
if (!DICT_TF_GET_COMPACT(flags)) {
return(REC_FORMAT_REDUNDANT);
@@ -640,7 +718,8 @@ dict_tf_set(
/*========*/
ulint* flags, /*!< in/out: table flags */
rec_format_t format, /*!< in: file format */
- ulint zip_ssize) /*!< in: zip shift size */
+ ulint zip_ssize, /*!< in: zip shift size */
+ bool use_data_dir) /*!< in: table uses DATA DIRECTORY */
{
switch (format) {
case REC_FORMAT_REDUNDANT:
@@ -662,6 +741,10 @@ dict_tf_set(
ut_ad(zip_ssize == 0);
break;
}
+
+ if (use_data_dir) {
+ *flags |= (1 << DICT_TF_POS_DATA_DIR);
+ }
}
/********************************************************************//**
@@ -679,15 +762,61 @@ UNIV_INLINE
ulint
dict_tf_to_fsp_flags(
/*=================*/
- ulint flags) /*!< in: dict_table_t::flags */
+ ulint table_flags) /*!< in: dict_table_t::flags */
{
+ ulint fsp_flags;
+
+ DBUG_EXECUTE_IF("dict_tf_to_fsp_flags_failure",
+ return(ULINT_UNDEFINED););
+
/* Adjust bit zero. */
- flags = (flags == DICT_TF_COMPACT) ? 0 : flags;
+ fsp_flags = DICT_TF_HAS_ATOMIC_BLOBS(table_flags) ? 1 : 0;
+
+ /* ZIP_SSIZE and ATOMIC_BLOBS are at the same position. */
+ fsp_flags |= table_flags & DICT_TF_MASK_ZIP_SSIZE;
+ fsp_flags |= table_flags & DICT_TF_MASK_ATOMIC_BLOBS;
/* In addition, tablespace flags also contain the page size. */
- flags = fsp_flags_set_page_size(flags, UNIV_PAGE_SIZE);
+ fsp_flags |= fsp_flags_set_page_size(fsp_flags, UNIV_PAGE_SIZE);
+
+ /* The DATA_DIR flag is in a different position in fsp_flag */
+ fsp_flags |= DICT_TF_HAS_DATA_DIR(table_flags)
+ ? FSP_FLAGS_MASK_DATA_DIR : 0;
- return(fsp_flags_validate(flags));
+ ut_a(fsp_flags_is_valid(fsp_flags));
+
+ return(fsp_flags);
+}
+
+/********************************************************************//**
+Convert a 32 bit integer from SYS_TABLES.TYPE to dict_table_t::flags
+The following chart shows the translation of the low order bit.
+Other bits are the same.
+========================= Low order bit ==========================
+ | REDUNDANT | COMPACT | COMPRESSED and DYNAMIC
+SYS_TABLES.TYPE | 1 | 1 | 1
+dict_table_t::flags | 0 | 1 | 1
+==================================================================
+@return ulint containing SYS_TABLES.TYPE */
+UNIV_INLINE
+ulint
+dict_sys_tables_type_to_tf(
+/*=======================*/
+ ulint type, /*!< in: SYS_TABLES.TYPE field */
+ ulint n_cols) /*!< in: SYS_TABLES.N_COLS field */
+{
+ ulint flags;
+ ulint redundant = !(n_cols & DICT_N_COLS_COMPACT);
+
+ /* Adjust bit zero. */
+ flags = redundant ? 0 : 1;
+
+ /* ZIP_SSIZE, ATOMIC_BLOBS & DATA_DIR are the same. */
+ flags |= type & (DICT_TF_MASK_ZIP_SSIZE
+ | DICT_TF_MASK_ATOMIC_BLOBS
+ | DICT_TF_MASK_DATA_DIR);
+
+ return(flags);
}
/********************************************************************//**
@@ -706,13 +835,19 @@ dict_tf_to_sys_tables_type(
/*=======================*/
ulint flags) /*!< in: dict_table_t::flags */
{
- if (!DICT_TF_HAS_ATOMIC_BLOBS(flags)) {
- ut_a(flags == DICT_TF_REDUNDANT
- || flags == DICT_TF_COMPACT);
- return(SYS_TABLE_TYPE_ANTELOPE);
- }
+ ulint type;
+
+ ut_a(dict_tf_is_valid(flags));
+
+ /* Adjust bit zero. It is always 1 in SYS_TABLES.TYPE */
+ type = 1;
+
+ /* ZIP_SSIZE, ATOMIC_BLOBS & DATA_DIR are the same. */
+ type |= flags & (DICT_TF_MASK_ZIP_SSIZE
+ | DICT_TF_MASK_ATOMIC_BLOBS
+ | DICT_TF_MASK_DATA_DIR);
- return(dict_tf_validate(flags));
+ return(type);
}
/********************************************************************//**
@@ -1064,6 +1199,103 @@ dict_index_get_space_reserve(void)
return(UNIV_PAGE_SIZE / 16);
}
+/********************************************************************//**
+Gets the status of online index creation.
+@return the status */
+UNIV_INLINE
+enum online_index_status
+dict_index_get_online_status(
+/*=========================*/
+ const dict_index_t* index) /*!< in: secondary index */
+{
+ enum online_index_status status;
+
+ status = (enum online_index_status) index->online_status;
+
+ /* Without the index->lock protection, the online
+ status can change from ONLINE_INDEX_CREATION to
+ ONLINE_INDEX_COMPLETE (or ONLINE_INDEX_ABORTED) in
+ row_log_apply() once log application is done. So to make
+ sure the status is ONLINE_INDEX_CREATION or ONLINE_INDEX_COMPLETE
+ you should always do the recheck after acquiring index->lock */
+
+#ifdef UNIV_DEBUG
+ switch (status) {
+ case ONLINE_INDEX_COMPLETE:
+ case ONLINE_INDEX_CREATION:
+ case ONLINE_INDEX_ABORTED:
+ case ONLINE_INDEX_ABORTED_DROPPED:
+ return(status);
+ }
+ ut_error;
+#endif /* UNIV_DEBUG */
+ return(status);
+}
+
+/********************************************************************//**
+Sets the status of online index creation. */
+UNIV_INLINE
+void
+dict_index_set_online_status(
+/*=========================*/
+ dict_index_t* index, /*!< in/out: index */
+ enum online_index_status status) /*!< in: status */
+{
+ ut_ad(!(index->type & DICT_FTS));
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+#ifdef UNIV_DEBUG
+ switch (dict_index_get_online_status(index)) {
+ case ONLINE_INDEX_COMPLETE:
+ case ONLINE_INDEX_CREATION:
+ break;
+ case ONLINE_INDEX_ABORTED:
+ ut_ad(status == ONLINE_INDEX_ABORTED_DROPPED);
+ break;
+ case ONLINE_INDEX_ABORTED_DROPPED:
+ ut_error;
+ }
+#endif /* UNIV_DEBUG */
+
+ index->online_status = status;
+ ut_ad(dict_index_get_online_status(index) == status);
+}
+
+/********************************************************************//**
+Determines if a secondary index is being or has been created online,
+or if the table is being rebuilt online, allowing concurrent modifications
+to the table.
+@retval true if the index is being or has been built online, or
+if this is a clustered index and the table is being or has been rebuilt online
+@retval false if the index has been created or the table has been
+rebuilt completely */
+UNIV_INLINE
+bool
+dict_index_is_online_ddl(
+/*=====================*/
+ const dict_index_t* index) /*!< in: index */
+{
+#ifdef UNIV_DEBUG
+ if (dict_index_is_clust(index)) {
+ switch (dict_index_get_online_status(index)) {
+ case ONLINE_INDEX_CREATION:
+ return(true);
+ case ONLINE_INDEX_COMPLETE:
+ return(false);
+ case ONLINE_INDEX_ABORTED:
+ case ONLINE_INDEX_ABORTED_DROPPED:
+ break;
+ }
+ ut_ad(0);
+ return(false);
+ }
+#endif /* UNIV_DEBUG */
+
+ return(UNIV_UNLIKELY(dict_index_get_online_status(index)
+ != ONLINE_INDEX_COMPLETE));
+}
+
/**********************************************************************//**
Check whether a column exists in an FTS index.
@return ULINT_UNDEFINED if no match else the offset within the vector */
@@ -1147,4 +1379,28 @@ dict_index_is_corrupted(
|| (index->table && index->table->corrupted));
}
+/********************************************************************//**
+Check if the tablespace for the table has been discarded.
+@return true if the tablespace has been discarded. */
+UNIV_INLINE
+bool
+dict_table_is_discarded(
+/*====================*/
+ const dict_table_t* table) /*!< in: table to check */
+{
+ return(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_DISCARDED));
+}
+
+/********************************************************************//**
+Check if it is a temporary table.
+@return true if temporary table flag is set. */
+UNIV_INLINE
+bool
+dict_table_is_temporary(
+/*====================*/
+ const dict_table_t* table) /*!< in: table to check */
+{
+ return(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY));
+}
+
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/dict0load.h b/storage/innobase/include/dict0load.h
index 13b9a121c1c..5991d58a686 100644
--- a/storage/innobase/include/dict0load.h
+++ b/storage/innobase/include/dict0load.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -29,38 +29,35 @@ Created 4/24/1996 Heikki Tuuri
#include "univ.i"
#include "dict0types.h"
+#include "trx0types.h"
#include "ut0byte.h"
#include "mem0mem.h"
#include "btr0types.h"
-/** enum that defines all 6 system table IDs */
-enum dict_system_table_id {
+/** enum that defines all system table IDs. @see SYSTEM_TABLE_NAME[] */
+enum dict_system_id_t {
SYS_TABLES = 0,
SYS_INDEXES,
SYS_COLUMNS,
SYS_FIELDS,
SYS_FOREIGN,
SYS_FOREIGN_COLS,
+ SYS_TABLESPACES,
+ SYS_DATAFILES,
/* This must be last item. Defines the number of system tables. */
SYS_NUM_SYSTEM_TABLES
};
-typedef enum dict_system_table_id dict_system_id_t;
-
/** Status bit for dict_process_sys_tables_rec_and_mtr_commit() */
-enum dict_table_info {
+enum dict_table_info_t {
DICT_TABLE_LOAD_FROM_RECORD = 0,/*!< Directly populate a dict_table_t
structure with information from
a SYS_TABLES record */
- DICT_TABLE_LOAD_FROM_CACHE = 1, /*!< Check first whether dict_table_t
+ DICT_TABLE_LOAD_FROM_CACHE = 1 /*!< Check first whether dict_table_t
is in the cache, if so, return it */
- DICT_TABLE_UPDATE_STATS = 2 /*!< whether to update statistics
- when loading SYS_TABLES information. */
};
-typedef enum dict_table_info dict_table_info_t;
-
/********************************************************************//**
In a crash recovery we already have all the tablespace objects created.
This function compares the space id information in the InnoDB data dictionary
@@ -157,6 +154,27 @@ dict_load_field_low(
for temporary storage */
const rec_t* rec); /*!< in: SYS_FIELDS record */
/********************************************************************//**
+Using the table->heap, copy the null-terminated filepath into
+table->data_dir_path and put a null byte before the extension.
+This allows SHOW CREATE TABLE to return the correct DATA DIRECTORY path.
+Make this data directory path only if it has not yet been saved. */
+UNIV_INTERN
+void
+dict_save_data_dir_path(
+/*====================*/
+ dict_table_t* table, /*!< in/out: table */
+ char* filepath); /*!< in: filepath of tablespace */
+/*****************************************************************//**
+Make sure the data_file_name is saved in dict_table_t if needed. Try to
+read it from the file dictionary first, then from SYS_DATAFILES. */
+UNIV_INTERN
+void
+dict_get_and_save_data_dir_path(
+/*============================*/
+ dict_table_t* table, /*!< in/out: table */
+ bool dict_mutex_own); /*!< in: true if dict_sys->mutex
+ is owned already */
+/********************************************************************//**
Loads a table definition and also all its index definitions, and also
the cluster definition if the table is a member in a cluster. Also loads
all foreign key constraints where the foreign key is in the table or where
@@ -199,14 +217,15 @@ cache already contains all constraints where the other relevant table is
already in the dictionary cache.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
dict_load_foreigns(
/*===============*/
const char* table_name, /*!< in: table name */
ibool check_recursive,/*!< in: Whether to check recursive
load of tables chained by FK */
- ibool check_charsets);/*!< in: TRUE=check charsets
+ ibool check_charsets) /*!< in: TRUE=check charsets
compatibility */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************//**
Prints to the standard output information on all tables found in the data
dictionary system table. */
@@ -324,6 +343,66 @@ dict_process_sys_foreign_col_rec(
const char** ref_col_name, /*!< out: referenced column name
in referenced table */
ulint* pos); /*!< out: column position */
+/********************************************************************//**
+This function parses a SYS_TABLESPACES record, extracts necessary
+information from the record and returns to caller.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_process_sys_tablespaces(
+/*=========================*/
+ mem_heap_t* heap, /*!< in/out: heap memory */
+ const rec_t* rec, /*!< in: current SYS_TABLESPACES rec */
+ ulint* space, /*!< out: pace id */
+ const char** name, /*!< out: tablespace name */
+ ulint* flags); /*!< out: tablespace flags */
+/********************************************************************//**
+This function parses a SYS_DATAFILES record, extracts necessary
+information from the record and returns to caller.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_process_sys_datafiles(
+/*=======================*/
+ mem_heap_t* heap, /*!< in/out: heap memory */
+ const rec_t* rec, /*!< in: current SYS_DATAFILES rec */
+ ulint* space, /*!< out: pace id */
+ const char** path); /*!< out: datafile path */
+/********************************************************************//**
+Get the filepath for a spaceid from SYS_DATAFILES. This function provides
+a temporary heap which is used for the table lookup, but not for the path.
+The caller must free the memory for the path returned. This function can
+return NULL if the space ID is not found in SYS_DATAFILES, then the caller
+will assume that the ibd file is in the normal datadir.
+@return own: A copy of the first datafile found in SYS_DATAFILES.PATH for
+the given space ID. NULL if space ID is zero or not found. */
+UNIV_INTERN
+char*
+dict_get_first_path(
+/*================*/
+ ulint space, /*!< in: space id */
+ const char* name); /*!< in: tablespace name */
+/********************************************************************//**
+Update the record for space_id in SYS_TABLESPACES to this filepath.
+@return DB_SUCCESS if OK, dberr_t if the insert failed */
+UNIV_INTERN
+dberr_t
+dict_update_filepath(
+/*=================*/
+ ulint space_id, /*!< in: space id */
+ const char* filepath); /*!< in: filepath */
+/********************************************************************//**
+Insert records into SYS_TABLESPACES and SYS_DATAFILES.
+@return DB_SUCCESS if OK, dberr_t if the insert failed */
+UNIV_INTERN
+dberr_t
+dict_insert_tablespace_and_filepath(
+/*================================*/
+ ulint space, /*!< in: space id */
+ const char* name, /*!< in: talespace name */
+ const char* filepath, /*!< in: filepath */
+ ulint fsp_flags); /*!< in: tablespace flags */
+
#ifndef UNIV_NONINL
#include "dict0load.ic"
#endif
diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h
index ea7e996dfa8..671f67eb1f8 100644
--- a/storage/innobase/include/dict0mem.h
+++ b/storage/innobase/include/dict0mem.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -30,6 +31,7 @@ Created 1/8/1996 Heikki Tuuri
#include "dict0types.h"
#include "data0type.h"
#include "mem0mem.h"
+#include "row0types.h"
#include "rem0types.h"
#include "btr0types.h"
#ifndef UNIV_HOTBACKUP
@@ -46,7 +48,7 @@ Created 1/8/1996 Heikki Tuuri
#include "fts0fts.h"
/* Forward declaration. */
-typedef struct ib_rbt_struct ib_rbt_t;
+struct ib_rbt_t;
/** Type flags of an index: OR'ing of the flags is allowed to define a
combination of types */
@@ -93,12 +95,9 @@ and SYS_TABLES.TYPE. Similar flags found in fil_space_t and FSP_SPACE_FLAGS
are described in fsp0fsp.h. */
/* @{ */
-/** SYS_TABLES.TYPE can be equal to 1 which means that the Row format
-is one of two Antelope row formats, Redundant or Compact. */
-#define SYS_TABLE_TYPE_ANTELOPE 1
-/** dict_table_t::flags can be equal to 0 if the row format = Redundant */
+/** dict_table_t::flags bit 0 is equal to 0 if the row format = Redundant */
#define DICT_TF_REDUNDANT 0 /*!< Redundant row format. */
-/** dict_table_t::flags can be equal to 1 if the row format = Compact */
+/** dict_table_t::flags bit 0 is equal to 1 if the row format = Compact */
#define DICT_TF_COMPACT 1 /*!< Compact row format. */
/** This bitmask is used in SYS_TABLES.N_COLS to set and test whether
@@ -115,10 +114,17 @@ Brracuda row formats store the whole blob or text field off-page atomically.
Secondary indexes are created from this external data using row_ext_t
to cache the BLOB prefixes. */
#define DICT_TF_WIDTH_ATOMIC_BLOBS 1
+/** If a table is created with the MYSQL option DATA DIRECTORY and
+innodb-file-per-table, an older engine will not be able to find that table.
+This flag prevents older engines from attempting to open the table and
+allows InnoDB to update_create_info() accordingly. */
+#define DICT_TF_WIDTH_DATA_DIR 1
+
/** Width of all the currently known table flags */
#define DICT_TF_BITS (DICT_TF_WIDTH_COMPACT \
+ DICT_TF_WIDTH_ZIP_SSIZE \
- + DICT_TF_WIDTH_ATOMIC_BLOBS)
+ + DICT_TF_WIDTH_ATOMIC_BLOBS \
+ + DICT_TF_WIDTH_DATA_DIR)
/** A mask of all the known/used bits in table flags */
#define DICT_TF_BIT_MASK (~(~0 << DICT_TF_BITS))
@@ -131,9 +137,12 @@ to cache the BLOB prefixes. */
/** Zero relative shift position of the ATOMIC_BLOBS field */
#define DICT_TF_POS_ATOMIC_BLOBS (DICT_TF_POS_ZIP_SSIZE \
+ DICT_TF_WIDTH_ZIP_SSIZE)
-/** Zero relative shift position of the start of the UNUSED bits */
-#define DICT_TF_POS_UNUSED (DICT_TF_POS_ATOMIC_BLOBS \
+/** Zero relative shift position of the DATA_DIR field */
+#define DICT_TF_POS_DATA_DIR (DICT_TF_POS_ATOMIC_BLOBS \
+ DICT_TF_WIDTH_ATOMIC_BLOBS)
+/** Zero relative shift position of the start of the UNUSED bits */
+#define DICT_TF_POS_UNUSED (DICT_TF_POS_DATA_DIR \
+ + DICT_TF_WIDTH_DATA_DIR)
/** Bit mask of the COMPACT field */
#define DICT_TF_MASK_COMPACT \
@@ -147,6 +156,10 @@ to cache the BLOB prefixes. */
#define DICT_TF_MASK_ATOMIC_BLOBS \
((~(~0 << DICT_TF_WIDTH_ATOMIC_BLOBS)) \
<< DICT_TF_POS_ATOMIC_BLOBS)
+/** Bit mask of the DATA_DIR field */
+#define DICT_TF_MASK_DATA_DIR \
+ ((~(~0 << DICT_TF_WIDTH_DATA_DIR)) \
+ << DICT_TF_POS_DATA_DIR)
/** Return the value of the COMPACT field */
#define DICT_TF_GET_COMPACT(flags) \
@@ -160,6 +173,10 @@ to cache the BLOB prefixes. */
#define DICT_TF_HAS_ATOMIC_BLOBS(flags) \
((flags & DICT_TF_MASK_ATOMIC_BLOBS) \
>> DICT_TF_POS_ATOMIC_BLOBS)
+/** Return the value of the ATOMIC_BLOBS field */
+#define DICT_TF_HAS_DATA_DIR(flags) \
+ ((flags & DICT_TF_MASK_DATA_DIR) \
+ >> DICT_TF_POS_DATA_DIR)
/** Return the contents of the UNUSED bits */
#define DICT_TF_GET_UNUSED(flags) \
(flags >> DICT_TF_POS_UNUSED)
@@ -174,7 +191,7 @@ ROW_FORMAT=REDUNDANT. InnoDB engines do not check these flags
for unknown bits in order to protect backward incompatibility. */
/* @{ */
/** Total number of bits in table->flags2. */
-#define DICT_TF2_BITS 5
+#define DICT_TF2_BITS 6
#define DICT_TF2_BIT_MASK ~(~0 << DICT_TF2_BITS)
/** TEMPORARY; TRUE for tables from CREATE TEMPORARY TABLE. */
@@ -189,6 +206,9 @@ This is a transient bit for index build */
/** This bit is used during table creation to indicate that it will
use its own tablespace instead of the system tablespace. */
#define DICT_TF2_USE_TABLESPACE 16
+
+/** Set when we discard/detach the tablespace */
+#define DICT_TF2_DISCARDED 32
/* @} */
#define DICT_TF2_FLAG_SET(table, flag) \
@@ -225,9 +245,7 @@ dict_mem_table_create(
/*==================*/
const char* name, /*!< in: table name */
ulint space, /*!< in: space where the clustered index
- of the table is placed; this parameter
- is ignored if the table is made
- a member of a cluster */
+ of the table is placed */
ulint n_cols, /*!< in: number of columns */
ulint flags, /*!< in: table flags */
ulint flags2); /*!< in: table flags2 */
@@ -249,7 +267,19 @@ dict_mem_table_add_col(
const char* name, /*!< in: column name, or NULL */
ulint mtype, /*!< in: main datatype */
ulint prtype, /*!< in: precise type */
- ulint len); /*!< in: precision */
+ ulint len) /*!< in: precision */
+ __attribute__((nonnull(1)));
+/**********************************************************************//**
+Renames a column of a table in the data dictionary cache. */
+UNIV_INTERN
+void
+dict_mem_table_col_rename(
+/*======================*/
+ dict_table_t* table, /*!< in/out: table */
+ unsigned nth_col,/*!< in: column index */
+ const char* from, /*!< in: old column name */
+ const char* to) /*!< in: new column name */
+ __attribute__((nonnull));
/**********************************************************************//**
This function populates a dict_col_t memory structure with
supplied information. */
@@ -347,8 +377,19 @@ dict_mem_referenced_table_name_lookup_set(
dict_foreign_t* foreign, /*!< in/out: foreign struct */
ibool do_alloc); /*!< in: is an alloc needed */
+/*******************************************************************//**
+Create a temporary tablename.
+@return temporary tablename suitable for InnoDB use */
+UNIV_INTERN __attribute__((nonnull, warn_unused_result))
+char*
+dict_mem_create_temporary_tablename(
+/*================================*/
+ mem_heap_t* heap, /*!< in: memory heap */
+ const char* dbtab, /*!< in: database/table name */
+ table_id_t id); /*!< in: InnoDB table id */
+
/** Data structure for a column in a table */
-struct dict_col_struct{
+struct dict_col_t{
/*----------------------*/
/** The following are copied from dtype_t,
so that all bit-fields can be packed tightly. */
@@ -424,7 +465,7 @@ be REC_VERSION_56_MAX_INDEX_COL_LEN (3072) bytes */
#define DICT_MAX_FIXED_COL_LEN DICT_ANTELOPE_MAX_INDEX_COL_LEN
/** Data structure for a field in an index */
-struct dict_field_struct{
+struct dict_field_t{
dict_col_t* col; /*!< pointer to the table column */
const char* name; /*!< name of the column */
unsigned prefix_len:12; /*!< 0 or the length of the column
@@ -440,9 +481,61 @@ struct dict_field_struct{
DICT_ANTELOPE_MAX_INDEX_COL_LEN */
};
+/**********************************************************************//**
+PADDING HEURISTIC BASED ON LINEAR INCREASE OF PADDING TO AVOID
+COMPRESSION FAILURES
+(Note: this is relevant only for compressed indexes)
+GOAL: Avoid compression failures by maintaining information about the
+compressibility of data. If data is not very compressible then leave
+some extra space 'padding' in the uncompressed page making it more
+likely that compression of less than fully packed uncompressed page will
+succeed.
+
+This padding heuristic works by increasing the pad linearly until the
+desired failure rate is reached. A "round" is a fixed number of
+compression operations.
+After each round, the compression failure rate for that round is
+computed. If the failure rate is too high, then padding is incremented
+by a fixed value, otherwise it's left intact.
+If the compression failure is lower than the desired rate for a fixed
+number of consecutive rounds, then the padding is decreased by a fixed
+value. This is done to prevent overshooting the padding value,
+and to accommodate the possible change in data compressibility. */
+
+/** Number of zip ops in one round. */
+#define ZIP_PAD_ROUND_LEN (128)
+
+/** Number of successful rounds after which the padding is decreased */
+#define ZIP_PAD_SUCCESSFUL_ROUND_LIMIT (5)
+
+/** Amount by which padding is increased. */
+#define ZIP_PAD_INCR (128)
+
+/** Percentage of compression failures that are allowed in a single
+round */
+extern ulong zip_failure_threshold_pct;
+
+/** Maximum percentage of a page that can be allowed as a pad to avoid
+compression failures */
+extern ulong zip_pad_max;
+
+/** Data structure to hold information about about how much space in
+an uncompressed page should be left as padding to avoid compression
+failures. This estimate is based on a self-adapting heuristic. */
+struct zip_pad_info_t {
+ os_fast_mutex_t mutex; /*!< mutex protecting the info */
+ ulint pad; /*!< number of bytes used as pad */
+ ulint success;/*!< successful compression ops during
+ current round */
+ ulint failure;/*!< failed compression ops during
+ current round */
+ ulint n_rounds;/*!< number of currently successful
+ rounds */
+};
+
/** Data structure for an index. Most fields will be
initialized to 0, NULL or FALSE in dict_mem_index_create(). */
-struct dict_index_struct{
+struct dict_index_t{
index_id_t id; /*!< id of the index */
mem_heap_t* heap; /*!< memory heap */
const char* name; /*!< index name */
@@ -478,24 +571,35 @@ struct dict_index_struct{
unsigned cached:1;/*!< TRUE if the index object is in the
dictionary cache */
unsigned to_be_dropped:1;
- /*!< TRUE if this index is marked to be
- dropped in ha_innobase::prepare_drop_index(),
- otherwise FALSE. Protected by
- dict_sys->mutex, dict_operation_lock and
- index->lock.*/
+ /*!< TRUE if the index is to be dropped;
+ protected by dict_operation_lock */
+ unsigned online_status:2;
+ /*!< enum online_index_status.
+ Transitions from ONLINE_INDEX_COMPLETE (to
+ ONLINE_INDEX_CREATION) are protected
+ by dict_operation_lock and
+ dict_sys->mutex. Other changes are
+ protected by index->lock. */
dict_field_t* fields; /*!< array of field descriptions */
#ifndef UNIV_HOTBACKUP
UT_LIST_NODE_T(dict_index_t)
indexes;/*!< list of indexes of the table */
- btr_search_t* search_info; /*!< info used in optimistic searches */
+ btr_search_t* search_info;
+ /*!< info used in optimistic searches */
+ row_log_t* online_log;
+ /*!< the log of modifications
+ during online index creation;
+ valid when online_status is
+ ONLINE_INDEX_CREATION */
/*----------------------*/
/** Statistics for query optimization */
/* @{ */
ib_uint64_t* stat_n_diff_key_vals;
/*!< approximate number of different
key values for this index, for each
- n-column prefix where n <=
- dict_get_n_unique(index); we
+ n-column prefix where 1 <= n <=
+ dict_get_n_unique(index) (the array is
+ indexed from 0 to n_uniq-1); we
periodically calculate new
estimates */
ib_uint64_t* stat_n_sample_sizes;
@@ -506,7 +610,8 @@ struct dict_index_struct{
ib_uint64_t* stat_n_non_null_key_vals;
/* approximate number of non-null key values
for this index, for each column where
- n < dict_get_n_unique(index); This
+ 1 <= n <= dict_get_n_unique(index) (the array
+ is indexed from 0 to n_uniq-1); This
is used when innodb_stats_method is
"nulls_ignored". */
ulint stat_index_size;
@@ -521,9 +626,11 @@ struct dict_index_struct{
trx_id_t trx_id; /*!< id of the transaction that created this
index, or 0 if the index existed
when InnoDB was started up */
+ zip_pad_info_t zip_pad;/*!< Information about state of
+ compression failures and successes */
#endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_BLOB_DEBUG
- mutex_t blobs_mutex;
+ ib_mutex_t blobs_mutex;
/*!< mutex protecting blobs */
ib_rbt_t* blobs; /*!< map of (page_no,heap_no,field_no)
to first_blob_page_no; protected by
@@ -531,15 +638,35 @@ struct dict_index_struct{
#endif /* UNIV_BLOB_DEBUG */
#ifdef UNIV_DEBUG
ulint magic_n;/*!< magic number */
-/** Value of dict_index_struct::magic_n */
+/** Value of dict_index_t::magic_n */
# define DICT_INDEX_MAGIC_N 76789786
#endif
};
+/** The status of online index creation */
+enum online_index_status {
+ /** the index is complete and ready for access */
+ ONLINE_INDEX_COMPLETE = 0,
+ /** the index is being created, online
+ (allowing concurrent modifications) */
+ ONLINE_INDEX_CREATION,
+ /** secondary index creation was aborted and the index
+ should be dropped as soon as index->table->n_ref_count reaches 0,
+ or online table rebuild was aborted and the clustered index
+ of the original table should soon be restored to
+ ONLINE_INDEX_COMPLETE */
+ ONLINE_INDEX_ABORTED,
+ /** the online index creation was aborted, the index was
+ dropped from the data dictionary and the tablespace, and it
+ should be dropped from the data dictionary cache as soon as
+ index->table->n_ref_count reaches 0. */
+ ONLINE_INDEX_ABORTED_DROPPED
+};
+
/** Data structure for a foreign key constraint; an example:
FOREIGN KEY (A, B) REFERENCES TABLE2 (C, D). Most fields will be
initialized to 0, NULL or FALSE in dict_mem_foreign_create(). */
-struct dict_foreign_struct{
+struct dict_foreign_t{
mem_heap_t* heap; /*!< this object is allocated from
this memory heap */
char* id; /*!< id of the constraint as a
@@ -592,7 +719,7 @@ a foreign key constraint is enforced, therefore RESTRICT just means no flag */
/** Data structure for a database table. Most fields will be
initialized to 0, NULL or FALSE in dict_mem_table_create(). */
-struct dict_table_struct{
+struct dict_table_t{
table_id_t id; /*!< id of the table */
mem_heap_t* heap; /*!< memory heap */
char* name; /*!< table name */
@@ -602,6 +729,8 @@ struct dict_table_struct{
innodb_file_per_table is defined in my.cnf;
in Unix this is usually /tmp/..., in Windows
temp\... */
+ char* data_dir_path; /*!< NULL or the directory path
+ specified by DATA DIRECTORY */
unsigned space:32;
/*!< space where the clustered index of the
table is placed */
@@ -612,13 +741,16 @@ struct dict_table_struct{
tablespace and the .ibd file is missing; then
we must return in ha_innodb.cc an error if the
user tries to query such an orphaned table */
- unsigned tablespace_discarded:1;
- /*!< this flag is set TRUE when the user
- calls DISCARD TABLESPACE on this
- table, and reset to FALSE in IMPORT
- TABLESPACE */
unsigned cached:1;/*!< TRUE if the table object has been added
to the dictionary cache */
+ unsigned to_be_dropped:1;
+ /*!< TRUE if the table is to be dropped, but
+ not yet actually dropped (could in the bk
+ drop list); It is turned on at the beginning
+ of row_drop_table_for_mysql() and turned off
+ just before we start to update system tables
+ for the drop. It is protected by
+ dict_operation_lock */
unsigned n_def:10;/*!< number of columns defined so far */
unsigned n_cols:10;/*!< number of columns */
unsigned can_be_evicted:1;
@@ -626,6 +758,10 @@ struct dict_table_struct{
or a table that has no FK relationships */
unsigned corrupted:1;
/*!< TRUE if table is corrupted */
+ unsigned drop_aborted:1;
+ /*!< TRUE if some indexes should be dropped
+ after ONLINE_INDEX_ABORTED
+ or ONLINE_INDEX_ABORTED_DROPPED */
dict_col_t* cols; /*!< array of column descriptions */
const char* col_names;
/*!< Column names packed in a character string
@@ -659,6 +795,12 @@ struct dict_table_struct{
on the table: we cannot drop the table while
there are foreign key checks running on
it! */
+ trx_id_t def_trx_id;
+ /*!< transaction id that last touched
+ the table definition, either when
+ loading the definition or CREATE
+ TABLE, or ALTER TABLE (prepare,
+ commit, and rollback phases) */
trx_id_t query_cache_inv_trx_id;
/*!< transactions whose trx id is
smaller than this number are not
@@ -691,7 +833,55 @@ struct dict_table_struct{
unsigned stat_initialized:1; /*!< TRUE if statistics have
been calculated the first time
after database startup or table creation */
- ib_int64_t stat_n_rows;
+ ib_time_t stats_last_recalc;
+ /*!< Timestamp of last recalc of the stats */
+ ib_uint32_t stat_persistent;
+ /*!< The two bits below are set in the
+ ::stat_persistent member and have the following
+ meaning:
+ 1. _ON=0, _OFF=0, no explicit persistent stats
+ setting for this table, the value of the global
+ srv_stats_persistent is used to determine
+ whether the table has persistent stats enabled
+ or not
+ 2. _ON=0, _OFF=1, persistent stats are
+ explicitly disabled for this table, regardless
+ of the value of the global srv_stats_persistent
+ 3. _ON=1, _OFF=0, persistent stats are
+ explicitly enabled for this table, regardless
+ of the value of the global srv_stats_persistent
+ 4. _ON=1, _OFF=1, not allowed, we assert if
+ this ever happens. */
+#define DICT_STATS_PERSISTENT_ON (1 << 1)
+#define DICT_STATS_PERSISTENT_OFF (1 << 2)
+ ib_uint32_t stats_auto_recalc;
+ /*!< The two bits below are set in the
+ ::stats_auto_recalc member and have
+ the following meaning:
+ 1. _ON=0, _OFF=0, no explicit auto recalc
+ setting for this table, the value of the global
+ srv_stats_persistent_auto_recalc is used to
+ determine whether the table has auto recalc
+ enabled or not
+ 2. _ON=0, _OFF=1, auto recalc is explicitly
+ disabled for this table, regardless of the
+ value of the global
+ srv_stats_persistent_auto_recalc
+ 3. _ON=1, _OFF=0, auto recalc is explicitly
+ enabled for this table, regardless of the
+ value of the global
+ srv_stats_persistent_auto_recalc
+ 4. _ON=1, _OFF=1, not allowed, we assert if
+ this ever happens. */
+#define DICT_STATS_AUTO_RECALC_ON (1 << 1)
+#define DICT_STATS_AUTO_RECALC_OFF (1 << 2)
+ ulint stats_sample_pages;
+ /*!< the number of pages to sample for this
+ table during persistent stats estimation;
+ if this is 0, then the value of the global
+ srv_stats_persistent_sample_pages will be
+ used instead. */
+ ib_uint64_t stat_n_rows;
/*!< approximate number of rows in the table;
we periodically calculate new estimates */
ulint stat_clustered_index_size;
@@ -699,19 +889,34 @@ struct dict_table_struct{
database pages */
ulint stat_sum_of_other_index_sizes;
/*!< other indexes in database pages */
- ulint stat_modified_counter;
+ ib_uint64_t stat_modified_counter;
/*!< when a row is inserted, updated,
or deleted,
we add 1 to this number; we calculate new
estimates for the stat_... values for the
- table and the indexes at an interval of 2 GB
- or when about 1 / 16 of table has been
- modified; also when the estimate operation is
+ table and the indexes when about 1 / 16 of
+ table has been modified;
+ also when the estimate operation is
called for MySQL SHOW TABLE STATUS; the
counter is reset to zero at statistics
calculation; this counter is not protected by
any latch, because this is only used for
heuristics */
+#define BG_STAT_NONE 0
+#define BG_STAT_IN_PROGRESS (1 << 0)
+ /*!< BG_STAT_IN_PROGRESS is set in
+ stats_bg_flag when the background
+ stats code is working on this table. The DROP
+ TABLE code waits for this to be cleared
+ before proceeding. */
+#define BG_STAT_SHOULD_QUIT (1 << 1)
+ /*!< BG_STAT_SHOULD_QUIT is set in
+ stats_bg_flag when DROP TABLE starts
+ waiting on BG_STAT_IN_PROGRESS to be cleared,
+ the background stats thread will detect this
+ and will eventually quit sooner */
+ byte stats_bg_flag;
+ /*!< see BG_STAT_* above */
/* @} */
/*----------------------*/
/**!< The following fields are used by the
@@ -737,7 +942,7 @@ struct dict_table_struct{
space from the lock heap of the trx:
otherwise the lock heap would grow rapidly
if we do a large insert from a select */
- mutex_t autoinc_mutex;
+ ib_mutex_t autoinc_mutex;
/*!< mutex protecting the autoincrement
counter */
ib_uint64_t autoinc;/*!< autoinc counter value to give to the
@@ -758,6 +963,14 @@ struct dict_table_struct{
fts_t* fts; /* FTS specific state variables */
/* @} */
/*----------------------*/
+
+ ib_quiesce_t quiesce;/*!< Quiescing states, protected by the
+ dict_index_t::lock. ie. we can only change
+ the state if we acquire all the latches
+ (dict_index_t::lock) in X mode of this table's
+ indexes. */
+
+ /*----------------------*/
ulint n_rec_locks;
/*!< Count of the number of record locks on
this table. We use this to determine whether
@@ -776,7 +989,7 @@ struct dict_table_struct{
#ifdef UNIV_DEBUG
ulint magic_n;/*!< magic number */
-/** Value of dict_table_struct::magic_n */
+/** Value of dict_table_t::magic_n */
# define DICT_TABLE_MAGIC_N 76333786
#endif /* UNIV_DEBUG */
};
diff --git a/storage/innobase/include/dict0stats.h b/storage/innobase/include/dict0stats.h
index 879e67a0918..186f90e3694 100644
--- a/storage/innobase/include/dict0stats.h
+++ b/storage/innobase/include/dict0stats.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2009, 2010, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2009, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -32,61 +32,128 @@ Created Jan 06, 2010 Vasil Dimov
#include "dict0types.h"
#include "trx0types.h"
-enum dict_stats_upd_option {
+enum dict_stats_upd_option_t {
DICT_STATS_RECALC_PERSISTENT,/* (re) calculate the
statistics using a precise and slow
algo and save them to the persistent
storage, if the persistent storage is
not present then emit a warning and
fall back to transient stats */
- DICT_STATS_RECALC_PERSISTENT_SILENT,/* same as
- DICT_STATS_RECALC_PERSISTENT
- but do not emit a warning */
DICT_STATS_RECALC_TRANSIENT,/* (re) calculate the statistics
using an imprecise quick algo
without saving the results
persistently */
- DICT_STATS_FETCH, /* fetch the statistics from the
- persistent storage */
- DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY /* only fetch the stats
+ DICT_STATS_EMPTY_TABLE, /* Write all zeros (or 1 where it makes sense)
+ into a table and its indexes' statistics
+ members. The resulting stats correspond to an
+ empty table. If the table is using persistent
+ statistics, then they are saved on disk. */
+ DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY /* fetch the stats
from the persistent storage if the in-memory
structures have not been initialized yet,
otherwise do nothing */
};
-typedef enum dict_stats_upd_option dict_stats_upd_option_t;
+/*********************************************************************//**
+Calculates new estimates for table and index statistics. This function
+is relatively quick and is used to calculate transient statistics that
+are not saved on disk.
+This was the only way to calculate statistics before the
+Persistent Statistics feature was introduced. */
+UNIV_INTERN
+void
+dict_stats_update_transient(
+/*========================*/
+ dict_table_t* table); /*!< in/out: table */
+
+/*********************************************************************//**
+Set the persistent statistics flag for a given table. This is set only
+in the in-memory table object and is not saved on disk. It will be read
+from the .frm file upon first open from MySQL after a server restart. */
+UNIV_INLINE
+void
+dict_stats_set_persistent(
+/*======================*/
+ dict_table_t* table, /*!< in/out: table */
+ ibool ps_on, /*!< in: persistent stats explicitly enabled */
+ ibool ps_off) /*!< in: persistent stats explicitly disabled */
+ __attribute__((nonnull));
+
+/*********************************************************************//**
+Check whether persistent statistics is enabled for a given table.
+@return TRUE if enabled, FALSE otherwise */
+UNIV_INLINE
+ibool
+dict_stats_is_persistent_enabled(
+/*=============================*/
+ const dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull, warn_unused_result));
+
+/*********************************************************************//**
+Set the auto recalc flag for a given table (only honored for a persistent
+stats enabled table). The flag is set only in the in-memory table object
+and is not saved in InnoDB files. It will be read from the .frm file upon
+first open from MySQL after a server restart. */
+UNIV_INLINE
+void
+dict_stats_auto_recalc_set(
+/*=======================*/
+ dict_table_t* table, /*!< in/out: table */
+ ibool auto_recalc_on, /*!< in: explicitly enabled */
+ ibool auto_recalc_off); /*!< in: explicitly disabled */
+
+/*********************************************************************//**
+Check whether auto recalc is enabled for a given table.
+@return TRUE if enabled, FALSE otherwise */
+UNIV_INLINE
+ibool
+dict_stats_auto_recalc_is_enabled(
+/*==============================*/
+ const dict_table_t* table); /*!< in: table */
+
+/*********************************************************************//**
+Initialize table's stats for the first time when opening a table. */
+UNIV_INLINE
+void
+dict_stats_init(
+/*============*/
+ dict_table_t* table); /*!< in/out: table */
+
+/*********************************************************************//**
+Deinitialize table's stats after the last close of the table. This is
+used to detect "FLUSH TABLE" and refresh the stats upon next open. */
+UNIV_INLINE
+void
+dict_stats_deinit(
+/*==============*/
+ dict_table_t* table) /*!< in/out: table */
+ __attribute__((nonnull));
/*********************************************************************//**
Calculates new estimates for table and index statistics. The statistics
are used in query optimization.
@return DB_* error code or DB_SUCCESS */
UNIV_INTERN
-enum db_err
+dberr_t
dict_stats_update(
/*==============*/
dict_table_t* table, /*!< in/out: table */
- dict_stats_upd_option_t stats_upd_option,
+ dict_stats_upd_option_t stats_upd_option);
/*!< in: whether to (re) calc
the stats or to fetch them from
the persistent storage */
- ibool caller_has_dict_sys_mutex);
- /*!< in: TRUE if the caller
- owns dict_sys->mutex */
/*********************************************************************//**
Removes the information for a particular index's stats from the persistent
storage if it exists and if there is data stored for this index.
-The transaction is not committed, it must not be committed in this
-function because this is the user trx that is running DROP INDEX.
-The transaction will be committed at the very end when dropping an
-index.
+This function creates its own trx and commits it.
@return DB_SUCCESS or error code */
UNIV_INTERN
-enum db_err
-dict_stats_delete_index_stats(
-/*==========================*/
- dict_index_t* index, /*!< in: index */
- trx_t* trx, /*!< in: transaction to use */
+dberr_t
+dict_stats_drop_index(
+/*==================*/
+ const char* tname, /*!< in: table name */
+ const char* iname, /*!< in: index name */
char* errstr, /*!< out: error message if != DB_SUCCESS
is returned */
ulint errstr_sz);/*!< in: size of the errstr buffer */
@@ -97,12 +164,39 @@ persistent storage if it exists and if there is data stored for the table.
This function creates its own transaction and commits it.
@return DB_SUCCESS or error code */
UNIV_INTERN
-enum db_err
-dict_stats_delete_table_stats(
-/*==========================*/
+dberr_t
+dict_stats_drop_table(
+/*==================*/
const char* table_name, /*!< in: table name */
char* errstr, /*!< out: error message
if != DB_SUCCESS is returned */
ulint errstr_sz); /*!< in: size of errstr buffer */
+/*********************************************************************//**
+Fetches or calculates new estimates for index statistics. */
+UNIV_INTERN
+void
+dict_stats_update_for_index(
+/*========================*/
+ dict_index_t* index) /*!< in/out: index */
+ __attribute__((nonnull));
+
+/*********************************************************************//**
+Renames a table in InnoDB persistent stats storage.
+This function creates its own transaction and commits it.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+dict_stats_rename_table(
+/*====================*/
+ const char* old_name, /*!< in: old table name */
+ const char* new_name, /*!< in: new table name */
+ char* errstr, /*!< out: error string if != DB_SUCCESS
+ is returned */
+ size_t errstr_sz); /*!< in: errstr size */
+
+#ifndef UNIV_NONINL
+#include "dict0stats.ic"
+#endif
+
#endif /* dict0stats_h */
diff --git a/storage/innobase/include/dict0stats.ic b/storage/innobase/include/dict0stats.ic
new file mode 100644
index 00000000000..04763f174d0
--- /dev/null
+++ b/storage/innobase/include/dict0stats.ic
@@ -0,0 +1,250 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0stats.ic
+Code used for calculating and manipulating table statistics.
+
+Created Jan 23, 2012 Vasil Dimov
+*******************************************************/
+
+#include "univ.i"
+#include "dict0dict.h" /* dict_table_stats_lock() */
+#include "dict0types.h" /* dict_table_t */
+#include "srv0srv.h" /* srv_stats_persistent, srv_stats_auto_recalc */
+
+/*********************************************************************//**
+Set the persistent statistics flag for a given table. This is set only
+in the in-memory table object and is not saved on disk. It will be read
+from the .frm file upon first open from MySQL after a server restart.
+dict_stats_set_persistent() @{ */
+UNIV_INLINE
+void
+dict_stats_set_persistent(
+/*======================*/
+ dict_table_t* table, /*!< in/out: table */
+ ibool ps_on, /*!< in: persistent stats explicitly enabled */
+ ibool ps_off) /*!< in: persistent stats explicitly disabled */
+{
+ /* Not allowed to have both flags set, but a CREATE or ALTER
+ statement that contains "STATS_PERSISTENT=0 STATS_PERSISTENT=1" would
+ end up having both set. In this case we clear the OFF flag. */
+ if (ps_on && ps_off) {
+ ps_off = FALSE;
+ }
+
+ ib_uint32_t stat_persistent = 0;
+
+ if (ps_on) {
+ stat_persistent |= DICT_STATS_PERSISTENT_ON;
+ }
+
+ if (ps_off) {
+ stat_persistent |= DICT_STATS_PERSISTENT_OFF;
+ }
+
+ /* we rely on this assignment to be atomic */
+ table->stat_persistent = stat_persistent;
+}
+/* @} */
+
+/*********************************************************************//**
+Check whether persistent statistics is enabled for a given table.
+dict_stats_is_persistent_enabled() @{
+@return TRUE if enabled, FALSE otherwise */
+UNIV_INLINE
+ibool
+dict_stats_is_persistent_enabled(
+/*=============================*/
+ const dict_table_t* table) /*!< in: table */
+{
+ /* Because of the nature of this check (non-locking) it is possible
+ that a table becomes:
+ * PS-disabled immediately after this function has returned TRUE or
+ * PS-enabled immediately after this function has returned FALSE.
+ This means that it is possible that we do:
+ + dict_stats_update(DICT_STATS_RECALC_PERSISTENT) on a table that has
+ just been PS-disabled or
+ + dict_stats_update(DICT_STATS_RECALC_TRANSIENT) on a table that has
+ just been PS-enabled.
+ This is acceptable. Avoiding this would mean that we would have to
+ protect the ::stat_persistent with dict_table_stats_lock() like the
+ other ::stat_ members which would be too big performance penalty,
+ especially when this function is called from
+ row_update_statistics_if_needed(). */
+
+ /* we rely on this read to be atomic */
+ ib_uint32_t stat_persistent = table->stat_persistent;
+
+ if (stat_persistent & DICT_STATS_PERSISTENT_ON) {
+ ut_ad(!(stat_persistent & DICT_STATS_PERSISTENT_OFF));
+ return(TRUE);
+ } else if (stat_persistent & DICT_STATS_PERSISTENT_OFF) {
+ return(FALSE);
+ } else {
+ return(srv_stats_persistent);
+ }
+}
+/* @} */
+
+/*********************************************************************//**
+Set the auto recalc flag for a given table (only honored for a persistent
+stats enabled table). The flag is set only in the in-memory table object
+and is not saved in InnoDB files. It will be read from the .frm file upon
+first open from MySQL after a server restart.
+dict_stats_auto_recalc_set() @{ */
+UNIV_INLINE
+void
+dict_stats_auto_recalc_set(
+/*=======================*/
+ dict_table_t* table, /*!< in/out: table */
+ ibool auto_recalc_on, /*!< in: explicitly enabled */
+ ibool auto_recalc_off) /*!< in: explicitly disabled */
+{
+ ut_ad(!auto_recalc_on || !auto_recalc_off);
+
+ ib_uint32_t stats_auto_recalc = 0;
+
+ if (auto_recalc_on) {
+ stats_auto_recalc |= DICT_STATS_AUTO_RECALC_ON;
+ }
+
+ if (auto_recalc_off) {
+ stats_auto_recalc |= DICT_STATS_AUTO_RECALC_OFF;
+ }
+
+ /* we rely on this assignment to be atomic */
+ table->stats_auto_recalc = stats_auto_recalc;
+}
+/* @} */
+
+/*********************************************************************//**
+Check whether auto recalc is enabled for a given table.
+dict_stats_auto_recalc_is_enabled() @{
+@return TRUE if enabled, FALSE otherwise */
+UNIV_INLINE
+ibool
+dict_stats_auto_recalc_is_enabled(
+/*==============================*/
+ const dict_table_t* table) /*!< in: table */
+{
+ /* we rely on this read to be atomic */
+ ib_uint32_t stats_auto_recalc = table->stats_auto_recalc;
+
+ if (stats_auto_recalc & DICT_STATS_AUTO_RECALC_ON) {
+ ut_ad(!(stats_auto_recalc & DICT_STATS_AUTO_RECALC_OFF));
+ return(TRUE);
+ } else if (stats_auto_recalc & DICT_STATS_AUTO_RECALC_OFF) {
+ return(FALSE);
+ } else {
+ return(srv_stats_auto_recalc);
+ }
+}
+/* @} */
+
+/*********************************************************************//**
+Initialize table's stats for the first time when opening a table.
+dict_stats_init() @{ */
+UNIV_INLINE
+void
+dict_stats_init(
+/*============*/
+ dict_table_t* table) /*!< in/out: table */
+{
+ ut_ad(!mutex_own(&dict_sys->mutex));
+
+ if (table->stat_initialized) {
+ return;
+ }
+
+ dict_stats_upd_option_t opt;
+
+ if (dict_stats_is_persistent_enabled(table)) {
+ opt = DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY;
+ } else {
+ opt = DICT_STATS_RECALC_TRANSIENT;
+ }
+
+ dict_stats_update(table, opt);
+}
+/* @} */
+
+/*********************************************************************//**
+Deinitialize table's stats after the last close of the table. This is
+used to detect "FLUSH TABLE" and refresh the stats upon next open.
+dict_stats_deinit() @{ */
+UNIV_INLINE
+void
+dict_stats_deinit(
+/*==============*/
+ dict_table_t* table) /*!< in/out: table */
+{
+ ut_ad(mutex_own(&dict_sys->mutex));
+
+ ut_a(table->n_ref_count == 0);
+
+ dict_table_stats_lock(table, RW_X_LATCH);
+
+ if (!table->stat_initialized) {
+ dict_table_stats_unlock(table, RW_X_LATCH);
+ return;
+ }
+
+ table->stat_initialized = FALSE;
+
+#ifdef UNIV_DEBUG_VALGRIND
+ UNIV_MEM_INVALID(&table->stat_n_rows,
+ sizeof(table->stat_n_rows));
+ UNIV_MEM_INVALID(&table->stat_clustered_index_size,
+ sizeof(table->stat_clustered_index_size));
+ UNIV_MEM_INVALID(&table->stat_sum_of_other_index_sizes,
+ sizeof(table->stat_sum_of_other_index_sizes));
+ UNIV_MEM_INVALID(&table->stat_modified_counter,
+ sizeof(table->stat_modified_counter));
+
+ dict_index_t* index;
+
+ for (index = dict_table_get_first_index(table);
+ index != NULL;
+ index = dict_table_get_next_index(index)) {
+
+ ulint n_uniq = dict_index_get_n_unique(index);
+
+ UNIV_MEM_INVALID(
+ index->stat_n_diff_key_vals,
+ n_uniq * sizeof(index->stat_n_diff_key_vals[0]));
+ UNIV_MEM_INVALID(
+ index->stat_n_sample_sizes,
+ n_uniq * sizeof(index->stat_n_sample_sizes[0]));
+ UNIV_MEM_INVALID(
+ index->stat_n_non_null_key_vals,
+ n_uniq * sizeof(index->stat_n_non_null_key_vals[0]));
+ UNIV_MEM_INVALID(
+ &index->stat_index_size,
+ sizeof(index->stat_index_size));
+ UNIV_MEM_INVALID(
+ &index->stat_n_leaf_pages,
+ sizeof(index->stat_n_leaf_pages));
+ }
+#endif /* UNIV_DEBUG_VALGRIND */
+
+ dict_table_stats_unlock(table, RW_X_LATCH);
+}
+/* @} */
+
+/* vim: set foldmethod=marker foldmarker=@{,@}: */
diff --git a/storage/innobase/include/dict0stats_bg.h b/storage/innobase/include/dict0stats_bg.h
new file mode 100644
index 00000000000..dd85088c7ba
--- /dev/null
+++ b/storage/innobase/include/dict0stats_bg.h
@@ -0,0 +1,116 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/dict0stats_bg.h
+Code used for background table and index stats gathering.
+
+Created Apr 26, 2012 Vasil Dimov
+*******************************************************/
+
+#ifndef dict0stats_bg_h
+#define dict0stats_bg_h
+
+#include "univ.i"
+
+#include "dict0types.h" /* dict_table_t, table_id_t */
+#include "os0sync.h" /* os_event_t */
+#include "os0thread.h" /* DECLARE_THREAD */
+
+/** Event to wake up the stats thread */
+extern os_event_t dict_stats_event;
+
+/*****************************************************************//**
+Add a table to the recalc pool, which is processed by the
+background stats gathering thread. Only the table id is added to the
+list, so the table can be closed after being enqueued and it will be
+opened when needed. If the table does not exist later (has been DROPped),
+then it will be removed from the pool and skipped.
+dict_stats_recalc_pool_add() @{ */
+UNIV_INTERN
+void
+dict_stats_recalc_pool_add(
+/*=======================*/
+ const dict_table_t* table); /*!< in: table to add */
+/* @} */
+
+/*****************************************************************//**
+Delete a given table from the auto recalc pool.
+dict_stats_recalc_pool_del() @{ */
+UNIV_INTERN
+void
+dict_stats_recalc_pool_del(
+/*=======================*/
+ const dict_table_t* table); /*!< in: table to remove */
+/* @} */
+
+/*****************************************************************//**
+Wait until background stats thread has stopped using the specified table(s).
+The caller must have locked the data dictionary using
+row_mysql_lock_data_dictionary() and this function may unlock it temporarily
+and restore the lock before it exits.
+The background stats thread is guaranteed not to start using the specified
+tables after this function returns and before the caller unlocks the data
+dictionary because it sets the BG_STAT_IN_PROGRESS bit in table->stats_bg_flag
+under dict_sys->mutex.
+dict_stats_wait_bg_to_stop_using_tables() @{ */
+UNIV_INTERN
+void
+dict_stats_wait_bg_to_stop_using_tables(
+/*====================================*/
+ dict_table_t* table1, /*!< in/out: table1 */
+ dict_table_t* table2, /*!< in/out: table2, could be NULL */
+ trx_t* trx); /*!< in/out: transaction to use for
+ unlocking/locking the data dict */
+/* @} */
+
+/*****************************************************************//**
+Initialize global variables needed for the operation of dict_stats_thread().
+Must be called before dict_stats_thread() is started.
+dict_stats_thread_init() @{ */
+UNIV_INTERN
+void
+dict_stats_thread_init();
+/*====================*/
+/* @} */
+
+/*****************************************************************//**
+Free resources allocated by dict_stats_thread_init(), must be called
+after dict_stats_thread() has exited.
+dict_stats_thread_deinit() @{ */
+UNIV_INTERN
+void
+dict_stats_thread_deinit();
+/*======================*/
+/* @} */
+
+/*****************************************************************//**
+This is the thread for background stats gathering. It pops tables from
+the auto recalc list and processes them, eventually recalculating their
+statistics.
+dict_stats_thread() @{
+@return this function does not return, it calls os_thread_exit() */
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(dict_stats_thread)(
+/*==============================*/
+ void* arg); /*!< in: a dummy parameter
+ required by os_thread_create */
+/* @} */
+
+#endif /* dict0stats_bg_h */
diff --git a/storage/innobase/include/dict0types.h b/storage/innobase/include/dict0types.h
index cd2863582c1..b7f7c2d9df9 100644
--- a/storage/innobase/include/dict0types.h
+++ b/storage/innobase/include/dict0types.h
@@ -26,15 +26,15 @@ Created 1/8/1996 Heikki Tuuri
#ifndef dict0types_h
#define dict0types_h
-typedef struct dict_sys_struct dict_sys_t;
-typedef struct dict_col_struct dict_col_t;
-typedef struct dict_field_struct dict_field_t;
-typedef struct dict_index_struct dict_index_t;
-typedef struct dict_table_struct dict_table_t;
-typedef struct dict_foreign_struct dict_foreign_t;
+struct dict_sys_t;
+struct dict_col_t;
+struct dict_field_t;
+struct dict_index_t;
+struct dict_table_t;
+struct dict_foreign_t;
-typedef struct ind_node_struct ind_node_t;
-typedef struct tab_node_struct tab_node_t;
+struct ind_node_t;
+struct tab_node_t;
/* Space id and page no where the dictionary header resides */
#define DICT_HDR_SPACE 0 /* the SYSTEM tablespace */
@@ -52,7 +52,7 @@ the table and index will be marked as "corrupted", and caller will
be responsible to deal with corrupted table or index.
Note: please define the IGNORE_ERR_* as bits, so their value can
be or-ed together */
-enum dict_err_ignore {
+enum dict_err_ignore_t {
DICT_ERR_IGNORE_NONE = 0, /*!< no error to ignore */
DICT_ERR_IGNORE_INDEX_ROOT = 1, /*!< ignore error if index root
page is FIL_NULL or incorrect value */
@@ -60,6 +60,11 @@ enum dict_err_ignore {
DICT_ERR_IGNORE_ALL = 0xFFFF /*!< ignore all errors */
};
-typedef enum dict_err_ignore dict_err_ignore_t;
+/** Quiescing states for flushing tables to disk. */
+enum ib_quiesce_t {
+ QUIESCE_NONE,
+ QUIESCE_START, /*!< Initialise, prepare to start */
+ QUIESCE_COMPLETE /*!< All done */
+};
#endif
diff --git a/storage/innobase/include/dyn0dyn.h b/storage/innobase/include/dyn0dyn.h
index 5e69cb13122..ffb4f270d0e 100644
--- a/storage/innobase/include/dyn0dyn.h
+++ b/storage/innobase/include/dyn0dyn.h
@@ -31,10 +31,9 @@ Created 2/5/1996 Heikki Tuuri
#include "mem0mem.h"
/** A block in a dynamically allocated array */
-typedef struct dyn_block_struct dyn_block_t;
+struct dyn_block_t;
/** Dynamically allocated array */
-typedef dyn_block_t dyn_array_t;
-
+typedef dyn_block_t dyn_array_t;
/** This is the initial 'payload' size of a dynamic array;
this must be > MLOG_BUF_MARGIN + 30! */
@@ -159,7 +158,7 @@ dyn_push_string(
/** @brief A block in a dynamically allocated array.
NOTE! Do not access the fields of the struct directly: the definition
appears here only for the compiler to know its size! */
-struct dyn_block_struct{
+struct dyn_block_t{
mem_heap_t* heap; /*!< in the first block this is != NULL
if dynamic allocation has been needed */
ulint used; /*!< number of data bytes used in this block;
diff --git a/storage/innobase/include/dyn0dyn.ic b/storage/innobase/include/dyn0dyn.ic
index b86697d6865..39254e632a8 100644
--- a/storage/innobase/include/dyn0dyn.ic
+++ b/storage/innobase/include/dyn0dyn.ic
@@ -23,9 +23,9 @@ The dynamically allocated array
Created 2/5/1996 Heikki Tuuri
*******************************************************/
-/** Value of dyn_block_struct::magic_n */
+/** Value of dyn_block_t::magic_n */
#define DYN_BLOCK_MAGIC_N 375767
-/** Flag for dyn_block_struct::used that indicates a full block */
+/** Flag for dyn_block_t::used that indicates a full block */
#define DYN_BLOCK_FULL_FLAG 0x1000000UL
/************************************************************//**
diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
index 1e2b8049860..56fda8b39b1 100644
--- a/storage/innobase/include/fil0fil.h
+++ b/storage/innobase/include/fil0fil.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2010, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -39,6 +39,14 @@ Created 10/25/1995 Heikki Tuuri
#include "log0log.h"
#endif /* !UNIV_HOTBACKUP */
+#include <list>
+
+// Forward declaration
+struct trx_t;
+struct fil_space_t;
+
+typedef std::list<const char*> space_name_list_t;
+
/** When mysqld is run, the default directory "." is the mysqld datadir,
but in the MySQL Embedded Server Library and ibbackup it is not the default
directory, and we must set the base file path explicitly */
@@ -61,12 +69,8 @@ typedef byte fil_faddr_t; /*!< 'type' definition in C: an address
#define FIL_ADDR_SIZE 6 /* address size is 6 bytes */
-/** A struct for storing a space address FIL_ADDR, when it is used
-in C program data structures. */
-
-typedef struct fil_addr_struct fil_addr_t;
/** File space address */
-struct fil_addr_struct{
+struct fil_addr_t{
ulint page; /*!< page number within a space */
ulint boffset; /*!< byte offset within the page */
};
@@ -200,17 +204,19 @@ fil_space_get_type(
ulint id); /*!< in: space id */
#endif /* !UNIV_HOTBACKUP */
/*******************************************************************//**
-Appends a new file to the chain of files of a space. File must be closed. */
+Appends a new file to the chain of files of a space. File must be closed.
+@return pointer to the file name, or NULL on error */
UNIV_INTERN
-void
+char*
fil_node_create(
/*============*/
const char* name, /*!< in: file name (file must be closed) */
ulint size, /*!< in: file size in database blocks, rounded
downwards to an integer */
ulint id, /*!< in: space id where to append */
- ibool is_raw);/*!< in: TRUE if a raw device or
+ ibool is_raw) /*!< in: TRUE if a raw device or
a raw disk partition */
+ __attribute__((nonnull, warn_unused_result));
#ifdef UNIV_LOG_ARCHIVE
/****************************************************************//**
Drops files from the start of a file space, so that its size is cut by
@@ -248,6 +254,16 @@ fil_assign_new_space_id(
/*====================*/
ulint* space_id); /*!< in/out: space id */
/*******************************************************************//**
+Returns the path from the first fil_node_t found for the space ID sent.
+The caller is responsible for freeing the memory allocated here for the
+value returned.
+@return a copy of fil_node_t::path, NULL if space is zero or not found. */
+UNIV_INTERN
+char*
+fil_space_get_first_path(
+/*=====================*/
+ ulint id); /*!< in: space id */
+/*******************************************************************//**
Returns the size of the space in pages. The tablespace must be cached in the
memory cache.
@return space size, 0 if space not found */
@@ -316,6 +332,14 @@ void
fil_close_all_files(void);
/*=====================*/
/*******************************************************************//**
+Closes the redo log files. There must not be any pending i/o's or not
+flushed modifications in the files. */
+UNIV_INTERN
+void
+fil_close_log_files(
+/*================*/
+ bool free); /*!< in: whether to free the memory object */
+/*******************************************************************//**
Sets the max tablespace id counter if the given number is bigger than the
previous value. */
UNIV_INTERN
@@ -329,7 +353,7 @@ Writes the flushed lsn and the latest archived log number to the page
header of the first page of each data file in the system tablespace.
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
fil_write_flushed_lsn_to_data_files(
/*================================*/
lsn_t lsn, /*!< in: lsn to write */
@@ -346,6 +370,7 @@ fil_read_first_page(
parameters below already
contain sensible data */
ulint* flags, /*!< out: tablespace flags */
+ ulint* space_id, /*!< out: tablespace ID */
#ifdef UNIV_LOG_ARCHIVE
ulint* min_arch_log_no, /*!< out: min of archived
log numbers in data files */
@@ -405,25 +430,44 @@ Deletes a single-table tablespace. The tablespace must be cached in the
memory cache.
@return TRUE if success */
UNIV_INTERN
-ibool
+dberr_t
fil_delete_tablespace(
/*==================*/
+ ulint id, /*!< in: space id */
+ buf_remove_t buf_remove); /*!< in: specify the action to take
+ on the tables pages in the buffer
+ pool */
+/*******************************************************************//**
+Closes a single-table tablespace. The tablespace must be cached in the
+memory cache. Free all pages used by the tablespace.
+@return DB_SUCCESS or error */
+UNIV_INTERN
+dberr_t
+fil_close_tablespace(
+/*=================*/
+ trx_t* trx, /*!< in/out: Transaction covering the close */
ulint id); /*!< in: space id */
#ifndef UNIV_HOTBACKUP
/*******************************************************************//**
Discards a single-table tablespace. The tablespace must be cached in the
memory cache. Discarding is like deleting a tablespace, but
-1) we do not drop the table from the data dictionary;
-2) we remove all insert buffer entries for the tablespace immediately; in DROP
-TABLE they are only removed gradually in the background;
-3) when the user does IMPORT TABLESPACE, the tablespace will have the same id
-as it originally had.
-@return TRUE if success */
+
+ 1. We do not drop the table from the data dictionary;
+
+ 2. We remove all insert buffer entries for the tablespace immediately;
+ in DROP TABLE they are only removed gradually in the background;
+
+ 3. When the user does IMPORT TABLESPACE, the tablespace will have the
+ same id as it originally had.
+
+ 4. Free all the pages in use by the tablespace if rename=TRUE.
+@return DB_SUCCESS or error */
UNIV_INTERN
-ibool
+dberr_t
fil_discard_tablespace(
/*===================*/
- ulint id); /*!< in: space id */
+ ulint id) /*!< in: space id */
+ __attribute__((warn_unused_result));
#endif /* !UNIV_HOTBACKUP */
/*******************************************************************//**
Renames a single-table tablespace. The tablespace must be cached in the
@@ -433,16 +477,70 @@ UNIV_INTERN
ibool
fil_rename_tablespace(
/*==================*/
- const char* old_name_in, /*!< in: old table name in the standard
- databasename/tablename format of
- InnoDB, or NULL if we do the rename
- based on the space id only */
+ const char* old_name_in, /*!< in: old table name in the
+ standard databasename/tablename
+ format of InnoDB, or NULL if we
+ do the rename based on the space
+ id only */
ulint id, /*!< in: space id */
- const char* new_name); /*!< in: new table name in the standard
- databasename/tablename format
- of InnoDB */
+ const char* new_name, /*!< in: new table name in the
+ standard databasename/tablename
+ format of InnoDB */
+ const char* new_path); /*!< in: new full datafile path
+ if the tablespace is remotely
+ located, or NULL if it is located
+ in the normal data directory. */
/*******************************************************************//**
+Allocates a file name for a single-table tablespace. The string must be freed
+by caller with mem_free().
+@return own: file name */
+UNIV_INTERN
+char*
+fil_make_ibd_name(
+/*==============*/
+ const char* name, /*!< in: table name or a dir path */
+ bool is_full_path); /*!< in: TRUE if it is a dir path */
+/*******************************************************************//**
+Allocates a file name for a tablespace ISL file (InnoDB Symbolic Link).
+The string must be freed by caller with mem_free().
+@return own: file name */
+UNIV_INTERN
+char*
+fil_make_isl_name(
+/*==============*/
+ const char* name); /*!< in: table name */
+/*******************************************************************//**
+Creates a new InnoDB Symbolic Link (ISL) file. It is always created
+under the 'datadir' of MySQL. The datadir is the directory of a
+running mysqld program. We can refer to it by simply using the path '.'.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fil_create_link_file(
+/*=================*/
+ const char* tablename, /*!< in: tablename */
+ const char* filepath); /*!< in: pathname of tablespace */
+/*******************************************************************//**
+Deletes an InnoDB Symbolic Link (ISL) file. */
+UNIV_INTERN
+void
+fil_delete_link_file(
+/*==================*/
+ const char* tablename); /*!< in: name of table */
+/*******************************************************************//**
+Reads an InnoDB Symbolic Link (ISL) file.
+It is always created under the 'datadir' of MySQL. The name is of the
+form {databasename}/{tablename} and the isl file is expected to be in a
+'{databasename}' directory called '{tablename}.isl'. The caller must free
+the memory of the null-terminated path returned if it is not null.
+@return own: filepath found in link file, NULL if not found. */
+UNIV_INTERN
+char*
+fil_read_link_file(
+/*===============*/
+ const char* name); /*!< in: tablespace name */
+/*******************************************************************//**
Creates a new single-table tablespace to a database directory of MySQL.
Database directories are under the 'datadir' of MySQL. The datadir is the
directory of a running mysqld program. We can refer to it by simply the
@@ -450,21 +548,20 @@ path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp
dir of the mysqld server.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fil_create_new_single_table_tablespace(
/*===================================*/
ulint space_id, /*!< in: space id */
const char* tablename, /*!< in: the table name in the usual
databasename/tablename format
- of InnoDB, or a dir path to a temp
- table */
- ibool is_temp, /*!< in: TRUE if a table created with
- CREATE TEMPORARY TABLE */
+ of InnoDB */
+ const char* dir_path, /*!< in: NULL or a dir path */
ulint flags, /*!< in: tablespace flags */
ulint flags2, /*!< in: table flags2 */
- ulint size); /*!< in: the initial size of the
+ ulint size) /*!< in: the initial size of the
tablespace file in pages,
must be >= FIL_IBD_FILE_INITIAL_SIZE */
+ __attribute__((nonnull, warn_unused_result));
#ifndef UNIV_HOTBACKUP
/********************************************************************//**
Tries to open a single-table tablespace and optionally checks the space id is
@@ -475,41 +572,31 @@ NOTE that we assume this operation is used either at the database startup
or under the protection of the dictionary mutex, so that two users cannot
race here. This operation does not leave the file associated with the
tablespace open, but closes it after we have looked at the space id in it.
-@return TRUE if success */
+
+If the validate boolean is set, we read the first page of the file and
+check that the space id in the file is what we expect. We assume that
+this function runs much faster if no check is made, since accessing the
+file inode probably is much faster (the OS caches them) than accessing
+the first page of the file. This boolean may be initially FALSE, but if
+a remote tablespace is found it will be changed to true.
+
+If the fix_dict boolean is set, then it is safe to use an internal SQL
+statement to update the dictionary tables if they are incorrect.
+
+@return DB_SUCCESS or error code */
UNIV_INTERN
-ibool
+dberr_t
fil_open_single_table_tablespace(
/*=============================*/
- ibool check_space_id, /*!< in: should we check that the space
- id in the file is right; we assume
- that this function runs much faster
- if no check is made, since accessing
- the file inode probably is much
- faster (the OS caches them) than
- accessing the first page of the file */
+ bool validate, /*!< in: Do we validate tablespace? */
+ bool fix_dict, /*!< in: Can we fix the dictionary? */
ulint id, /*!< in: space id */
ulint flags, /*!< in: tablespace flags */
- const char* name); /*!< in: table name in the
- databasename/tablename format */
-/********************************************************************//**
-It is possible, though very improbable, that the lsn's in the tablespace to be
-imported have risen above the current system lsn, if a lengthy purge, ibuf
-merge, or rollback was performed on a backup taken with ibbackup. If that is
-the case, reset page lsn's in the file. We assume that mysqld was shut down
-after it performed these cleanup operations on the .ibd file, so that it at
-the shutdown stamped the latest lsn to the FIL_PAGE_FILE_FLUSH_LSN in the
-first page of the .ibd file, and we can determine whether we need to reset the
-lsn's just by looking at that flush lsn.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-fil_reset_too_high_lsns(
-/*====================*/
- const char* name, /*!< in: table name in the
+ const char* tablename, /*!< in: table name in the
databasename/tablename format */
- lsn_t current_lsn); /*!< in: reset lsn's if the lsn stamped
- to FIL_PAGE_FILE_FLUSH_LSN in the
- first page is too high */
+ const char* filepath) /*!< in: tablespace filepath */
+ __attribute__((nonnull(5), warn_unused_result));
+
#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
At the server startup, if we need crash recovery, scans the database
@@ -520,7 +607,7 @@ in the doublewrite buffer, also to know where to apply log records where the
space id is != 0.
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
fil_load_single_table_tablespaces(void);
/*===================================*/
/*******************************************************************//**
@@ -562,11 +649,15 @@ fil_space_for_table_exists_in_mem(
data dictionary, so that
we can print a warning about orphaned
tablespaces */
- ibool print_error_if_does_not_exist);
+ ibool print_error_if_does_not_exist,
/*!< in: print detailed error
information to the .err log if a
matching tablespace is not found from
memory */
+ bool adjust_space, /*!< in: whether to adjust space id
+ when find table space mismatch */
+ mem_heap_t* heap, /*!< in: heap memory */
+ table_id_t table_id); /*!< in: table id */
#else /* !UNIV_HOTBACKUP */
/********************************************************************//**
Extends all tablespaces to the size stored in the space header. During the
@@ -625,7 +716,7 @@ Reads or writes data. This operation is asynchronous (aio).
@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
i/o on a tablespace which does not exist */
UNIV_INTERN
-ulint
+dberr_t
fil_io(
/*===*/
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE,
@@ -651,8 +742,9 @@ fil_io(
void* buf, /*!< in/out: buffer where to store read data
or from where to write; in aio this must be
appropriately aligned */
- void* message); /*!< in: message for aio handler if non-sync
+ void* message) /*!< in: message for aio handler if non-sync
aio used, else ignored */
+ __attribute__((nonnull(8)));
/**********************************************************************//**
Waits for an aio operation to complete. This function is used to write the
handler for completed requests. The aio array of pending requests is divided
@@ -739,8 +831,154 @@ fil_tablespace_is_being_deleted(
/*============================*/
ulint id); /*!< in: space id */
-typedef struct fil_space_struct fil_space_t;
+/********************************************************************//**
+Delete the tablespace file and any related files like .cfg.
+This should not be called for temporary tables. */
+UNIV_INTERN
+void
+fil_delete_file(
+/*============*/
+ const char* path); /*!< in: filepath of the ibd tablespace */
+
+/** Callback functor. */
+struct PageCallback {
+
+ /**
+ Default constructor */
+ PageCallback()
+ :
+ m_zip_size(),
+ m_page_size(),
+ m_filepath() UNIV_NOTHROW {}
+
+ virtual ~PageCallback() UNIV_NOTHROW {}
+
+ /**
+ Called for page 0 in the tablespace file at the start.
+ @param file_size - size of the file in bytes
+ @param block - contents of the first page in the tablespace file
+ @retval DB_SUCCESS or error code.*/
+ virtual dberr_t init(
+ os_offset_t file_size,
+ const buf_block_t* block) UNIV_NOTHROW = 0;
+
+ /**
+ Called for every page in the tablespace. If the page was not
+ updated then its state must be set to BUF_PAGE_NOT_USED. For
+ compressed tables the page descriptor memory will be at offset:
+ block->frame + UNIV_PAGE_SIZE;
+ @param offset - physical offset within the file
+ @param block - block read from file, note it is not from the buffer pool
+ @retval DB_SUCCESS or error code. */
+ virtual dberr_t operator()(
+ os_offset_t offset,
+ buf_block_t* block) UNIV_NOTHROW = 0;
+
+ /**
+ Set the name of the physical file and the file handle that is used
+ to open it for the file that is being iterated over.
+	@param filename - the physical name of the tablespace file.
+ @param file - OS file handle */
+ void set_file(const char* filename, os_file_t file) UNIV_NOTHROW
+ {
+ m_file = file;
+ m_filepath = filename;
+ }
+
+ /**
+ @return the space id of the tablespace */
+ virtual ulint get_space_id() const UNIV_NOTHROW = 0;
+
+ /** The compressed page size
+ @return the compressed page size */
+ ulint get_zip_size() const
+ {
+ return(m_zip_size);
+ }
+
+ /**
+ Set the tablespace compressed table size.
+	@return DB_SUCCESS if it is valid or DB_CORRUPTION if not */
+ dberr_t set_zip_size(const buf_frame_t* page) UNIV_NOTHROW;
+
+ /** The compressed page size
+ @return the compressed page size */
+ ulint get_page_size() const
+ {
+ return(m_page_size);
+ }
+
+ /** Compressed table page size */
+ ulint m_zip_size;
+
+ /** The tablespace page size. */
+ ulint m_page_size;
+
+ /** File handle to the tablespace */
+ os_file_t m_file;
+
+ /** Physical file path. */
+ const char* m_filepath;
+
+protected:
+ // Disable copying
+ PageCallback(const PageCallback&);
+ PageCallback& operator=(const PageCallback&);
+};
+
+/********************************************************************//**
+Iterate over all the pages in the tablespace.
+@param table - the table definition in the server
+@param n_io_buffers - number of blocks to read and write together
+@param callback - functor that will do the page updates
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+fil_tablespace_iterate(
+/*===================*/
+ dict_table_t* table,
+ ulint n_io_buffers,
+ PageCallback& callback)
+ __attribute__((nonnull, warn_unused_result));
-#endif /* !UNIV_INNOCHECKSUM */
+/*******************************************************************//**
+Checks if a single-table tablespace for a given table name exists in the
+tablespace memory cache.
+@return space id, ULINT_UNDEFINED if not found */
+UNIV_INTERN
+ulint
+fil_get_space_id_for_table(
+/*=======================*/
+ const char* name); /*!< in: table name in the standard
+ 'databasename/tablename' format */
+
+/**
+Iterate over all the spaces in the space list and fetch the
+tablespace names. It will return a copy of the name that must be
+freed by the caller using: delete[].
+@return DB_SUCCESS if all OK. */
+UNIV_INTERN
+dberr_t
+fil_get_space_names(
+/*================*/
+ space_name_list_t& space_name_list)
+ /*!< in/out: Vector for collecting the names. */
+ __attribute__((warn_unused_result));
-#endif
+/****************************************************************//**
+Generate redo logs for swapping two .ibd files */
+UNIV_INTERN
+void
+fil_mtr_rename_log(
+/*===============*/
+ ulint old_space_id, /*!< in: tablespace id of the old
+ table. */
+ const char* old_name, /*!< in: old table name */
+ ulint new_space_id, /*!< in: tablespace id of the new
+ table */
+ const char* new_name, /*!< in: new table name */
+ const char* tmp_name); /*!< in: temp table name used while
+ swapping */
+
+#endif /* !UNIV_INNOCHECKSUM */
+#endif /* fil0fil_h */
diff --git a/storage/innobase/include/fsp0fsp.h b/storage/innobase/include/fsp0fsp.h
index 994783c2db9..a587ccc9f20 100644
--- a/storage/innobase/include/fsp0fsp.h
+++ b/storage/innobase/include/fsp0fsp.h
@@ -50,11 +50,15 @@ to the two Barracuda row formats COMPRESSED and DYNAMIC. */
#define FSP_FLAGS_WIDTH_ATOMIC_BLOBS 1
/** Number of flag bits used to indicate the tablespace page size */
#define FSP_FLAGS_WIDTH_PAGE_SSIZE 4
+/** Width of the DATA_DIR flag. This flag indicates that the tablespace
+is found in a remote location, not the default data directory. */
+#define FSP_FLAGS_WIDTH_DATA_DIR 1
/** Width of all the currently known tablespace flags */
#define FSP_FLAGS_WIDTH (FSP_FLAGS_WIDTH_POST_ANTELOPE \
+ FSP_FLAGS_WIDTH_ZIP_SSIZE \
+ FSP_FLAGS_WIDTH_ATOMIC_BLOBS \
- + FSP_FLAGS_WIDTH_PAGE_SSIZE)
+ + FSP_FLAGS_WIDTH_PAGE_SSIZE \
+ + FSP_FLAGS_WIDTH_DATA_DIR)
/** A mask of all the known/used bits in tablespace flags */
#define FSP_FLAGS_MASK (~(~0 << FSP_FLAGS_WIDTH))
@@ -71,8 +75,11 @@ to the two Barracuda row formats COMPRESSED and DYNAMIC. */
#define FSP_FLAGS_POS_PAGE_SSIZE (FSP_FLAGS_POS_ATOMIC_BLOBS \
+ FSP_FLAGS_WIDTH_ATOMIC_BLOBS)
/** Zero relative shift position of the start of the UNUSED bits */
-#define FSP_FLAGS_POS_UNUSED (FSP_FLAGS_POS_PAGE_SSIZE \
+#define FSP_FLAGS_POS_DATA_DIR (FSP_FLAGS_POS_PAGE_SSIZE \
+ FSP_FLAGS_WIDTH_PAGE_SSIZE)
+/** Zero relative shift position of the start of the UNUSED bits */
+#define FSP_FLAGS_POS_UNUSED (FSP_FLAGS_POS_DATA_DIR \
+ + FSP_FLAGS_WIDTH_DATA_DIR)
/** Bit mask of the POST_ANTELOPE field */
#define FSP_FLAGS_MASK_POST_ANTELOPE \
@@ -90,6 +97,10 @@ to the two Barracuda row formats COMPRESSED and DYNAMIC. */
#define FSP_FLAGS_MASK_PAGE_SSIZE \
((~(~0 << FSP_FLAGS_WIDTH_PAGE_SSIZE)) \
<< FSP_FLAGS_POS_PAGE_SSIZE)
+/** Bit mask of the DATA_DIR field */
+#define FSP_FLAGS_MASK_DATA_DIR \
+ ((~(~0 << FSP_FLAGS_WIDTH_DATA_DIR)) \
+ << FSP_FLAGS_POS_DATA_DIR)
/** Return the value of the POST_ANTELOPE field */
#define FSP_FLAGS_GET_POST_ANTELOPE(flags) \
@@ -107,6 +118,10 @@ to the two Barracuda row formats COMPRESSED and DYNAMIC. */
#define FSP_FLAGS_GET_PAGE_SSIZE(flags) \
((flags & FSP_FLAGS_MASK_PAGE_SSIZE) \
>> FSP_FLAGS_POS_PAGE_SSIZE)
+/** Return the value of the DATA_DIR field */
+#define FSP_FLAGS_HAS_DATA_DIR(flags) \
+ ((flags & FSP_FLAGS_MASK_DATA_DIR) \
+ >> FSP_FLAGS_POS_DATA_DIR)
/** Return the contents of the UNUSED bits */
#define FSP_FLAGS_GET_UNUSED(flags) \
(flags >> FSP_FLAGS_POS_UNUSED)
@@ -555,6 +570,17 @@ fseg_free_page(
ulint page, /*!< in: page offset */
mtr_t* mtr); /*!< in/out: mini-transaction */
/**********************************************************************//**
+Checks if a single page of a segment is free.
+@return true if free */
+UNIV_INTERN
+bool
+fseg_page_is_free(
+/*==============*/
+ fseg_header_t* seg_header, /*!< in: segment header */
+ ulint space, /*!< in: space id */
+ ulint page) /*!< in: page offset */
+ __attribute__((nonnull, warn_unused_result));
+/**********************************************************************//**
Frees part of a segment. This function can be used to free a segment
by repeatedly calling this function in different mini-transactions.
Doing the freeing in a single mini-transaction might result in
@@ -643,12 +669,13 @@ tablespace header at offset FSP_SPACE_FLAGS. They should be 0 for
ROW_FORMAT=COMPACT and ROW_FORMAT=REDUNDANT. The newer row formats,
COMPRESSED and DYNAMIC, use a file format > Antelope so they should
have a file format number plus the DICT_TF_COMPACT bit set.
-@return ulint containing the validated tablespace flags. */
+@return true if check ok */
UNIV_INLINE
-ulint
-fsp_flags_validate(
+bool
+fsp_flags_is_valid(
/*===============*/
- ulint flags); /*!< in: tablespace flags */
+ ulint flags) /*!< in: tablespace flags */
+ __attribute__((warn_unused_result, const));
/********************************************************************//**
Determine if the tablespace is compressed from dict_table_t::flags.
@return TRUE if compressed, FALSE if not compressed */
@@ -658,6 +685,40 @@ fsp_flags_is_compressed(
/*====================*/
ulint flags); /*!< in: tablespace flags */
+/********************************************************************//**
+Calculates the descriptor index within a descriptor page.
+@return descriptor index */
+UNIV_INLINE
+ulint
+xdes_calc_descriptor_index(
+/*=======================*/
+ ulint zip_size, /*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ ulint offset); /*!< in: page offset */
+
+/**********************************************************************//**
+Gets a descriptor bit of a page.
+@return TRUE if free */
+UNIV_INLINE
+ibool
+xdes_get_bit(
+/*=========*/
+ const xdes_t* descr, /*!< in: descriptor */
+ ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
+ ulint offset);/*!< in: page offset within extent:
+ 0 ... FSP_EXTENT_SIZE - 1 */
+
+/********************************************************************//**
+Calculates the page where the descriptor of a page resides.
+@return descriptor page offset */
+UNIV_INLINE
+ulint
+xdes_calc_descriptor_page(
+/*======================*/
+ ulint zip_size, /*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ ulint offset); /*!< in: page offset */
+
#endif /* !UNIV_INNOCHECKSUM */
/********************************************************************//**
@@ -669,7 +730,7 @@ UNIV_INLINE
ulint
fsp_flags_get_zip_size(
/*====================*/
- ulint flags); /*!< in: tablespace flags */
+ ulint flags); /*!< in: tablespace flags */
/********************************************************************//**
Extract the page size from tablespace flags.
@return page size of the tablespace in bytes */
@@ -677,16 +738,7 @@ UNIV_INLINE
ulint
fsp_flags_get_page_size(
/*====================*/
- ulint flags); /*!< in: tablespace flags */
-
-/********************************************************************//**
-Set page size */
-UNIV_INLINE
-ulint
-fsp_flags_set_page_size(
-/*====================*/
- ulint flags, /*!< in: tablespace flags */
- ulint page_size); /*!< in: page size in bytes */
+ ulint flags); /*!< in: tablespace flags */
#ifndef UNIV_NONINL
#include "fsp0fsp.ic"
diff --git a/storage/innobase/include/fsp0fsp.ic b/storage/innobase/include/fsp0fsp.ic
index 498f9000888..0d81e817cc9 100644
--- a/storage/innobase/include/fsp0fsp.ic
+++ b/storage/innobase/include/fsp0fsp.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -51,11 +51,10 @@ tablespace header at offset FSP_SPACE_FLAGS. They should be 0 for
ROW_FORMAT=COMPACT and ROW_FORMAT=REDUNDANT. The newer row formats,
COMPRESSED and DYNAMIC, use a file format > Antelope so they should
have a file format number plus the DICT_TF_COMPACT bit set.
-@return Same as input after validating it as FSP_SPACE_FLAGS.
-If there is an error, trigger assertion failure. */
+@return true if check ok */
UNIV_INLINE
-ulint
-fsp_flags_validate(
+bool
+fsp_flags_is_valid(
/*===============*/
ulint flags) /*!< in: tablespace flags */
{
@@ -65,16 +64,20 @@ fsp_flags_validate(
ulint page_ssize = FSP_FLAGS_GET_PAGE_SSIZE(flags);
ulint unused = FSP_FLAGS_GET_UNUSED(flags);
- /* Make sure there are no bits that we do not know about. */
- ut_a(unused == 0);
+ DBUG_EXECUTE_IF("fsp_flags_is_valid_failure", return(false););
/* fsp_flags is zero unless atomic_blobs is set. */
- ut_a(flags != 1);
- if (post_antelope) {
+ /* Make sure there are no bits that we do not know about. */
+ if (unused != 0 || flags == 1) {
+ return(false);
+ } else if (post_antelope) {
/* The Antelope row formats REDUNDANT and COMPACT did
not use tablespace flags, so this flag and the entire
4-byte field is zero for Antelope row formats. */
- ut_a(atomic_blobs);
+
+ if (!atomic_blobs) {
+ return(false);
+ }
}
if (!atomic_blobs) {
@@ -82,27 +85,33 @@ fsp_flags_validate(
the page structure introduced for the COMPACT row format
by allowing long fields to be broken into prefix and
externally stored parts. */
- ut_a(!post_antelope);
- ut_a(zip_ssize == 0);
- } else {
- ut_a(post_antelope);
- /* Validate the zip shift size is within allowed range. */
- ut_a(zip_ssize <= PAGE_ZIP_SSIZE_MAX);
- }
+ if (post_antelope || zip_ssize != 0) {
+ return(false);
+ }
+
+ } else if (!post_antelope || zip_ssize > PAGE_ZIP_SSIZE_MAX) {
+ return(false);
+ } else if (page_ssize > UNIV_PAGE_SSIZE_MAX) {
+
+ /* The page size field can be used for any row type, or it may
+ be zero for an original 16k page size.
+ Validate the page shift size is within allowed range. */
+
+ return(false);
- /* The page size field can be used for any row type, or it may
- be zero for an original 16k page size.
- Validate the page shift size is within allowed range. */
- ut_a(page_ssize <= UNIV_PAGE_SSIZE_MAX);
- ut_a((UNIV_PAGE_SIZE == UNIV_PAGE_SIZE_ORIG) || (page_ssize));
+ } else if (UNIV_PAGE_SIZE != UNIV_PAGE_SIZE_ORIG && !page_ssize) {
+ return(false);
+ }
#if UNIV_FORMAT_MAX != UNIV_FORMAT_B
# error "UNIV_FORMAT_MAX != UNIV_FORMAT_B, Add more validations."
#endif
- /* Return the flags sent in if we did not fail an assert. */
- return(flags);
+ /* The DATA_DIR field can be used for any row type so there is
+ nothing here to validate. */
+
+ return(true);
}
/********************************************************************//**
@@ -208,9 +217,98 @@ fsp_flags_set_page_size(
flags = FSP_FLAGS_SET_PAGE_SSIZE(flags, ssize);
- ut_ad(flags == fsp_flags_validate(flags));
+ ut_ad(fsp_flags_is_valid(flags));
return(flags);
}
+/********************************************************************//**
+Calculates the descriptor index within a descriptor page.
+@return descriptor index */
+UNIV_INLINE
+ulint
+xdes_calc_descriptor_index(
+/*=======================*/
+ ulint zip_size, /*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ ulint offset) /*!< in: page offset */
+{
+ ut_ad(ut_is_2pow(zip_size));
+
+ if (zip_size == 0) {
+ return(ut_2pow_remainder(offset, UNIV_PAGE_SIZE)
+ / FSP_EXTENT_SIZE);
+ } else {
+ return(ut_2pow_remainder(offset, zip_size) / FSP_EXTENT_SIZE);
+ }
+}
+
+/**********************************************************************//**
+Gets a descriptor bit of a page.
+@return TRUE if free */
+UNIV_INLINE
+ibool
+xdes_get_bit(
+/*=========*/
+ const xdes_t* descr, /*!< in: descriptor */
+ ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
+ ulint offset) /*!< in: page offset within extent:
+ 0 ... FSP_EXTENT_SIZE - 1 */
+{
+ ut_ad(offset < FSP_EXTENT_SIZE);
+ ut_ad(bit == XDES_FREE_BIT || bit == XDES_CLEAN_BIT);
+
+ ulint index = bit + XDES_BITS_PER_PAGE * offset;
+
+ ulint bit_index = index % 8;
+ ulint byte_index = index / 8;
+
+ return(ut_bit_get_nth(
+ mach_read_ulint(descr + XDES_BITMAP + byte_index,
+ MLOG_1BYTE),
+ bit_index));
+}
+
+/********************************************************************//**
+Calculates the page where the descriptor of a page resides.
+@return descriptor page offset */
+UNIV_INLINE
+ulint
+xdes_calc_descriptor_page(
+/*======================*/
+ ulint zip_size, /*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ ulint offset) /*!< in: page offset */
+{
+#ifndef DOXYGEN /* Doxygen gets confused by these */
+# if UNIV_PAGE_SIZE_MAX <= XDES_ARR_OFFSET \
+ + (UNIV_PAGE_SIZE_MAX / FSP_EXTENT_SIZE_MAX) \
+ * XDES_SIZE_MAX
+# error
+# endif
+# if UNIV_ZIP_SIZE_MIN <= XDES_ARR_OFFSET \
+ + (UNIV_ZIP_SIZE_MIN / FSP_EXTENT_SIZE_MIN) \
+ * XDES_SIZE_MIN
+# error
+# endif
+#endif /* !DOXYGEN */
+
+ ut_ad(UNIV_PAGE_SIZE > XDES_ARR_OFFSET
+ + (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE)
+ * XDES_SIZE);
+ ut_ad(UNIV_ZIP_SIZE_MIN > XDES_ARR_OFFSET
+ + (UNIV_ZIP_SIZE_MIN / FSP_EXTENT_SIZE)
+ * XDES_SIZE);
+
+ ut_ad(ut_is_2pow(zip_size));
+
+ if (zip_size == 0) {
+ return(ut_2pow_round(offset, UNIV_PAGE_SIZE));
+ } else {
+ ut_ad(zip_size > XDES_ARR_OFFSET
+ + (zip_size / FSP_EXTENT_SIZE) * XDES_SIZE);
+ return(ut_2pow_round(offset, zip_size));
+ }
+}
+
#endif /* !UNIV_INNOCHECKSUM */
diff --git a/storage/innobase/include/fts0ast.h b/storage/innobase/include/fts0ast.h
index da40e2bbc96..7f2525dc450 100644
--- a/storage/innobase/include/fts0ast.h
+++ b/storage/innobase/include/fts0ast.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -29,7 +29,7 @@ Created 2007/03/16/03 Sunny Bains
#include "mem0mem.h"
/* The type of AST Node */
-enum fts_ast_type_enum {
+enum fts_ast_type_t {
FTS_AST_OPER, /*!< Operator */
FTS_AST_NUMB, /*!< Number */
FTS_AST_TERM, /*!< Term (or word) */
@@ -39,7 +39,7 @@ enum fts_ast_type_enum {
};
/* The FTS query operators that we support */
-enum fts_ast_oper_enum {
+enum fts_ast_oper_t {
FTS_NONE, /*!< No operator */
FTS_IGNORE, /*!< Ignore rows that contain
@@ -58,20 +58,18 @@ enum fts_ast_oper_enum {
FTS_DECR_RATING, /*!< Decrease the rank for this
word*/
- FTS_DISTANCE /*!< Proximity distance */
+ FTS_DISTANCE, /*!< Proximity distance */
+ FTS_IGNORE_SKIP /*!< Transient node operator
+ signifies that this is a
+ FTS_IGNORE node, and ignored in
+ the first pass of
+ fts_ast_visit() */
};
-/* Enum types used by the FTS parser */
-typedef enum fts_ast_type_enum fts_ast_type_t;
-typedef enum fts_ast_oper_enum fts_ast_oper_t;
-
/* Data types used by the FTS parser */
-typedef struct fts_lexer_struct fts_lexer_t;
-typedef struct fts_ast_text_struct fts_ast_text_t;
-typedef struct fts_ast_term_struct fts_ast_term_t;
-typedef struct fts_ast_node_struct fts_ast_node_t;
-typedef struct fts_ast_list_struct fts_ast_list_t;
-typedef struct fts_ast_state_struct fts_ast_state_t;
+struct fts_lexer_t;
+struct fts_ast_node_t;
+struct fts_ast_state_t;
typedef ulint (*fts_ast_callback)(fts_ast_oper_t, fts_ast_node_t*, void*);
@@ -180,60 +178,76 @@ fts_ast_state_free(
/*===============*/
fts_ast_state_t*state); /*!< in: state instance
to free */
-/********************************************************************
-Traverse the AST.*/
-ulint
+/******************************************************************//**
+Traverse the AST - in-order traversal.
+@return DB_SUCCESS if all went well */
+UNIV_INTERN
+dberr_t
fts_ast_visit(
/*==========*/
fts_ast_oper_t oper, /*!< in: FTS operator */
fts_ast_node_t* node, /*!< in: instance to traverse*/
fts_ast_callback visitor, /*!< in: callback */
- void* arg); /*!< in: callback arg */
-/********************************************************************
-Traverse the sub expression list.*/
-ulint
+ void* arg, /*!< in: callback arg */
+ bool* has_ignore) /*!< out: whether we encounter
+ and ignored processing an
+ operator, currently we only
+ ignore FTS_IGNORE operator */
+ __attribute__((nonnull, warn_unused_result));
+/*****************************************************************//**
+Process (nested) sub-expression, create a new result set to store the
+sub-expression result by processing nodes under current sub-expression
+list. Merge the sub-expression result with that of parent expression list.
+@return DB_SUCCESS if all went well */
+UNIV_INTERN
+dberr_t
fts_ast_visit_sub_exp(
-/*==========*/
+/*==================*/
fts_ast_node_t* node, /*!< in: instance to traverse*/
fts_ast_callback visitor, /*!< in: callback */
- void* arg); /*!< in: callback arg */
+ void* arg) /*!< in: callback arg */
+ __attribute__((nonnull, warn_unused_result));
/********************************************************************
Create a lex instance.*/
+UNIV_INTERN
fts_lexer_t*
fts_lexer_create(
/*=============*/
ibool boolean_mode, /*!< in: query type */
const byte* query, /*!< in: query string */
- ulint query_len); /*!< in: query string len */
+ ulint query_len) /*!< in: query string len */
+ __attribute__((nonnull, malloc, warn_unused_result));
/********************************************************************
Free an fts_lexer_t instance.*/
+UNIV_INTERN
void
fts_lexer_free(
/*===========*/
- fts_lexer_t* fts_lexer); /*!< in: lexer instance to
+ fts_lexer_t* fts_lexer) /*!< in: lexer instance to
free */
+ __attribute__((nonnull));
/* Query term type */
-struct fts_ast_term_struct {
+struct fts_ast_term_t {
byte* ptr; /*!< Pointer to term string.*/
ibool wildcard; /*!< TRUE if wild card set.*/
};
/* Query text type */
-struct fts_ast_text_struct {
+struct fts_ast_text_t {
byte* ptr; /*!< Pointer to term string.*/
ulint distance; /*!< > 0 if proximity distance
set */
};
/* The list of nodes in an expr list */
-struct fts_ast_list_struct {
+struct fts_ast_list_t {
fts_ast_node_t* head; /*!< Children list head */
fts_ast_node_t* tail; /*!< Children list tail */
};
/* FTS AST node to store the term, text, operator and sub-expressions.*/
-struct fts_ast_node_struct {
+struct fts_ast_node_t {
fts_ast_type_t type; /*!< The type of node */
fts_ast_text_t text; /*!< Text node */
fts_ast_term_t term; /*!< Term node */
@@ -241,10 +255,12 @@ struct fts_ast_node_struct {
fts_ast_list_t list; /*!< Expression list */
fts_ast_node_t* next; /*!< Link for expr list */
fts_ast_node_t* next_alloc; /*!< For tracking allocations */
+ bool visited; /*!< whether this node is
+ already processed */
};
/* To track state during parsing */
-struct fts_ast_state_struct {
+struct fts_ast_state_t {
mem_heap_t* heap; /*!< Heap to use for alloc */
fts_ast_node_t* root; /*!< If all goes OK, then this
will point to the root.*/
diff --git a/storage/innobase/include/fts0fts.h b/storage/innobase/include/fts0fts.h
index e515772bdbd..f2f8617012a 100644
--- a/storage/innobase/include/fts0fts.h
+++ b/storage/innobase/include/fts0fts.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -114,16 +114,16 @@ to mark invalid states.
NOTE: Do not change the order or value of these, fts_trx_row_get_new_state
depends on them being exactly as they are. */
-typedef enum {
+enum fts_row_state {
FTS_INSERT = 0,
FTS_MODIFY,
FTS_DELETE,
FTS_NOTHING,
FTS_INVALID
-} fts_row_state;
+};
/** The FTS table types. */
-enum fts_table_type_enum {
+enum fts_table_type_t {
FTS_INDEX_TABLE, /*!< FTS auxiliary table that is
specific to a particular FTS index
on a table */
@@ -132,21 +132,11 @@ enum fts_table_type_enum {
for all FTS index on a table */
};
-typedef struct fts_struct fts_t;
-typedef struct fts_doc_struct fts_doc_t;
-typedef struct fts_trx_struct fts_trx_t;
-typedef struct fts_table_struct fts_table_t;
-typedef struct fts_cache_struct fts_cache_t;
-typedef struct fts_token_struct fts_token_t;
-typedef struct fts_string_struct fts_string_t;
-typedef struct fts_result_struct fts_result_t;
-typedef struct fts_ranking_struct fts_ranking_t;
-typedef struct fts_trx_row_struct fts_trx_row_t;
-typedef struct fts_doc_ids_struct fts_doc_ids_t;
-typedef enum fts_table_type_enum fts_table_type_t;
-typedef struct fts_trx_table_struct fts_trx_table_t;
-typedef struct fts_savepoint_struct fts_savepoint_t;
-typedef struct fts_index_cache_struct fts_index_cache_t;
+struct fts_doc_t;
+struct fts_cache_t;
+struct fts_token_t;
+struct fts_doc_ids_t;
+struct fts_index_cache_t;
/** Initialize the "fts_table" for internal query into FTS auxiliary
@@ -172,7 +162,7 @@ do { \
/** Information about changes in a single transaction affecting
the FTS system. */
-struct fts_trx_struct {
+struct fts_trx_t {
trx_t* trx; /*!< InnoDB transaction */
ib_vector_t* savepoints; /*!< Active savepoints, must have at
@@ -184,7 +174,7 @@ struct fts_trx_struct {
};
/** Information required for transaction savepoint handling. */
-struct fts_savepoint_struct {
+struct fts_savepoint_t {
char* name; /*!< First entry is always NULL, the
default instance. Otherwise the name
of the savepoint */
@@ -193,7 +183,7 @@ struct fts_savepoint_struct {
};
/** Information about changed rows in a transaction for a single table. */
-struct fts_trx_table_struct {
+struct fts_trx_table_t {
dict_table_t* table; /*!< table */
fts_trx_t* fts_trx; /*!< link to parent */
@@ -209,7 +199,7 @@ struct fts_trx_table_struct {
};
/** Information about one changed row in a transaction. */
-struct fts_trx_row_struct {
+struct fts_trx_row_t {
doc_id_t doc_id; /*!< Id of the ins/upd/del document */
fts_row_state state; /*!< state of the row */
@@ -220,7 +210,7 @@ struct fts_trx_row_struct {
/** List of document ids that were added during a transaction. This
list is passed on to a background 'Add' thread and OPTIMIZE, so it
needs its own memory heap. */
-struct fts_doc_ids_struct {
+struct fts_doc_ids_t {
ib_vector_t* doc_ids; /*!< document ids (each element is
of type doc_id_t). */
@@ -237,7 +227,7 @@ as our in-memory format. This typedef is a single such character. */
typedef unsigned short ib_uc_t;
/** An UTF-16 ro UTF-8 string. */
-struct fts_string_struct {
+struct fts_string_t {
byte* f_str; /*!< string, not necessary terminated in
any way */
ulint f_len; /*!< Length of the string in bytes */
@@ -245,7 +235,7 @@ struct fts_string_struct {
};
/** Query ranked doc ids. */
-struct fts_ranking_struct {
+struct fts_ranking_t {
doc_id_t doc_id; /*!< Document id */
fts_rank_t rank; /*!< Rank is between 0 .. 1 */
@@ -256,7 +246,7 @@ struct fts_ranking_struct {
};
/** Query result. */
-struct fts_result_struct {
+struct fts_result_t {
ib_rbt_node_t* current; /*!< Current element */
ib_rbt_t* rankings_by_id; /*!< RB tree of type fts_ranking_t
@@ -268,7 +258,7 @@ struct fts_result_struct {
/** This is used to generate the FTS auxiliary table name, we need the
table id and the index id to generate the column specific FTS auxiliary
table name. */
-struct fts_table_struct {
+struct fts_table_t {
const char* parent; /*!< Parent table name, this is
required only for the database
name */
@@ -311,10 +301,10 @@ enum fts_status {
typedef enum fts_status fts_status_t;
/** The state of the FTS sub system. */
-struct fts_struct {
+struct fts_t {
/*!< mutex protecting bg_threads* and
fts_add_wq. */
- mutex_t bg_threads_mutex;
+ ib_mutex_t bg_threads_mutex;
ulint bg_threads; /*!< number of background threads
accessing this table */
@@ -339,10 +329,10 @@ struct fts_struct {
ib_vector_t* indexes; /*!< Vector of FTS indexes, this is
mainly for caching purposes. */
- mem_heap_t* fts_heap; /*!< heap for fts_struct allocation */
+ mem_heap_t* fts_heap; /*!< heap for fts_t allocation */
};
-typedef struct fts_stopword_struct fts_stopword_t;
+struct fts_stopword_t;
/** status bits for fts_stopword_t status field. */
#define STOPWORD_NOT_INIT 0x1
@@ -395,15 +385,15 @@ fts_cache_index_cache_create(
/******************************************************************//**
Get the next available document id. This function creates a new
-transaction to generate the document id. */
+transaction to generate the document id.
+@return DB_SUCCESS if OK */
UNIV_INTERN
-ulint
+dberr_t
fts_get_next_doc_id(
/*================*/
- /*!< out: DB_SUCCESS if OK */
- const dict_table_t* table, /*!< in: table */
- doc_id_t* doc_id); /*!< out: new document id */
-
+ const dict_table_t* table, /*!< in: table */
+ doc_id_t* doc_id) /*!< out: new document id */
+ __attribute__((nonnull));
/*********************************************************************//**
Update the next and last Doc ID in the CONFIG table to be the input
"doc_id" value (+ 1). We would do so after each FTS index build or
@@ -412,28 +402,17 @@ UNIV_INTERN
void
fts_update_next_doc_id(
/*===================*/
+ trx_t* trx, /*!< in/out: transaction */
const dict_table_t* table, /*!< in: table */
- const char* table_name, /*!< in: table name */
- doc_id_t doc_id); /*!< in: DOC ID to set */
-
-/******************************************************************//**
-Update the last document id. This function could create a new
-transaction to update the last document id. */
-UNIV_INTERN
-ulint
-fts_update_sync_doc_id(
-/*===================*/
- /*!< out: DB_SUCCESS if OK */
- const dict_table_t* table, /*!< in: table */
- const char* table_name, /*!< in: table name */
- doc_id_t doc_id, /*!< in: last document id */
- trx_t* trx); /*!< in: update trx */
+ const char* table_name, /*!< in: table name, or NULL */
+ doc_id_t doc_id) /*!< in: DOC ID to set */
+ __attribute__((nonnull(2)));
/******************************************************************//**
Create a new document id .
@return DB_SUCCESS if all went well else error */
UNIV_INTERN
-ulint
+dberr_t
fts_create_doc_id(
/*==============*/
dict_table_t* table, /*!< in: row is of this
@@ -442,8 +421,8 @@ fts_create_doc_id(
value to this row. This is the
current row that is being
inserted. */
- mem_heap_t* heap); /*!< in: heap */
-
+ mem_heap_t* heap) /*!< in: heap */
+ __attribute__((nonnull));
/******************************************************************//**
Create a new fts_doc_ids_t.
@return new fts_doc_ids_t. */
@@ -488,7 +467,7 @@ on the given table. row_mysql_lock_data_dictionary must have been
called before this.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_create_common_tables(
/*=====================*/
trx_t* trx, /*!< in: transaction handle */
@@ -496,27 +475,27 @@ fts_create_common_tables(
table, /*!< in: table with one FTS
index */
const char* name, /*!< in: table name */
- ibool skip_doc_id_index);
- /*!< in: Skip index on doc id */
+ bool skip_doc_id_index) /*!< in: Skip index on doc id */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Wrapper function of fts_create_index_tables_low(), create auxiliary
tables for an FTS index
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_create_index_tables(
/*====================*/
trx_t* trx, /*!< in: transaction handle */
- const dict_index_t* index); /*!< in: the FTS index
+ const dict_index_t* index) /*!< in: the FTS index
instance */
-
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Creates the column specific ancillary tables needed for supporting an
FTS index on the given table. row_mysql_lock_data_dictionary must have
been called before this.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_create_index_tables_low(
/*========================*/
trx_t* trx, /*!< in: transaction handle */
@@ -524,16 +503,17 @@ fts_create_index_tables_low(
index, /*!< in: the FTS index
instance */
const char* table_name, /*!< in: the table name */
- table_id_t table_id); /*!< in: the table id */
-
+ table_id_t table_id) /*!< in: the table id */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Add the FTS document id hidden column. */
UNIV_INTERN
void
fts_add_doc_id_column(
/*==================*/
- dict_table_t* table); /*!< in/out: Table with
- FTS index */
+ dict_table_t* table, /*!< in/out: Table with FTS index */
+ mem_heap_t* heap) /*!< in: temporary memory heap, or NULL */
+ __attribute__((nonnull(1)));
/*********************************************************************//**
Drops the ancillary tables needed for supporting an FTS index on the
@@ -541,28 +521,29 @@ given table. row_mysql_lock_data_dictionary must have been called before
this.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_drop_tables(
/*============*/
trx_t* trx, /*!< in: transaction */
- dict_table_t* table); /*!< in: table has the FTS
+ dict_table_t* table) /*!< in: table has the FTS
index */
-
+ __attribute__((nonnull));
/******************************************************************//**
The given transaction is about to be committed; do whatever is necessary
from the FTS system's POV.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_commit(
/*=======*/
- trx_t* trx); /*!< in: transaction */
+ trx_t* trx) /*!< in: transaction */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
FTS Query entry point.
@return DB_SUCCESS if successful otherwise error code */
UNIV_INTERN
-ulint
+dberr_t
fts_query(
/*======*/
trx_t* trx, /*!< in: transaction */
@@ -571,8 +552,9 @@ fts_query(
const byte* query, /*!< in: FTS query */
ulint query_len, /*!< in: FTS query string len
in bytes */
- fts_result_t** result); /*!< out: query result, to be
+ fts_result_t** result) /*!< out: query result, to be
freed by the caller.*/
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Retrieve the FTS Relevance Ranking result for doc with doc_id
@@ -686,10 +668,11 @@ fts_free(
Run OPTIMIZE on the given table.
@return DB_SUCCESS if all OK */
UNIV_INTERN
-ulint
+dberr_t
fts_optimize_table(
/*===============*/
- dict_table_t* table); /*!< in: table to optimiza */
+ dict_table_t* table) /*!< in: table to optimiza */
+ __attribute__((nonnull));
/**********************************************************************//**
Startup the optimize thread and create the work queue. */
@@ -710,11 +693,12 @@ fts_optimize_is_init(void);
Drops index ancillary tables for a FTS index
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_drop_index_tables(
/*==================*/
trx_t* trx, /*!< in: transaction */
- dict_index_t* index); /*!< in: Index to drop */
+ dict_index_t* index) /*!< in: Index to drop */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Remove the table from the OPTIMIZER's list. We do wait for
@@ -740,24 +724,22 @@ fts_optimize_end(void);
/*===================*/
/**********************************************************************//**
-Take a FTS savepoint.
-@return DB_SUCCESS or error code */
+Take a FTS savepoint. */
UNIV_INTERN
void
fts_savepoint_take(
/*===============*/
trx_t* trx, /*!< in: transaction */
- const char* name); /*!< in: savepoint name */
-
+ const char* name) /*!< in: savepoint name */
+ __attribute__((nonnull));
/**********************************************************************//**
-Refresh last statement savepoint.
-@return DB_SUCCESS or error code */
+Refresh last statement savepoint. */
UNIV_INTERN
void
fts_savepoint_laststmt_refresh(
/*===========================*/
- trx_t* trx); /*!< in: transaction */
-
+ trx_t* trx) /*!< in: transaction */
+ __attribute__((nonnull));
/**********************************************************************//**
Release the savepoint data identified by name. */
UNIV_INTERN
@@ -821,26 +803,26 @@ fts_drop_orphaned_tables(void);
/*==========================*/
/******************************************************************//**
-Since we do a horizontal split on the index table, we need to drop the
-all the split tables. */
+Since we do a horizontal split on the index table, we need to drop
+all the split tables.
+@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_drop_index_split_tables(
/*========================*/
- /*!< out: DB_SUCCESS
- or error code */
trx_t* trx, /*!< in: transaction */
- dict_index_t* index); /*!< in: fts instance */
+ dict_index_t* index) /*!< in: fts instance */
+ __attribute__((nonnull, warn_unused_result));
/****************************************************************//**
Run SYNC on the table, i.e., write out data from the cache to the
-FTS auxiliary INDEX table and clear the cache at the end.
-@return DB_SUCCESS if all OK */
+FTS auxiliary INDEX table and clear the cache at the end. */
UNIV_INTERN
-ulint
+void
fts_sync_table(
/*===========*/
- dict_table_t* table); /*!< in: table */
+ dict_table_t* table) /*!< in: table */
+ __attribute__((nonnull));
/****************************************************************//**
Free the query graph but check whether dict_sys->mutex is already
@@ -978,9 +960,9 @@ fts_get_docs_create(
/****************************************************************//**
Read the rows from the FTS index
-@return vector of rows fetched */
+@return DB_SUCCESS if OK */
UNIV_INTERN
-ulint
+dberr_t
fts_table_fetch_doc_ids(
/*====================*/
trx_t* trx, /*!< in: transaction */
@@ -1011,12 +993,13 @@ fts_add_index(
Drop auxiliary tables related to an FTS index
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
fts_drop_index(
/*===========*/
dict_table_t* table, /*!< in: Table where indexes are dropped */
dict_index_t* index, /*!< in: Index to be dropped */
- trx_t* trx); /*!< in: Transaction for the drop */
+ trx_t* trx) /*!< in: Transaction for the drop */
+ __attribute__((nonnull));
/*******************************************************************//**
Check indexes in the fts->indexes is also present in index cache and
diff --git a/storage/innobase/include/fts0priv.h b/storage/innobase/include/fts0priv.h
index 8524f988e47..c6aca27f6ec 100644
--- a/storage/innobase/include/fts0priv.h
+++ b/storage/innobase/include/fts0priv.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -113,23 +113,25 @@ component.
/******************************************************************//**
Parse an SQL string. %s is replaced with the table's id.
-@return DB_SUCCESS or error code */
+@return query graph */
UNIV_INTERN
que_t*
fts_parse_sql(
/*==========*/
fts_table_t* fts_table, /*!< in: FTS aux table */
pars_info_t* info, /*!< in: info struct, or NULL */
- const char* sql); /*!< in: SQL string to evaluate */
+ const char* sql) /*!< in: SQL string to evaluate */
+ __attribute__((nonnull(3), malloc, warn_unused_result));
/******************************************************************//**
Evaluate a parsed SQL statement
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_eval_sql(
/*=========*/
trx_t* trx, /*!< in: transaction */
- que_t* graph); /*!< in: Parsed statement */
+ que_t* graph) /*!< in: Parsed statement */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Construct the name of an ancillary FTS table for the given table.
@return own: table name, must be freed with mem_free() */
@@ -138,7 +140,8 @@ char*
fts_get_table_name(
/*===============*/
const fts_table_t*
- fts_table); /*!< in: FTS aux table info */
+ fts_table) /*!< in: FTS aux table info */
+ __attribute__((nonnull, malloc, warn_unused_result));
/******************************************************************//**
Construct the column specification part of the SQL string for selecting the
indexed FTS columns for the given table. Adds the necessary bound
@@ -160,7 +163,8 @@ fts_get_select_columns_str(
/*=======================*/
dict_index_t* index, /*!< in: FTS index */
pars_info_t* info, /*!< in/out: parser info */
- mem_heap_t* heap); /*!< in: memory heap */
+ mem_heap_t* heap) /*!< in: memory heap */
+ __attribute__((nonnull, warn_unused_result));
/** define for fts_doc_fetch_by_doc_id() "option" value, defines whether
we want to get Doc whose ID is equal to or greater or smaller than supplied
@@ -174,41 +178,45 @@ Fetch document (= a single row's indexed text) with the given
document id.
@return: DB_SUCCESS if fetch is successful, else error */
UNIV_INTERN
-ulint
+dberr_t
fts_doc_fetch_by_doc_id(
/*====================*/
fts_get_doc_t* get_doc, /*!< in: state */
doc_id_t doc_id, /*!< in: id of document to fetch */
- dict_index_t* index_to_use, /*!< in: caller supplied FTS index */
+ dict_index_t* index_to_use, /*!< in: caller supplied FTS index,
+ or NULL */
ulint option, /*!< in: search option, if it is
greater than doc_id or equal */
fts_sql_callback
callback, /*!< in: callback to read
records */
- void* arg); /*!< in: callback arg */
+ void* arg) /*!< in: callback arg */
+ __attribute__((nonnull(6)));
/*******************************************************************//**
Callback function for fetch that stores the text of an FTS document,
converting each column to UTF-16.
-@return: always returns NULL */
+@return always FALSE */
UNIV_INTERN
ibool
fts_query_expansion_fetch_doc(
/*==========================*/
void* row, /*!< in: sel_node_t* */
- void* user_arg); /*!< in: fts_doc_t* */
+ void* user_arg) /*!< in: fts_doc_t* */
+ __attribute__((nonnull));
/********************************************************************
Write out a single word's data as new entry/entries in the INDEX table.
@return DB_SUCCESS if all OK. */
UNIV_INTERN
-ulint
+dberr_t
fts_write_node(
/*===========*/
trx_t* trx, /*!< in: transaction */
que_t** graph, /*!< in: query graph */
fts_table_t* fts_table, /*!< in: the FTS aux index */
fts_string_t* word, /*!< in: word in UTF-8 */
- fts_node_t* node); /*!< in: node columns */
+ fts_node_t* node) /*!< in: node columns */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
Tokenize a document. */
UNIV_INTERN
@@ -217,8 +225,10 @@ fts_tokenize_document(
/*==================*/
fts_doc_t* doc, /*!< in/out: document to
tokenize */
- fts_doc_t* result); /*!< out: if provided, save
+ fts_doc_t* result) /*!< out: if provided, save
result tokens here */
+ __attribute__((nonnull(1)));
+
/*******************************************************************//**
Continue to tokenize a document. */
UNIV_INTERN
@@ -229,16 +239,18 @@ fts_tokenize_document_next(
tokenize */
ulint add_pos, /*!< in: add this position to all
tokens from this tokenization */
- fts_doc_t* result); /*!< out: if provided, save
+ fts_doc_t* result) /*!< out: if provided, save
result tokens here */
+ __attribute__((nonnull(1)));
/******************************************************************//**
-Create a new empty document.
-@return own: new document */
+Initialize a document. */
UNIV_INTERN
-fts_doc_t*
+void
fts_doc_init(
/*=========*/
- fts_doc_t* doc); /*!< in: doc to initialize */
+ fts_doc_t* doc) /*!< in: doc to initialize */
+ __attribute__((nonnull));
+
/******************************************************************//**
Do a binary search for a doc id in the array
@return +ve index if found -ve index where it should be
@@ -250,26 +262,29 @@ fts_bsearch(
fts_update_t* array, /*!< in: array to sort */
int lower, /*!< in: lower bound of array*/
int upper, /*!< in: upper bound of array*/
- doc_id_t doc_id); /*!< in: doc id to lookup */
+ doc_id_t doc_id) /*!< in: doc id to lookup */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Free document. */
UNIV_INTERN
void
fts_doc_free(
/*=========*/
- fts_doc_t* doc); /*!< in: document */
+ fts_doc_t* doc) /*!< in: document */
+ __attribute__((nonnull));
/******************************************************************//**
Free fts_optimizer_word_t instanace.*/
-
+UNIV_INTERN
void
fts_word_free(
/*==========*/
- fts_word_t* word); /*!< in: instance to free.*/
+ fts_word_t* word) /*!< in: instance to free.*/
+ __attribute__((nonnull));
/******************************************************************//**
Read the rows from the FTS inde
-@return vector of rows fetched */
+@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_index_fetch_nodes(
/*==================*/
trx_t* trx, /*!< in: transaction */
@@ -277,7 +292,8 @@ fts_index_fetch_nodes(
fts_table_t* fts_table, /*!< in: FTS aux table */
const fts_string_t*
word, /*!< in: the word to fetch */
- fts_fetch_t* fetch); /*!< in: fetch callback.*/
+ fts_fetch_t* fetch) /*!< in: fetch callback.*/
+ __attribute__((nonnull));
/******************************************************************//**
Create a fts_optimizer_word_t instance.
@return new instance */
@@ -287,7 +303,8 @@ fts_word_init(
/*==========*/
fts_word_t* word, /*!< in: word to initialize */
byte* utf8, /*!< in: UTF-8 string */
- ulint len); /*!< in: length of string in bytes */
+ ulint len) /*!< in: length of string in bytes */
+ __attribute__((nonnull));
/******************************************************************//**
Compare two fts_trx_table_t instances, we actually compare the
table id's here.
@@ -297,7 +314,8 @@ int
fts_trx_table_cmp(
/*==============*/
const void* v1, /*!< in: id1 */
- const void* v2); /*!< in: id2 */
+ const void* v2) /*!< in: id2 */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Compare a table id with a trx_table_t table id.
@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
@@ -306,23 +324,26 @@ int
fts_trx_table_id_cmp(
/*=================*/
const void* p1, /*!< in: id1 */
- const void* p2); /*!< in: id2 */
+ const void* p2) /*!< in: id2 */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Commit a transaction.
@return DB_SUCCESS if all OK */
UNIV_INTERN
-ulint
+dberr_t
fts_sql_commit(
/*===========*/
- trx_t* trx); /*!< in: transaction */
+ trx_t* trx) /*!< in: transaction */
+ __attribute__((nonnull));
/******************************************************************//**
Rollback a transaction.
@return DB_SUCCESS if all OK */
UNIV_INTERN
-ulint
+dberr_t
fts_sql_rollback(
/*=============*/
- trx_t* trx); /*!< in: transaction */
+ trx_t* trx) /*!< in: transaction */
+ __attribute__((nonnull));
/******************************************************************//**
Parse an SQL string. %s is replaced with the table's id. Don't acquire
the dict mutex
@@ -333,41 +354,44 @@ fts_parse_sql_no_dict_lock(
/*=======================*/
fts_table_t* fts_table, /*!< in: table with FTS index */
pars_info_t* info, /*!< in: parser info */
- const char* sql); /*!< in: SQL string to evaluate */
+ const char* sql) /*!< in: SQL string to evaluate */
+ __attribute__((nonnull(3), malloc, warn_unused_result));
/******************************************************************//**
Get value from config table. The caller must ensure that enough
space is allocated for value to hold the column contents
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_get_value(
/*=================*/
trx_t* trx, /* transaction */
fts_table_t* fts_table, /*!< in: the indexed FTS table */
const char* name, /*!< in: get config value for
this parameter name */
- fts_string_t* value); /*!< out: value read from
+ fts_string_t* value) /*!< out: value read from
config table */
+ __attribute__((nonnull));
/******************************************************************//**
Get value specific to an FTS index from the config table. The caller
must ensure that enough space is allocated for value to hold the
column contents.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_get_index_value(
/*=======================*/
trx_t* trx, /*!< transaction */
dict_index_t* index, /*!< in: index */
const char* param, /*!< in: get config value for
this parameter name */
- fts_string_t* value); /*!< out: value read from
+ fts_string_t* value) /*!< out: value read from
config table */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Set the value in the config table for name.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_set_value(
/*=================*/
trx_t* trx, /*!< transaction */
@@ -375,89 +399,96 @@ fts_config_set_value(
const char* name, /*!< in: get config value for
this parameter name */
const fts_string_t*
- value); /*!< in: value to update */
+ value) /*!< in: value to update */
+ __attribute__((nonnull));
/****************************************************************//**
Set an ulint value in the config table.
@return DB_SUCCESS if all OK else error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_set_ulint(
/*=================*/
trx_t* trx, /*!< in: transaction */
fts_table_t* fts_table, /*!< in: the indexed FTS table */
const char* name, /*!< in: param name */
- ulint int_value); /*!< in: value */
-
+ ulint int_value) /*!< in: value */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Set the value specific to an FTS index in the config table.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_set_index_value(
/*=======================*/
trx_t* trx, /*!< transaction */
dict_index_t* index, /*!< in: index */
const char* param, /*!< in: get config value for
this parameter name */
- fts_string_t* value); /*!< out: value read from
+ fts_string_t* value) /*!< out: value read from
config table */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Increment the value in the config table for column name.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_increment_value(
/*=======================*/
trx_t* trx, /*!< transaction */
fts_table_t* fts_table, /*!< in: the indexed FTS table */
const char* name, /*!< in: increment config value
for this parameter name */
- ulint delta); /*!< in: increment by this much */
+ ulint delta) /*!< in: increment by this much */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Increment the per index value in the config table for column name.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_increment_index_value(
/*=============================*/
trx_t* trx, /*!< transaction */
dict_index_t* index, /*!< in: FTS index */
const char* name, /*!< in: increment config value
for this parameter name */
- ulint delta); /*!< in: increment by this much */
+ ulint delta) /*!< in: increment by this much */
+ __attribute__((nonnull));
/******************************************************************//**
Get an ulint value from the config table.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_get_index_ulint(
/*=======================*/
trx_t* trx, /*!< in: transaction */
dict_index_t* index, /*!< in: FTS index */
const char* name, /*!< in: param name */
- ulint* int_value); /*!< out: value */
+ ulint* int_value) /*!< out: value */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Set an ulint value int the config table.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_set_index_ulint(
/*=======================*/
trx_t* trx, /*!< in: transaction */
dict_index_t* index, /*!< in: FTS index */
const char* name, /*!< in: param name */
- ulint int_value); /*!< in: value */
+ ulint int_value) /*!< in: value */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Get an ulint value from the config table.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_config_get_ulint(
/*=================*/
trx_t* trx, /*!< in: transaction */
fts_table_t* fts_table, /*!< in: the indexed FTS table */
const char* name, /*!< in: param name */
- ulint* int_value); /*!< out: value */
+ ulint* int_value) /*!< out: value */
+ __attribute__((nonnull));
/******************************************************************//**
Search cache for word.
@return the word node vector if found else NULL */
@@ -468,7 +499,8 @@ fts_cache_find_word(
const fts_index_cache_t*
index_cache, /*!< in: cache to search */
const fts_string_t*
- text); /*!< in: word to search for */
+ text) /*!< in: word to search for */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Check cache for deleted doc id.
@return TRUE if deleted */
@@ -478,7 +510,8 @@ fts_cache_is_deleted_doc_id(
/*========================*/
const fts_cache_t*
cache, /*!< in: cache ito search */
- doc_id_t doc_id); /*!< in: doc id to search for */
+ doc_id_t doc_id) /*!< in: doc id to search for */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Append deleted doc ids to vector and sort the vector. */
UNIV_INTERN
@@ -502,35 +535,31 @@ fts_wait_for_background_thread_to_start(
ulint max_wait); /*!< in: time in microseconds, if set
to 0 then it disables timeout
checking */
-/*********************************************************************//**
-Get the total number of documents in the FTS.
-@return estimated number of rows in the table */
-UNIV_INTERN
-ulint
-fts_get_total_document_count(
-/*=========================*/
- dict_table_t* table); /*!< in: table instance */
+#ifdef FTS_DOC_STATS_DEBUG
/******************************************************************//**
Get the total number of words in the FTS for a particular FTS index.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
fts_get_total_word_count(
/*=====================*/
trx_t* trx, /*!< in: transaction */
dict_index_t* index, /*!< in: for this index */
- ulint* total); /*!< out: total words */
+ ulint* total) /*!< out: total words */
+ __attribute__((nonnull, warn_unused_result));
+#endif
/******************************************************************//**
Search the index specific cache for a particular FTS index.
@return the index specific cache else NULL */
UNIV_INTERN
-const fts_index_cache_t*
+fts_index_cache_t*
fts_find_index_cache(
/*================*/
const fts_cache_t*
cache, /*!< in: cache to search */
const dict_index_t*
- index); /*!< in: index to search for */
+ index) /*!< in: index to search for */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Write the table id to the given buffer (including final NUL). Buffer must be
at least FTS_AUX_MIN_TABLE_ID_LENGTH bytes long.
@@ -539,8 +568,9 @@ UNIV_INLINE
int
fts_write_object_id(
/*================*/
- ib_id_t id, /*!< in: a table/index id */
- char* str); /*!< in: buffer to write the id to */
+ ib_id_t id, /*!< in: a table/index id */
+ char* str) /*!< in: buffer to write the id to */
+ __attribute__((nonnull));
/******************************************************************//**
Read the table id from the string generated by fts_write_object_id().
@return TRUE if parse successful */
@@ -549,7 +579,8 @@ ibool
fts_read_object_id(
/*===============*/
ib_id_t* id, /*!< out: a table id */
- const char* str); /*!< in: buffer to read from */
+ const char* str) /*!< in: buffer to read from */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Get the table id.
@return number of bytes written */
@@ -559,23 +590,26 @@ fts_get_table_id(
/*=============*/
const fts_table_t*
fts_table, /*!< in: FTS Auxiliary table */
- char* table_id); /*!< out: table id, must be at least
+ char* table_id) /*!< out: table id, must be at least
FTS_AUX_MIN_TABLE_ID_LENGTH bytes
long */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Add the table to add to the OPTIMIZER's list. */
UNIV_INTERN
void
fts_optimize_add_table(
/*===================*/
- dict_table_t* table); /*!< in: table to add */
+ dict_table_t* table) /*!< in: table to add */
+ __attribute__((nonnull));
/******************************************************************//**
Optimize a table. */
UNIV_INTERN
void
fts_optimize_do_table(
/*==================*/
- dict_table_t* table); /*!< in: table to optimize */
+ dict_table_t* table) /*!< in: table to optimize */
+ __attribute__((nonnull));
/******************************************************************//**
Construct the prefix name of an FTS table.
@return own: table name, must be freed with mem_free() */
@@ -584,7 +618,8 @@ char*
fts_get_table_name_prefix(
/*======================*/
const fts_table_t*
- fts_table); /*!< in: Auxiliary table type */
+ fts_table) /*!< in: Auxiliary table type */
+ __attribute__((nonnull, malloc, warn_unused_result));
/******************************************************************//**
Add node positions. */
UNIV_INTERN
@@ -594,7 +629,8 @@ fts_cache_node_add_positions(
fts_cache_t* cache, /*!< in: cache */
fts_node_t* node, /*!< in: word node */
doc_id_t doc_id, /*!< in: doc id */
- ib_vector_t* positions); /*!< in: fts_token_t::positions */
+ ib_vector_t* positions) /*!< in: fts_token_t::positions */
+ __attribute__((nonnull(2,4)));
/******************************************************************//**
Create the config table name for retrieving index specific value.
@@ -604,7 +640,8 @@ char*
fts_config_create_index_param_name(
/*===============================*/
const char* param, /*!< in: base name of param */
- const dict_index_t* index); /*!< in: index for config */
+ const dict_index_t* index) /*!< in: index for config */
+ __attribute__((nonnull, malloc, warn_unused_result));
#ifndef UNIV_NONINL
#include "fts0priv.ic"
diff --git a/storage/innobase/include/fts0priv.ic b/storage/innobase/include/fts0priv.ic
index 716ea4713b5..268bb7e2227 100644
--- a/storage/innobase/include/fts0priv.ic
+++ b/storage/innobase/include/fts0priv.ic
@@ -31,15 +31,9 @@ UNIV_INLINE
int
fts_write_object_id(
/*================*/
- ib_id_t id, /* in: a table/index id */
+ ib_id_t id, /* in: a table/index id */
char* str) /* in: buffer to write the id to */
{
-#ifdef __WIN__
-# define UINT64PFx "%016I64u"
-#else
-# define UINT64PFx "%016"PRIx64
-# endif /* __WIN__ */
-
// FIXME: Use ut_snprintf()
return(sprintf(str, UINT64PFx, id));
}
@@ -54,6 +48,45 @@ fts_read_object_id(
ib_id_t* id, /* out: an id */
const char* str) /* in: buffer to read from */
{
- return(sscanf(str, IB_ID_FMT, id) == 2);
+ return(sscanf(str, UINT64PFx, id) == 1);
+}
+
+/******************************************************************//**
+Compare two fts_trx_table_t instances.
+@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
+UNIV_INLINE
+int
+fts_trx_table_cmp(
+/*==============*/
+ const void* p1, /*!< in: id1 */
+ const void* p2) /*!< in: id2 */
+{
+ const dict_table_t* table1 = (*(const fts_trx_table_t**) p1)->table;
+ const dict_table_t* table2 = (*(const fts_trx_table_t**) p2)->table;
+
+ return((table1->id > table2->id)
+ ? 1
+ : (table1->id == table2->id)
+ ? 0
+ : -1);
}
+/******************************************************************//**
+Compare a table id with a fts_trx_table_t table id.
+@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
+UNIV_INLINE
+int
+fts_trx_table_id_cmp(
+/*=================*/
+ const void* p1, /*!< in: id1 */
+ const void* p2) /*!< in: id2 */
+{
+ const ullint* table_id = (const ullint*) p1;
+ const dict_table_t* table2 = (*(const fts_trx_table_t**) p2)->table;
+
+ return((*table_id > table2->id)
+ ? 1
+ : (*table_id == table2->id)
+ ? 0
+ : -1);
+}
diff --git a/storage/innobase/include/fts0types.h b/storage/innobase/include/fts0types.h
index 5b28f2c9473..8fc52c9fc5e 100644
--- a/storage/innobase/include/fts0types.h
+++ b/storage/innobase/include/fts0types.h
@@ -32,41 +32,35 @@ Created 2007-03-27 Sunny Bains
#include "ut0rbt.h"
#include "fts0fts.h"
-/** Types (aliases) used within FTS. */
-typedef struct fts_que_struct fts_que_t;
-typedef struct fts_node_struct fts_node_t;
-typedef struct fts_word_struct fts_word_t;
-typedef struct fts_fetch_struct fts_fetch_t;
-typedef struct fts_update_struct fts_update_t;
-typedef struct fts_get_doc_struct fts_get_doc_t;
-typedef struct fts_utf8_str_struct fts_utf8_str_t;
-typedef struct fts_doc_stats_struct fts_doc_stats_t;
-typedef struct fts_tokenizer_word_struct fts_tokenizer_word_t;
-typedef struct fts_index_selector_struct fts_index_selector_t;
+/** Types used within FTS. */
+struct fts_que_t;
+struct fts_node_t;
+struct fts_utf8_str_t;
/** Callbacks used within FTS. */
typedef pars_user_func_cb_t fts_sql_callback;
typedef void (*fts_filter)(void*, fts_node_t*, void*, ulint len);
/** Statistics relevant to a particular document, used during retrieval. */
-struct fts_doc_stats_struct {
+struct fts_doc_stats_t {
doc_id_t doc_id; /*!< Document id */
ulint word_count; /*!< Total words in the document */
};
/** It's main purpose is to store the SQL prepared statements that
are required to retrieve a document from the database. */
-struct fts_get_doc_struct {
+struct fts_get_doc_t {
fts_index_cache_t*
index_cache; /*!< The index cache instance */
/*!< Parsed sql statement */
que_t* get_document_graph;
+ fts_cache_t* cache; /*!< The parent cache */
};
/** Since we can have multiple FTS indexes on a table, we keep a
per index cache of words etc. */
-struct fts_index_cache_struct {
+struct fts_index_cache_t {
dict_index_t* index; /*!< The FTS index instance */
ib_rbt_t* words; /*!< Nodes; indexed by fts_string_t*,
@@ -88,7 +82,7 @@ struct fts_index_cache_struct {
/** For supporting the tracking of updates on multiple FTS indexes we need
to track which FTS indexes need to be updated. For INSERT and DELETE we
update all fts indexes. */
-struct fts_update_struct {
+struct fts_update_t {
doc_id_t doc_id; /*!< The doc id affected */
ib_vector_t* fts_indexes; /*!< The FTS indexes that need to be
@@ -100,7 +94,7 @@ struct fts_update_struct {
};
/** Stop word control infotmation. */
-struct fts_stopword_struct {
+struct fts_stopword_t {
ulint status; /*!< Status of the stopword tree */
ib_alloc_t* heap; /*!< The memory allocator to use */
ib_rbt_t* cached_stopword;/*!< This stores all active stopwords */
@@ -109,7 +103,7 @@ struct fts_stopword_struct {
/** The SYNC state of the cache. There is one instance of this struct
associated with each ADD thread. */
-struct fts_sync_struct {
+struct fts_sync_t {
trx_t* trx; /*!< The transaction used for SYNCing
the cache to disk */
dict_table_t* table; /*!< Table with FTS index(es) */
@@ -131,12 +125,10 @@ struct fts_sync_struct {
ib_time_t start_time; /*!< SYNC start time */
};
-typedef struct fts_sync_struct fts_sync_t;
-
/** The cache for the FTS system. It is a memory-based inverted index
that new entries are added to, until it grows over the configured maximum
size, at which time its contents are written to the INDEX table. */
-struct fts_cache_struct {
+struct fts_cache_t {
rw_lock_t lock; /*!< lock protecting all access to the
memory buffer. FIXME: this needs to
be our new upgrade-capable rw-lock */
@@ -145,11 +137,11 @@ struct fts_cache_struct {
intialization, it has different
SYNC level as above cache lock */
- mutex_t optimize_lock; /*!< Lock for OPTIMIZE */
+ ib_mutex_t optimize_lock; /*!< Lock for OPTIMIZE */
- mutex_t deleted_lock; /*!< Lock covering deleted_doc_ids */
+ ib_mutex_t deleted_lock; /*!< Lock covering deleted_doc_ids */
- mutex_t doc_id_lock; /*!< Lock covering Doc ID */
+ ib_mutex_t doc_id_lock; /*!< Lock covering Doc ID */
ib_vector_t* deleted_doc_ids;/*!< Array of deleted doc ids, each
element is of type fts_update_t */
@@ -200,7 +192,7 @@ struct fts_cache_struct {
};
/** Columns of the FTS auxiliary INDEX table */
-struct fts_node_struct {
+struct fts_node_t {
doc_id_t first_doc_id; /*!< First document id in ilist. */
doc_id_t last_doc_id; /*!< Last document id in ilist. */
@@ -223,7 +215,7 @@ struct fts_node_struct {
};
/** A tokenizer word. Contains information about one word. */
-struct fts_tokenizer_word_struct {
+struct fts_tokenizer_word_t {
fts_string_t text; /*!< Token text. */
ib_vector_t* nodes; /*!< Word node ilists, each element is
@@ -231,7 +223,7 @@ struct fts_tokenizer_word_struct {
};
/** Word text plus it's array of nodes as on disk in FTS index */
-struct fts_word_struct {
+struct fts_word_t {
fts_string_t text; /*!< Word value in UTF-8 */
ib_vector_t* nodes; /*!< Nodes read from disk */
@@ -239,7 +231,7 @@ struct fts_word_struct {
};
/** Callback for reading and filtering nodes that are read from FTS index */
-struct fts_fetch_struct {
+struct fts_fetch_t {
void* read_arg; /*!< Arg for the sql_callback */
fts_sql_callback
@@ -248,7 +240,7 @@ struct fts_fetch_struct {
};
/** For horizontally splitting an FTS auxiliary index */
-struct fts_index_selector_struct {
+struct fts_index_selector_t {
ulint value; /*!< Character value at which
to split */
@@ -256,7 +248,7 @@ struct fts_index_selector_struct {
};
/** This type represents a single document. */
-struct fts_doc_struct {
+struct fts_doc_t {
fts_string_t text; /*!< document text */
ibool found; /*!< TRUE if the document was found
@@ -276,7 +268,7 @@ struct fts_doc_struct {
};
/** A token and its positions within a document. */
-struct fts_token_struct {
+struct fts_token_t {
fts_string_t text; /*!< token text */
ib_vector_t* positions; /*!< an array of the positions the
diff --git a/storage/innobase/include/fts0types.ic b/storage/innobase/include/fts0types.ic
index 2734a331a86..b96c3f9dac8 100644
--- a/storage/innobase/include/fts0types.ic
+++ b/storage/innobase/include/fts0types.ic
@@ -37,46 +37,6 @@ extern const ulint UTF8_ERROR;
#define fts_utf8_is_valid(b) (((b) & 0xC0) == 0x80)
/******************************************************************//**
-Compare two fts_trx_table_t instances.
-@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
-UNIV_INLINE
-int
-fts_trx_table_cmp(
-/*==============*/
- const void* p1, /*!< in: id1 */
- const void* p2) /*!< in: id2 */
-{
- const dict_table_t* table1 = (*(const fts_trx_table_t**) p1)->table;
- const dict_table_t* table2 = (*(const fts_trx_table_t**) p2)->table;
-
- return((table1->id > table2->id)
- ? 1
- : (table1->id == table2->id)
- ? 0
- : -1);
-}
-
-/******************************************************************//**
-Compare a table id with a fts_trx_table_t table id.
-@return < 0 if n1 < n2, 0 if n1 == n2,> 0 if n1 > n2 */
-UNIV_INLINE
-int
-fts_trx_table_id_cmp(
-/*=================*/
- const void* p1, /*!< in: id1 */
- const void* p2) /*!< in: id2 */
-{
- const ullint* table_id = (const ullint*) p1;
- const dict_table_t* table2 = (*(const fts_trx_table_t**) p2)->table;
-
- return((*table_id > table2->id)
- ? 1
- : (*table_id == table2->id)
- ? 0
- : -1);
-}
-
-/******************************************************************//**
Duplicate an UTF-8 string.
@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
UNIV_INLINE
diff --git a/storage/innobase/include/ha0ha.h b/storage/innobase/include/ha0ha.h
index 1a2b8dac014..2e4397ea5fc 100644
--- a/storage/innobase/include/ha0ha.h
+++ b/storage/innobase/include/ha0ha.h
@@ -221,10 +221,7 @@ ha_print_info(
#endif /* !UNIV_HOTBACKUP */
/** The hash table external chain node */
-typedef struct ha_node_struct ha_node_t;
-
-/** The hash table external chain node */
-struct ha_node_struct {
+struct ha_node_t {
ha_node_t* next; /*!< next chain node or NULL if none */
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
buf_block_t* block; /*!< buffer block containing the data, or NULL */
diff --git a/storage/innobase/include/ha0storage.h b/storage/innobase/include/ha0storage.h
index caf42abfcfe..0073930b502 100644
--- a/storage/innobase/include/ha0storage.h
+++ b/storage/innobase/include/ha0storage.h
@@ -39,7 +39,7 @@ constant per ha_storage's lifetime. */
#define HA_STORAGE_DEFAULT_HASH_CELLS 4096
/** Hash storage */
-typedef struct ha_storage_struct ha_storage_t;
+struct ha_storage_t;
/*******************************************************************//**
Creates a hash storage. If any of the parameters is 0, then a default
diff --git a/storage/innobase/include/ha0storage.ic b/storage/innobase/include/ha0storage.ic
index ce6e7406b43..7150ca045ec 100644
--- a/storage/innobase/include/ha0storage.ic
+++ b/storage/innobase/include/ha0storage.ic
@@ -31,7 +31,7 @@ Created September 24, 2007 Vasil Dimov
#include "mem0mem.h"
/** Hash storage for strings */
-struct ha_storage_struct {
+struct ha_storage_t {
mem_heap_t* heap; /*!< memory heap from which memory is
allocated */
hash_table_t* hash; /*!< hash table used to avoid
@@ -39,9 +39,7 @@ struct ha_storage_struct {
};
/** Objects of this type are stored in ha_storage_t */
-typedef struct ha_storage_node_struct ha_storage_node_t;
-/** Objects of this type are stored in ha_storage_struct */
-struct ha_storage_node_struct {
+struct ha_storage_node_t {
ulint data_len;/*!< length of the data */
const void* data; /*!< pointer to data */
ha_storage_node_t* next; /*!< next node in hash chain */
diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h
index 5512bf7c62f..fb4b0120bbb 100644
--- a/storage/innobase/include/ha_prototypes.h
+++ b/storage/innobase/include/ha_prototypes.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2006, 2010, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2006, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -28,15 +28,19 @@ Created 5/11/2006 Osku Salerma
#define HA_INNODB_PROTOTYPES_H
#include "my_dbug.h"
+#include "mysqld_error.h"
#include "my_compare.h"
#include "my_sys.h"
#include "m_string.h"
+#include "debug_sync.h"
+#include "my_base.h"
#include "trx0types.h"
#include "m_ctype.h" /* CHARSET_INFO */
-// Forward declaration
-typedef struct fts_string_struct fts_string_t;
+// Forward declarations
+class Field;
+struct fts_string_t;
/*********************************************************************//**
Wrapper around MySQL's copy_and_convert function.
@@ -105,7 +109,7 @@ innobase_convert_name(
ulint buflen, /*!< in: length of buf, in bytes */
const char* id, /*!< in: identifier to convert */
ulint idlen, /*!< in: length of id, in bytes */
- void* thd, /*!< in: MySQL connection thread, or NULL */
+ THD* thd, /*!< in: MySQL connection thread, or NULL */
ibool table_id);/*!< in: TRUE=id is a table or database name;
FALSE=id is an index name */
@@ -120,7 +124,19 @@ UNIV_INTERN
ibool
thd_is_replication_slave_thread(
/*============================*/
- void* thd); /*!< in: thread handle (THD*) */
+ THD* thd); /*!< in: thread handle */
+
+/******************************************************************//**
+Gets information on the durability property requested by thread.
+Used when writing either a prepare or commit record to the log
+buffer.
+@return the durability property. */
+UNIV_INTERN
+enum durability_properties
+thd_requested_durability(
+/*=====================*/
+ const THD* thd) /*!< in: thread handle */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Returns true if the transaction this thread is processing has edited
@@ -132,7 +148,7 @@ UNIV_INTERN
ibool
thd_has_edited_nontrans_tables(
/*===========================*/
- void* thd); /*!< in: thread handle (THD*) */
+ THD* thd); /*!< in: thread handle */
/*************************************************************//**
Prints info of a THD object (== user session thread) to the given file. */
@@ -141,21 +157,10 @@ void
innobase_mysql_print_thd(
/*=====================*/
FILE* f, /*!< in: output stream */
- void* thd, /*!< in: pointer to a MySQL THD object */
+ THD* thd, /*!< in: pointer to a MySQL THD object */
uint max_query_len); /*!< in: max query length to print, or 0 to
use the default max length */
-/*****************************************************************//**
-Log code calls this whenever log has been written and/or flushed up
-to a new position. We use this to notify upper layer of a new commit
-checkpoint when necessary.*/
-UNIV_INTERN
-void
-innobase_mysql_log_notify(
-/*===============*/
- ib_uint64_t write_lsn, /*!< in: LSN written to log file */
- ib_uint64_t flush_lsn); /*!< in: LSN flushed to disk */
-
/*************************************************************//**
InnoDB uses this function to compare two data fields for which the data type
is such that we must use MySQL code to compare them.
@@ -173,6 +178,18 @@ innobase_mysql_cmp(
unsigned int b_length) /*!< in: data field length,
not UNIV_SQL_NULL */
__attribute__((nonnull, warn_unused_result));
+
+/*****************************************************************//**
+Log code calls this whenever log has been written and/or flushed up
+to a new position. We use this to notify upper layer of a new commit
+checkpoint when necessary.*/
+extern "C" UNIV_INTERN
+void
+innobase_mysql_log_notify(
+/*===============*/
+ ib_uint64_t write_lsn, /*!< in: LSN written to log file */
+ ib_uint64_t flush_lsn); /*!< in: LSN flushed to disk */
+
/**************************************************************//**
Converts a MySQL type to an InnoDB type. Note that this function returns
the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1
@@ -233,11 +250,11 @@ innobase_basename(
/******************************************************************//**
Returns true if the thread is executing a SELECT statement.
@return true if thd is executing SELECT */
-
+UNIV_INTERN
ibool
thd_is_select(
/*==========*/
- const void* thd); /*!< in: thread handle (THD*) */
+ const THD* thd); /*!< in: thread handle */
/******************************************************************//**
Converts an identifier to a table name. */
@@ -276,7 +293,7 @@ UNIV_INTERN
struct charset_info_st*
innobase_get_charset(
/*=================*/
- void* mysql_thd); /*!< in: MySQL thread handle */
+ THD* thd); /*!< in: MySQL thread handle */
/**********************************************************************//**
Determines the current SQL statement.
@return SQL statement string */
@@ -284,7 +301,7 @@ UNIV_INTERN
const char*
innobase_get_stmt(
/*==============*/
- void* mysql_thd, /*!< in: MySQL thread handle */
+ THD* thd, /*!< in: MySQL thread handle */
size_t* length) /*!< out: length of the SQL statement */
__attribute__((nonnull));
/******************************************************************//**
@@ -321,17 +338,17 @@ UNIV_INTERN
ibool
thd_supports_xa(
/*============*/
- void* thd); /*!< in: thread handle (THD*), or NULL to query
+ THD* thd); /*!< in: thread handle, or NULL to query
the global innodb_supports_xa */
/******************************************************************//**
Returns the lock wait timeout for the current connection.
@return the lock wait timeout, in seconds */
-
+UNIV_INTERN
ulong
thd_lock_wait_timeout(
/*==================*/
- void* thd); /*!< in: thread handle (THD*), or NULL to query
+ THD* thd); /*!< in: thread handle, or NULL to query
the global innodb_lock_wait_timeout */
/******************************************************************//**
Add up the time waited for the lock for the current query. */
@@ -339,7 +356,7 @@ UNIV_INTERN
void
thd_set_lock_wait_time(
/*===================*/
- void* thd, /*!< in: thread handle (THD*) */
+ THD* thd, /*!< in/out: thread handle */
ulint value); /*!< in: time waited for the lock */
/**********************************************************************//**
@@ -363,6 +380,15 @@ ulint
innobase_get_lower_case_table_names(void);
/*=====================================*/
+/*****************************************************************//**
+Frees a possible InnoDB trx object associated with the current THD.
+@return 0 or error number */
+UNIV_INTERN
+int
+innobase_close_thd(
+/*===============*/
+ THD* thd); /*!< in: MySQL thread handle for
+ which to close the connection */
/*************************************************************//**
Get the next token from the given string and store it in *token. */
UNIV_INTERN
@@ -414,7 +440,7 @@ UNIV_INTERN
ibool
thd_trx_is_read_only(
/*=================*/
- void* thd); /*!< in: thread handle (THD*) */
+ THD* thd); /*!< in/out: thread handle */
/******************************************************************//**
Check if the transaction is an auto-commit transaction. TRUE also
@@ -424,5 +450,139 @@ UNIV_INTERN
ibool
thd_trx_is_auto_commit(
/*===================*/
- void* thd); /*!< in: thread handle (THD*) can be NULL */
+ THD* thd); /*!< in: thread handle, or NULL */
+
+/*****************************************************************//**
+A wrapper function of innobase_convert_name(), convert a table or
+index name to the MySQL system_charset_info (UTF-8) and quote it if needed.
+@return pointer to the end of buf */
+UNIV_INTERN
+void
+innobase_format_name(
+/*==================*/
+ char* buf, /*!< out: buffer for converted
+ identifier */
+ ulint buflen, /*!< in: length of buf, in bytes */
+ const char* name, /*!< in: index or table name
+ to format */
+ ibool is_index_name) /*!< in: index name */
+ __attribute__((nonnull));
+
+/** Corresponds to Sql_condition::enum_warning_level. */
+enum ib_log_level_t {
+ IB_LOG_LEVEL_INFO,
+ IB_LOG_LEVEL_WARN,
+ IB_LOG_LEVEL_ERROR,
+ IB_LOG_LEVEL_FATAL
+};
+
+/******************************************************************//**
+Use this when the args are first converted to a formatted string and then
+passed to the format string from errmsg-utf8.txt. The error message format
+must be: "Some string ... %s".
+
+Push a warning message to the client, it is a wrapper around:
+
+void push_warning_printf(
+ THD *thd, Sql_condition::enum_warning_level level,
+ uint code, const char *format, ...);
+*/
+UNIV_INTERN
+void
+ib_errf(
+/*====*/
+ THD* thd, /*!< in/out: session */
+ ib_log_level_t level, /*!< in: warning level */
+ ib_uint32_t code, /*!< MySQL error code */
+ const char* format, /*!< printf format */
+ ...) /*!< Args */
+ __attribute__((format(printf, 4, 5)));
+
+/******************************************************************//**
+Use this when the args are passed to the format string from
+errmsg-utf8.txt directly as is.
+
+Push a warning message to the client, it is a wrapper around:
+
+void push_warning_printf(
+ THD *thd, Sql_condition::enum_warning_level level,
+ uint code, const char *format, ...);
+*/
+UNIV_INTERN
+void
+ib_senderrf(
+/*========*/
+ THD* thd, /*!< in/out: session */
+ ib_log_level_t level, /*!< in: warning level */
+ ib_uint32_t code, /*!< MySQL error code */
+ ...); /*!< Args */
+
+/******************************************************************//**
+Write a message to the MySQL log, prefixed with "InnoDB: ".
+Wrapper around sql_print_information() */
+UNIV_INTERN
+void
+ib_logf(
+/*====*/
+ ib_log_level_t level, /*!< in: warning level */
+ const char* format, /*!< printf format */
+ ...) /*!< Args */
+ __attribute__((format(printf, 2, 3)));
+
+/******************************************************************//**
+Returns the NUL terminated value of glob_hostname.
+@return pointer to glob_hostname. */
+UNIV_INTERN
+const char*
+server_get_hostname();
+/*=================*/
+
+/******************************************************************//**
+Get the error message format string.
+@return the format string or 0 if not found. */
+UNIV_INTERN
+const char*
+innobase_get_err_msg(
+/*=================*/
+ int error_code); /*!< in: MySQL error code */
+
+/*********************************************************************//**
+Compute the next autoinc value.
+
+For MySQL replication the autoincrement values can be partitioned among
+the nodes. The offset is the start or origin of the autoincrement value
+for a particular node. For n nodes the increment will be n and the offset
+will be in the interval [1, n]. The formula tries to allocate the next
+value for a particular node.
+
+Note: This function is also called with increment set to the number of
+values we want to reserve for multi-value inserts e.g.,
+
+ INSERT INTO T VALUES(), (), ();
+
+innobase_next_autoinc() will be called with increment set to 3 where
+autoinc_lock_mode != TRADITIONAL because we want to reserve 3 values for
+the multi-value INSERT above.
+@return the next value */
+UNIV_INTERN
+ulonglong
+innobase_next_autoinc(
+/*==================*/
+ ulonglong current, /*!< in: Current value */
+ ulonglong need, /*!< in: count of values needed */
+ ulonglong step, /*!< in: AUTOINC increment step */
+ ulonglong offset, /*!< in: AUTOINC offset */
+ ulonglong max_value) /*!< in: max value for type */
+ __attribute__((pure, warn_unused_result));
+
+/********************************************************************//**
+Get the upper limit of the MySQL integral and floating-point type.
+@return maximum allowed value for the field */
+UNIV_INTERN
+ulonglong
+innobase_get_int_col_max_value(
+/*===========================*/
+ const Field* field) /*!< in: MySQL field */
+ __attribute__((nonnull, pure, warn_unused_result));
+
#endif /* HA_INNODB_PROTOTYPES_H */
diff --git a/storage/innobase/include/handler0alter.h b/storage/innobase/include/handler0alter.h
index c5d439ef21b..52aaf2d25ef 100644
--- a/storage/innobase/include/handler0alter.h
+++ b/storage/innobase/include/handler0alter.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2005, 2010, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2005, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,11 +27,34 @@ UNIV_INTERN
void
innobase_rec_to_mysql(
/*==================*/
- struct TABLE* table, /*!< in/out: MySQL table */
- const rec_t* rec, /*!< in: record */
- const dict_index_t* index, /*!< in: index */
- const ulint* offsets); /*!< in: rec_get_offsets(
- rec, index, ...) */
+ struct TABLE* table, /*!< in/out: MySQL table */
+ const rec_t* rec, /*!< in: record */
+ const dict_index_t* index, /*!< in: index */
+ const ulint* offsets)/*!< in: rec_get_offsets(
+ rec, index, ...) */
+ __attribute__((nonnull));
+
+/*************************************************************//**
+Copies an InnoDB index entry to table->record[0]. */
+UNIV_INTERN
+void
+innobase_fields_to_mysql(
+/*=====================*/
+ struct TABLE* table, /*!< in/out: MySQL table */
+ const dict_index_t* index, /*!< in: InnoDB index */
+ const dfield_t* fields) /*!< in: InnoDB index fields */
+ __attribute__((nonnull));
+
+/*************************************************************//**
+Copies an InnoDB row to table->record[0]. */
+UNIV_INTERN
+void
+innobase_row_to_mysql(
+/*==================*/
+ struct TABLE* table, /*!< in/out: MySQL table */
+ const dict_table_t* itab, /*!< in: InnoDB table */
+ const dtuple_t* row) /*!< in: InnoDB row */
+ __attribute__((nonnull));
/*************************************************************//**
Resets table->record[0]. */
@@ -39,4 +62,53 @@ UNIV_INTERN
void
innobase_rec_reset(
/*===============*/
- struct TABLE* table); /*!< in/out: MySQL table */
+ struct TABLE* table) /*!< in/out: MySQL table */
+ __attribute__((nonnull));
+
+/** Generate the next autoinc based on a snapshot of the session
+auto_increment_increment and auto_increment_offset variables. */
+struct ib_sequence_t {
+
+ /**
+ @param thd - the session
+ @param start_value - the lower bound
+ @param max_value - the upper bound (inclusive) */
+ ib_sequence_t(THD* thd, ulonglong start_value, ulonglong max_value);
+
+ /**
+ Postfix increment
+ @return the value to insert */
+ ulonglong operator++(int) UNIV_NOTHROW;
+
+ /** Check if the autoinc "sequence" is exhausted.
+ @return true if the sequence is exhausted */
+ bool eof() const UNIV_NOTHROW
+ {
+ return(m_eof);
+ }
+
+ /**
+ @return the next value in the sequence */
+ ulonglong last() const UNIV_NOTHROW
+ {
+ ut_ad(m_next_value > 0);
+
+ return(m_next_value);
+ }
+
+	/** Maximum column value if adding an AUTOINC column else 0. Once
+ we reach the end of the sequence it will be set to ~0. */
+ const ulonglong m_max_value;
+
+ /** Value of auto_increment_increment */
+ ulong m_increment;
+
+ /** Value of auto_increment_offset */
+ ulong m_offset;
+
+ /** Next value in the sequence */
+ ulonglong m_next_value;
+
+ /** true if no more values left in the sequence */
+ bool m_eof;
+};
diff --git a/storage/innobase/include/hash0hash.h b/storage/innobase/include/hash0hash.h
index 1c19ea53a23..6f9a628df5d 100644
--- a/storage/innobase/include/hash0hash.h
+++ b/storage/innobase/include/hash0hash.h
@@ -33,8 +33,8 @@ Created 5/20/1997 Heikki Tuuri
# include "sync0rw.h"
#endif /* !UNIV_HOTBACKUP */
-typedef struct hash_table_struct hash_table_t;
-typedef struct hash_cell_struct hash_cell_t;
+struct hash_table_t;
+struct hash_cell_t;
typedef void* hash_node_t;
@@ -382,7 +382,7 @@ hash_get_heap(
Gets the nth mutex in a hash table.
@return mutex */
UNIV_INLINE
-mutex_t*
+ib_mutex_t*
hash_get_nth_mutex(
/*===============*/
hash_table_t* table, /*!< in: hash table */
@@ -400,7 +400,7 @@ hash_get_nth_lock(
Gets the mutex for a fold value in a hash table.
@return mutex */
UNIV_INLINE
-mutex_t*
+ib_mutex_t*
hash_get_mutex(
/*===========*/
hash_table_t* table, /*!< in: hash table */
@@ -451,7 +451,7 @@ void
hash_mutex_exit_all_but(
/*====================*/
hash_table_t* table, /*!< in: hash table */
- mutex_t* keep_mutex); /*!< in: mutex to keep */
+ ib_mutex_t* keep_mutex); /*!< in: mutex to keep */
/************************************************************//**
s-lock a lock for a fold value in a hash table. */
UNIV_INTERN
@@ -524,12 +524,12 @@ hash_unlock_x_all_but(
# define hash_unlock_x_all_but(t, l) ((void) 0)
#endif /* !UNIV_HOTBACKUP */
-struct hash_cell_struct{
+struct hash_cell_t{
void* node; /*!< hash chain node, NULL if none */
};
/* The hash table structure */
-struct hash_table_struct {
+struct hash_table_t {
enum hash_table_sync_t type; /*<! type of hash_table. */
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
# ifndef UNIV_HOTBACKUP
@@ -547,7 +547,7 @@ struct hash_table_struct {
rw_locks depending on the type.
Must be a power of 2 */
union {
- mutex_t* mutexes;/* NULL, or an array of mutexes
+ ib_mutex_t* mutexes;/* NULL, or an array of mutexes
used to protect segments of the
hash table */
rw_lock_t* rw_locks;/* NULL, or an array of rw_lcoks
diff --git a/storage/innobase/include/hash0hash.ic b/storage/innobase/include/hash0hash.ic
index 1e5474601d5..254f3f82e5d 100644
--- a/storage/innobase/include/hash0hash.ic
+++ b/storage/innobase/include/hash0hash.ic
@@ -150,7 +150,7 @@ hash_get_heap(
Gets the nth mutex in a hash table.
@return mutex */
UNIV_INLINE
-mutex_t*
+ib_mutex_t*
hash_get_nth_mutex(
/*===============*/
hash_table_t* table, /*!< in: hash table */
@@ -168,7 +168,7 @@ hash_get_nth_mutex(
Gets the mutex for a fold value in a hash table.
@return mutex */
UNIV_INLINE
-mutex_t*
+ib_mutex_t*
hash_get_mutex(
/*===========*/
hash_table_t* table, /*!< in: hash table */
diff --git a/storage/innobase/include/ibuf0ibuf.h b/storage/innobase/include/ibuf0ibuf.h
index f405ebf8d11..e64f067d364 100644
--- a/storage/innobase/include/ibuf0ibuf.h
+++ b/storage/innobase/include/ibuf0ibuf.h
@@ -376,24 +376,16 @@ will be merged from ibuf trees to the pages read, 0 if ibuf is
empty */
UNIV_INTERN
ulint
-ibuf_contract(
-/*==========*/
- ibool sync); /*!< in: TRUE if the caller wants to wait for the
- issued read with the highest tablespace address
- to complete */
-/*********************************************************************//**
-Contracts insert buffer trees by reading pages to the buffer pool.
-@return a lower limit for the combined size in bytes of entries which
-will be merged from ibuf trees to the pages read, 0 if ibuf is
-empty */
-UNIV_INTERN
-ulint
ibuf_contract_in_background(
/*========================*/
- ibool full); /*!< in: TRUE if the caller wants to do a full
- contract based on PCT_IO(100). If FALSE then
- the size of contract batch is determined based
- on the current size of the ibuf tree. */
+ table_id_t table_id, /*!< in: if merge should be done only
+ for a specific table, for all tables
+ this should be 0 */
+ ibool full); /*!< in: TRUE if the caller wants to
+ do a full contract based on PCT_IO(100).
+ If FALSE then the size of contract
+ batch is determined based on the
+ current size of the ibuf tree. */
#endif /* !UNIV_HOTBACKUP */
/*********************************************************************//**
Parses a redo log record of an ibuf bitmap page init.
@@ -449,6 +441,17 @@ void
ibuf_close(void);
/*============*/
+/******************************************************************//**
+Checks the insert buffer bitmaps on IMPORT TABLESPACE.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+ibuf_check_bitmap_on_import(
+/*========================*/
+ const trx_t* trx, /*!< in: transaction */
+ ulint space_id) /*!< in: tablespace identifier */
+ __attribute__((nonnull, warn_unused_result));
+
#define IBUF_HEADER_PAGE_NO FSP_IBUF_HEADER_PAGE_NO
#define IBUF_TREE_ROOT_PAGE_NO FSP_IBUF_TREE_ROOT_PAGE_NO
@@ -462,36 +465,6 @@ for the file segment from which the pages for the ibuf tree are allocated */
/* The insert buffer tree itself is always located in space 0. */
#define IBUF_SPACE_ID 0
-/** Insert buffer struct */
-struct ibuf_struct{
- ulint size; /*!< current size of the ibuf index
- tree, in pages */
- ulint max_size; /*!< recommended maximum size of the
- ibuf index tree, in pages */
- ulint seg_size; /*!< allocated pages of the file
- segment containing ibuf header and
- tree */
- ibool empty; /*!< Protected by the page
- latch of the root page of the
- insert buffer tree
- (FSP_IBUF_TREE_ROOT_PAGE_NO). TRUE
- if and only if the insert
- buffer tree is empty. */
- ulint free_list_len; /*!< length of the free list */
- ulint height; /*!< tree height */
- dict_index_t* index; /*!< insert buffer index */
-
- ulint n_merges; /*!< number of pages merged */
- ulint n_merged_ops[IBUF_OP_COUNT];
- /*!< number of operations of each type
- merged to index pages */
- ulint n_discarded_ops[IBUF_OP_COUNT];
- /*!< number of operations of each type
- discarded without merging due to the
- tablespace being deleted or the
- index being dropped */
-};
-
#ifndef UNIV_NONINL
#include "ibuf0ibuf.ic"
#endif
diff --git a/storage/innobase/include/ibuf0ibuf.ic b/storage/innobase/include/ibuf0ibuf.ic
index 8a4ec633b01..92ca2cbb9a2 100644
--- a/storage/innobase/include/ibuf0ibuf.ic
+++ b/storage/innobase/include/ibuf0ibuf.ic
@@ -58,6 +58,36 @@ ibuf_mtr_commit(
mtr_commit(mtr);
}
+/** Insert buffer struct */
+struct ibuf_t{
+ ulint size; /*!< current size of the ibuf index
+ tree, in pages */
+ ulint max_size; /*!< recommended maximum size of the
+ ibuf index tree, in pages */
+ ulint seg_size; /*!< allocated pages of the file
+ segment containing ibuf header and
+ tree */
+ ibool empty; /*!< Protected by the page
+ latch of the root page of the
+ insert buffer tree
+ (FSP_IBUF_TREE_ROOT_PAGE_NO). TRUE
+ if and only if the insert
+ buffer tree is empty. */
+ ulint free_list_len; /*!< length of the free list */
+ ulint height; /*!< tree height */
+ dict_index_t* index; /*!< insert buffer index */
+
+ ulint n_merges; /*!< number of pages merged */
+ ulint n_merged_ops[IBUF_OP_COUNT];
+ /*!< number of operations of each type
+ merged to index pages */
+ ulint n_discarded_ops[IBUF_OP_COUNT];
+ /*!< number of operations of each type
+ discarded without merging due to the
+ tablespace being deleted or the
+ index being dropped */
+};
+
/************************************************************************//**
Sets the free bit of the page in the ibuf bitmap. This is done in a separate
mini-transaction, hence this operation does not restrict further work to only
@@ -97,6 +127,7 @@ ibuf_should_try(
return(ibuf_use != IBUF_USE_NONE
&& ibuf->max_size != 0
&& !dict_index_is_clust(index)
+ && index->table->quiesce == QUIESCE_NONE
&& (ignore_sec_unique || !dict_index_is_unique(index)));
}
diff --git a/storage/innobase/include/ibuf0types.h b/storage/innobase/include/ibuf0types.h
index e404b62a011..3fdbf078b0b 100644
--- a/storage/innobase/include/ibuf0types.h
+++ b/storage/innobase/include/ibuf0types.h
@@ -26,6 +26,6 @@ Created 7/29/1997 Heikki Tuuri
#ifndef ibuf0types_h
#define ibuf0types_h
-typedef struct ibuf_struct ibuf_t;
+struct ibuf_t;
#endif
diff --git a/storage/innobase/include/lock0iter.h b/storage/innobase/include/lock0iter.h
index 42b4f7281e4..0054850b526 100644
--- a/storage/innobase/include/lock0iter.h
+++ b/storage/innobase/include/lock0iter.h
@@ -29,13 +29,13 @@ Created July 16, 2007 Vasil Dimov
#include "univ.i"
#include "lock0types.h"
-typedef struct lock_queue_iterator_struct {
+struct lock_queue_iterator_t {
const lock_t* current_lock;
/* In case this is a record lock queue (not table lock queue)
then bit_no is the record number within the heap in which the
record is stored. */
ulint bit_no;
-} lock_queue_iterator_t;
+};
/*******************************************************************//**
Initialize lock queue iterator so that it starts to iterate from
diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h
index a1ffe87d5bd..8e6fdaed3d5 100644
--- a/storage/innobase/include/lock0lock.h
+++ b/storage/innobase/include/lock0lock.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -275,7 +275,7 @@ the query thread to the lock wait state and inserts a waiting request
for a gap x-lock to the lock queue.
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-ulint
+dberr_t
lock_rec_insert_check_and_lock(
/*===========================*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is
@@ -285,10 +285,11 @@ lock_rec_insert_check_and_lock(
dict_index_t* index, /*!< in: index */
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr, /*!< in/out: mini-transaction */
- ibool* inherit);/*!< out: set to TRUE if the new
+ ibool* inherit)/*!< out: set to TRUE if the new
inserted record maybe should inherit
LOCK_GAP type locks from the successor
record */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Checks if locks of other transactions prevent an immediate modify (update,
delete mark, or delete unmark) of a clustered index record. If they do,
@@ -298,7 +299,7 @@ lock wait state and inserts a waiting request for a record x-lock to the
lock queue.
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-ulint
+dberr_t
lock_clust_rec_modify_check_and_lock(
/*=================================*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
@@ -308,13 +309,14 @@ lock_clust_rec_modify_check_and_lock(
modified */
dict_index_t* index, /*!< in: clustered index */
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- que_thr_t* thr); /*!< in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
+ __attribute__((warn_unused_result, nonnull));
/*********************************************************************//**
Checks if locks of other transactions prevent an immediate modify
(delete mark or delete unmark) of a secondary index record.
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-ulint
+dberr_t
lock_sec_rec_modify_check_and_lock(
/*===============================*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
@@ -326,15 +328,17 @@ lock_sec_rec_modify_check_and_lock(
clustered index record first: see the
comment below */
dict_index_t* index, /*!< in: secondary index */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr); /*!< in/out: mini-transaction */
+ que_thr_t* thr, /*!< in: query thread
+ (can be NULL if BTR_NO_LOCKING_FLAG) */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((warn_unused_result, nonnull(2,3,4,6)));
/*********************************************************************//**
Like lock_clust_rec_read_check_and_lock(), but reads a
secondary index record.
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-enum db_err
+dberr_t
lock_sec_rec_read_check_and_lock(
/*=============================*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
@@ -364,7 +368,7 @@ lock on the record.
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-enum db_err
+dberr_t
lock_clust_rec_read_check_and_lock(
/*===============================*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
@@ -395,7 +399,7 @@ lock_clust_rec_read_check_and_lock() that does not require the parameter
"offsets".
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-ulint
+dberr_t
lock_clust_rec_read_check_and_lock_alt(
/*===================================*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
@@ -413,13 +417,14 @@ lock_clust_rec_read_check_and_lock_alt(
SELECT FOR UPDATE */
ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
LOCK_REC_NOT_GAP */
- que_thr_t* thr); /*!< in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Checks that a record is seen in a consistent read.
-@return TRUE if sees, or FALSE if an earlier version of the record
+@return true if sees, or false if an earlier version of the record
should be retrieved */
UNIV_INTERN
-ibool
+bool
lock_clust_rec_cons_read_sees(
/*==========================*/
const rec_t* rec, /*!< in: user record which should be read or
@@ -431,26 +436,27 @@ lock_clust_rec_cons_read_sees(
Checks that a non-clustered index record is seen in a consistent read.
NOTE that a non-clustered index page contains so little information on
-its modifications that also in the case FALSE, the present version of
+its modifications that also in the case false, the present version of
rec may be the right, but we must check this from the clustered index
record.
-@return TRUE if certainly sees, or FALSE if an earlier version of the
+@return true if certainly sees, or false if an earlier version of the
clustered index record might be needed */
UNIV_INTERN
-ulint
+bool
lock_sec_rec_cons_read_sees(
/*========================*/
const rec_t* rec, /*!< in: user record which
should be read or passed over
by a read cursor */
- const read_view_t* view); /*!< in: consistent read view */
+ const read_view_t* view) /*!< in: consistent read view */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Locks the specified database table in the mode given. If the lock cannot
be granted immediately, the query thread is put to wait.
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-ulint
+dberr_t
lock_table(
/*=======*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is set,
@@ -458,7 +464,8 @@ lock_table(
dict_table_t* table, /*!< in/out: database table
in dictionary cache */
enum lock_mode mode, /*!< in: lock mode */
- que_thr_t* thr); /*!< in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
+ __attribute__((nonnull, warn_unused_result));
/*************************************************************//**
Removes a granted record lock of a transaction from the queue and grants
locks to other transactions waiting in the queue if they now are entitled
@@ -780,7 +787,7 @@ was selected as a deadlock victim, or if it has to wait then cancel
the wait lock.
@return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */
UNIV_INTERN
-enum db_err
+dberr_t
lock_trx_handle_wait(
/*=================*/
trx_t* trx) /*!< in/out: trx lock state */
@@ -864,29 +871,35 @@ lock_trx_has_sys_table_locks(
remains set when the waiting lock is granted,
or if the lock is inherited to a neighboring
record */
-#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION)&LOCK_MODE_MASK
+#define LOCK_CONV_BY_OTHER 4096 /*!< this bit is set when the lock is created
+ by other transaction */
+#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION|LOCK_CONV_BY_OTHER)&LOCK_MODE_MASK
# error
#endif
-#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION)&LOCK_TYPE_MASK
+#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION|LOCK_CONV_BY_OTHER)&LOCK_TYPE_MASK
# error
#endif
/* @} */
+/** Checks if this is a waiting lock created by lock->trx itself.
+@param type_mode lock->type_mode
+@return whether it is a waiting lock belonging to lock->trx */
+#define lock_is_wait_not_by_other(type_mode) \
+ ((type_mode & (LOCK_CONV_BY_OTHER | LOCK_WAIT)) == LOCK_WAIT)
+
/** Lock operation struct */
-typedef struct lock_op_struct lock_op_t;
-/** Lock operation struct */
-struct lock_op_struct{
+struct lock_op_t{
dict_table_t* table; /*!< table to be locked */
enum lock_mode mode; /*!< lock mode */
};
/** The lock system struct */
-struct lock_sys_struct{
- mutex_t mutex; /*!< Mutex protecting the
+struct lock_sys_t{
+ ib_mutex_t mutex; /*!< Mutex protecting the
locks */
hash_table_t* rec_hash; /*!< hash table of the record
locks */
- mutex_t wait_mutex; /*!< Mutex protecting the
+ ib_mutex_t wait_mutex; /*!< Mutex protecting the
next two fields */
srv_slot_t* waiting_threads; /*!< Array of user threads
suspended while waiting for
@@ -901,6 +914,16 @@ struct lock_sys_struct{
recovered transactions is
complete. Protected by
lock_sys->mutex */
+
+ ulint n_lock_max_wait_time; /*!< Max wait time */
+
+ os_event_t timeout_event; /*!< Set to the event that is
+ created in the lock wait monitor
+ thread. A value of 0 means the
+ thread is not active */
+
+ bool timeout_thread_active; /*!< True if the timeout thread
+ is running */
};
/** The lock system */
@@ -935,14 +958,6 @@ extern lock_sys_t* lock_sys;
mutex_exit(&lock_sys->wait_mutex); \
} while (0)
-// FIXME: Move these to lock_sys_t
-extern ibool srv_lock_timeout_active;
-extern ulint srv_n_lock_wait_count;
-extern ulint srv_n_lock_wait_current_count;
-extern ib_int64_t srv_n_lock_wait_time;
-extern ulint srv_n_lock_max_wait_time;
-extern os_event_t srv_lock_timeout_thread_event;
-
#ifndef UNIV_NONINL
#include "lock0lock.ic"
#endif
diff --git a/storage/innobase/include/lock0priv.h b/storage/innobase/include/lock0priv.h
index d516289e1f2..9f7ab9f76b6 100644
--- a/storage/innobase/include/lock0priv.h
+++ b/storage/innobase/include/lock0priv.h
@@ -40,9 +40,7 @@ those functions in lock/ */
#include "ut0lst.h"
/** A table lock */
-typedef struct lock_table_struct lock_table_t;
-/** A table lock */
-struct lock_table_struct {
+struct lock_table_t {
dict_table_t* table; /*!< database table in dictionary
cache */
UT_LIST_NODE_T(lock_t)
@@ -51,9 +49,7 @@ struct lock_table_struct {
};
/** Record lock for a page */
-typedef struct lock_rec_struct lock_rec_t;
-/** Record lock for a page */
-struct lock_rec_struct {
+struct lock_rec_t {
ulint space; /*!< space id */
ulint page_no; /*!< page number */
ulint n_bits; /*!< number of bits in the lock
@@ -63,7 +59,7 @@ struct lock_rec_struct {
};
/** Lock struct; protected by lock_sys->mutex */
-struct lock_struct {
+struct lock_t {
trx_t* trx; /*!< transaction owning the
lock */
UT_LIST_NODE_T(lock_t)
diff --git a/storage/innobase/include/lock0types.h b/storage/innobase/include/lock0types.h
index 16e6b2e0113..cf32e72f864 100644
--- a/storage/innobase/include/lock0types.h
+++ b/storage/innobase/include/lock0types.h
@@ -27,8 +27,8 @@ Created 5/7/1996 Heikki Tuuri
#define lock0types_h
#define lock_t ib_lock_t
-typedef struct lock_struct lock_t;
-typedef struct lock_sys_struct lock_sys_t;
+struct lock_t;
+struct lock_sys_t;
/* Basic lock modes */
enum lock_mode {
diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h
index 5d72c7a96da..dd5e37012b7 100644
--- a/storage/innobase/include/log0log.h
+++ b/storage/innobase/include/log0log.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All rights reserved.
+Copyright (c) 1995, 2012, Oracle and/or its affiliates. All rights reserved.
Copyright (c) 2009, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -48,9 +48,9 @@ typedef ib_uint64_t lsn_t;
#define LSN_PF UINT64PF
/** Redo log buffer */
-typedef struct log_struct log_t;
+struct log_t;
/** Redo log group */
-typedef struct log_group_struct log_group_t;
+struct log_group_t;
#ifdef UNIV_DEBUG
/** Flag: write to log file? */
@@ -67,7 +67,7 @@ extern ibool log_debug_writes;
#define LOG_WAIT_ONE_GROUP 92
#define LOG_WAIT_ALL_GROUPS 93
/* @} */
-/** Maximum number of log groups in log_group_struct::checkpoint_buf */
+/** Maximum number of log groups in log_group_t::checkpoint_buf */
#define LOG_MAX_N_GROUPS 32
/*******************************************************************//**
@@ -161,6 +161,14 @@ UNIV_INLINE
lsn_t
log_get_capacity(void);
/*==================*/
+/****************************************************************
+Get log_sys::max_modified_age_async. It is OK to read the value without
+holding log_sys::mutex because it is constant.
+@return max_modified_age_async */
+UNIV_INLINE
+lsn_t
+log_get_max_modified_age_async(void);
+/*================================*/
/******************************************************//**
Initializes the log. */
UNIV_INTERN
@@ -223,15 +231,6 @@ void
log_buffer_sync_in_background(
/*==========================*/
ibool flush); /*<! in: flush the logs to disk */
-/****************************************************************//**
-Checks if an asynchronous flushing of dirty pages is required in the
-background. This function is only called from the page cleaner thread.
-@return lsn to which the flushing should happen or LSN_MAX
-if flushing is not required */
-UNIV_INTERN
-lsn_t
-log_async_flush_lsn(void);
-/*=====================*/
/******************************************************//**
Makes a checkpoint. Note that this function does not flush dirty
blocks from the buffer pool: it only checks what is lsn of the oldest
@@ -550,13 +549,19 @@ UNIV_INTERN
void
log_refresh_stats(void);
/*===================*/
-/**********************************************************
+/********************************************************//**
+Closes all log groups. */
+UNIV_INTERN
+void
+log_group_close_all(void);
+/*=====================*/
+/********************************************************//**
Shutdown the log system but do not release all the memory. */
UNIV_INTERN
void
log_shutdown(void);
/*==============*/
-/**********************************************************
+/********************************************************//**
Free the log system data structures. */
UNIV_INTERN
void
@@ -712,7 +717,7 @@ extern log_t* log_sys;
/** Log group consists of a number of log files, each of the same size; a log
group is implemented as a space in the sense of the module fil0fil. */
-struct log_group_struct{
+struct log_group_t{
/* The following fields are protected by log_sys->mutex */
ulint id; /*!< log group id */
ulint n_files; /*!< number of files in the group */
@@ -764,7 +769,7 @@ struct log_group_struct{
};
/** Redo log buffer */
-struct log_struct{
+struct log_t{
byte pad[64]; /*!< padding to prevent other memory
update hotspots from residing on the
same memory cache line */
@@ -772,9 +777,9 @@ struct log_struct{
ulint buf_free; /*!< first free offset within the log
buffer */
#ifndef UNIV_HOTBACKUP
- mutex_t mutex; /*!< mutex protecting the log */
+ ib_mutex_t mutex; /*!< mutex protecting the log */
- mutex_t log_flush_order_mutex;/*!< mutex to serialize access to
+ ib_mutex_t log_flush_order_mutex;/*!< mutex to serialize access to
the flush list when we are putting
dirty blocks in the list. The idea
behind this mutex is to be able
diff --git a/storage/innobase/include/log0log.ic b/storage/innobase/include/log0log.ic
index ad7b7e790a2..67792395ac9 100644
--- a/storage/innobase/include/log0log.ic
+++ b/storage/innobase/include/log0log.ic
@@ -446,6 +446,18 @@ log_get_capacity(void)
return(log_sys->log_group_capacity);
}
+/****************************************************************
+Get log_sys::max_modified_age_async. It is OK to read the value without
+holding log_sys::mutex because it is constant.
+@return max_modified_age_async */
+UNIV_INLINE
+lsn_t
+log_get_max_modified_age_async(void)
+/*================================*/
+{
+ return(log_sys->max_modified_age_async);
+}
+
/***********************************************************************//**
Checks if there is need for a log buffer flush or a new checkpoint, and does
this if yes. Any database operation should call this when it has modified
diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h
index 218298a1698..dcdd4bdd8aa 100644
--- a/storage/innobase/include/log0recv.h
+++ b/storage/innobase/include/log0recv.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2010, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -128,7 +128,7 @@ recv_recovery_from_checkpoint_finish should be called later to complete
the recovery and free the resources used in it.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
recv_recovery_from_checkpoint_start_func(
/*=====================================*/
#ifdef UNIV_LOG_ARCHIVE
@@ -212,18 +212,18 @@ UNIV_INTERN
void
recv_reset_logs(
/*============*/
- lsn_t lsn, /*!< in: reset to this lsn
- rounded up to be divisible by
- OS_FILE_LOG_BLOCK_SIZE, after
- which we add
- LOG_BLOCK_HDR_SIZE */
#ifdef UNIV_LOG_ARCHIVE
ulint arch_log_no, /*!< in: next archived log file number */
-#endif /* UNIV_LOG_ARCHIVE */
- ibool new_logs_created);/*!< in: TRUE if resetting logs
+ ibool new_logs_created,/*!< in: TRUE if resetting logs
is done at the log creation;
FALSE if it is done after
archive recovery */
+#endif /* UNIV_LOG_ARCHIVE */
+ lsn_t lsn); /*!< in: reset to this lsn
+ rounded up to be divisible by
+ OS_FILE_LOG_BLOCK_SIZE, after
+ which we add
+ LOG_BLOCK_HDR_SIZE */
#ifdef UNIV_HOTBACKUP
/******************************************************//**
Creates new log files after a backup has been restored. */
@@ -318,9 +318,7 @@ recv_recovery_from_archive_finish(void);
#endif /* UNIV_LOG_ARCHIVE */
/** Block of log record data */
-typedef struct recv_data_struct recv_data_t;
-/** Block of log record data */
-struct recv_data_struct{
+struct recv_data_t{
recv_data_t* next; /*!< pointer to the next block or NULL */
/*!< the log record data is stored physically
immediately after this struct, max amount
@@ -328,9 +326,7 @@ struct recv_data_struct{
};
/** Stored log record struct */
-typedef struct recv_struct recv_t;
-/** Stored log record struct */
-struct recv_struct{
+struct recv_t{
byte type; /*!< log record type */
ulint len; /*!< log record body length in bytes */
recv_data_t* data; /*!< chain of blocks containing the log record
@@ -347,7 +343,7 @@ struct recv_struct{
rec_list;/*!< list of log records for this page */
};
-/** States of recv_addr_struct */
+/** States of recv_addr_t */
enum recv_addr_state {
/** not yet processed */
RECV_NOT_PROCESSED,
@@ -361,9 +357,7 @@ enum recv_addr_state {
};
/** Hashed page file address struct */
-typedef struct recv_addr_struct recv_addr_t;
-/** Hashed page file address struct */
-struct recv_addr_struct{
+struct recv_addr_t{
enum recv_addr_state state;
/*!< recovery state of the page */
unsigned space:32;/*!< space id */
@@ -374,13 +368,14 @@ struct recv_addr_struct{
};
/** Recovery system data structure */
-typedef struct recv_sys_struct recv_sys_t;
-/** Recovery system data structure */
-struct recv_sys_struct{
+struct recv_sys_t{
#ifndef UNIV_HOTBACKUP
- mutex_t mutex; /*!< mutex protecting the fields apply_log_recs,
+ ib_mutex_t mutex; /*!< mutex protecting the fields apply_log_recs,
n_addrs, and the state field in each recv_addr
struct */
+ ib_mutex_t writer_mutex;/*!< mutex coordinating
+ flushing between recv_writer_thread and
+ the recovery thread. */
#endif /* !UNIV_HOTBACKUP */
ibool apply_log_recs;
/*!< this is TRUE when log rec application to
diff --git a/storage/innobase/include/mach0data.h b/storage/innobase/include/mach0data.h
index 3066070ef39..d0087f56aaa 100644
--- a/storage/innobase/include/mach0data.h
+++ b/storage/innobase/include/mach0data.h
@@ -374,6 +374,40 @@ mach_read_int_type(
const byte* src, /*!< in: where to read from */
ulint len, /*!< in: length of src */
ibool unsigned_type); /*!< in: signed or unsigned flag */
+/***********************************************************//**
+Convert integral type from host byte order to (big-endian) storage
+byte order. */
+UNIV_INLINE
+void
+mach_write_int_type(
+/*================*/
+ byte* dest, /*!< in: where to write*/
+ const byte* src, /*!< in: where to read from */
+ ulint len, /*!< in: length of src */
+ bool usign); /*!< in: signed or unsigned flag */
+
+/*************************************************************
+Convert a ulonglong integer from host byte order to (big-endian)
+storage byte order. */
+UNIV_INLINE
+void
+mach_write_ulonglong(
+/*=================*/
+ byte* dest, /*!< in: where to write */
+ ulonglong src, /*!< in: where to read from */
+ ulint len, /*!< in: length of dest */
+ bool usign); /*!< in: signed or unsigned flag */
+
+/********************************************************//**
+Reads 1 - 4 bytes from a file page buffered in the buffer pool.
+@return value read */
+UNIV_INLINE
+ulint
+mach_read_ulint(
+/*============*/
+ const byte* ptr, /*!< in: pointer from where to read */
+ ulint type); /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
+
#endif /* !UNIV_HOTBACKUP */
#endif /* !UNIV_INNOCHECKSUM */
diff --git a/storage/innobase/include/mach0data.ic b/storage/innobase/include/mach0data.ic
index ec1a28bca47..fffef87f09d 100644
--- a/storage/innobase/include/mach0data.ic
+++ b/storage/innobase/include/mach0data.ic
@@ -776,5 +776,104 @@ mach_read_int_type(
return(ret);
}
+/*********************************************************//**
+Swap byte ordering. */
+UNIV_INLINE
+void
+mach_swap_byte_order(
+/*=================*/
+ byte* dest, /*!< out: where to write */
+ const byte* from, /*!< in: where to read from */
+ ulint len) /*!< in: length of src */
+{
+ ut_ad(len > 0);
+ ut_ad(len <= 8);
+
+ dest += len;
+
+ switch (len & 0x7) {
+ case 0: *--dest = *from++;
+ case 7: *--dest = *from++;
+ case 6: *--dest = *from++;
+ case 5: *--dest = *from++;
+ case 4: *--dest = *from++;
+ case 3: *--dest = *from++;
+ case 2: *--dest = *from++;
+ case 1: *--dest = *from;
+ }
+}
+
+/*************************************************************
+Convert integral type from host byte order (big-endian) storage
+byte order. */
+UNIV_INLINE
+void
+mach_write_int_type(
+/*================*/
+ byte* dest, /*!< in: where to write */
+ const byte* src, /*!< in: where to read from */
+ ulint len, /*!< in: length of src */
+ bool usign) /*!< in: signed or unsigned flag */
+{
+#ifdef WORDS_BIGENDIAN
+ memcpy(dest, src, len);
+#else
+ mach_swap_byte_order(dest, src, len);
+#endif /* WORDS_BIGENDIAN */
+
+ if (!usign) {
+ *dest ^= 0x80;
+ }
+}
+
+/*************************************************************
+Convert a ulonglong integer from host byte order to (big-endian)
+storage byte order. */
+UNIV_INLINE
+void
+mach_write_ulonglong(
+/*=================*/
+ byte* dest, /*!< in: where to write */
+ ulonglong src, /*!< in: where to read from */
+ ulint len, /*!< in: length of dest */
+ bool usign) /*!< in: signed or unsigned flag */
+{
+ byte* ptr = reinterpret_cast<byte*>(&src);
+
+ ut_ad(len <= sizeof(ulonglong));
+
+#ifdef WORDS_BIGENDIAN
+ memcpy(dest, ptr + (sizeof(src) - len), len);
+#else
+ mach_swap_byte_order(dest, reinterpret_cast<byte*>(ptr), len);
+#endif /* WORDS_BIGENDIAN */
+
+ if (!usign) {
+ *dest ^= 0x80;
+ }
+}
+
+/********************************************************//**
+Reads 1 - 4 bytes from a file page buffered in the buffer pool.
+@return value read */
+UNIV_INLINE
+ulint
+mach_read_ulint(
+/*============*/
+ const byte* ptr, /*!< in: pointer from where to read */
+ ulint type) /*!< in: 1,2 or 4 bytes */
+{
+ switch (type) {
+ case 1:
+ return(mach_read_from_1(ptr));
+ case 2:
+ return(mach_read_from_2(ptr));
+ case 4:
+ return(mach_read_from_4(ptr));
+ default:
+ ut_error;
+ }
+}
+
#endif /* !UNIV_HOTBACKUP */
#endif /* !UNIV_INNOCHECKSUM */
diff --git a/storage/innobase/include/mem0dbg.h b/storage/innobase/include/mem0dbg.h
index 9f95e84c81e..cc339b82910 100644
--- a/storage/innobase/include/mem0dbg.h
+++ b/storage/innobase/include/mem0dbg.h
@@ -32,7 +32,7 @@ check fields whose sizes are given below */
/* The mutex which protects in the debug version the hash table
containing the list of live memory heaps, and also the global
variables in mem0dbg.cc. */
-extern mutex_t mem_hash_mutex;
+extern ib_mutex_t mem_hash_mutex;
# endif /* !UNIV_HOTBACKUP */
#define MEM_FIELD_HEADER_SIZE ut_calc_align(2 * sizeof(ulint),\
diff --git a/storage/innobase/include/mem0mem.h b/storage/innobase/include/mem0mem.h
index 6851a5bc01b..c36ef06b554 100644
--- a/storage/innobase/include/mem0mem.h
+++ b/storage/innobase/include/mem0mem.h
@@ -38,15 +38,12 @@ Created 6/9/1994 Heikki Tuuri
/* -------------------- MEMORY HEAPS ----------------------------- */
-/* The info structure stored at the beginning of a heap block */
-typedef struct mem_block_info_struct mem_block_info_t;
-
/* A block of a memory heap consists of the info structure
followed by an area of memory */
-typedef mem_block_info_t mem_block_t;
+typedef struct mem_block_info_t mem_block_t;
/* A memory heap is a nonempty linear list of memory blocks */
-typedef mem_block_t mem_heap_t;
+typedef mem_block_t mem_heap_t;
/* Types of allocation for memory heaps: DYNAMIC means allocation from the
dynamic memory pool of the C compiler, BUFFER means allocation from the
@@ -343,9 +340,8 @@ mem_validate_all_blocks(void);
/*#######################################################################*/
-/* The info header of a block in a memory heap */
-
-struct mem_block_info_struct {
+/** The info structure stored at the beginning of a heap block */
+struct mem_block_info_t {
ulint magic_n;/* magic number for debugging */
char file_name[8];/* file name where the mem heap was created */
ulint line; /*!< line number where the mem heap was created */
diff --git a/storage/innobase/include/mem0mem.ic b/storage/innobase/include/mem0mem.ic
index eee3806dd52..7f0e128cc40 100644
--- a/storage/innobase/include/mem0mem.ic
+++ b/storage/innobase/include/mem0mem.ic
@@ -247,16 +247,13 @@ mem_heap_free_heap_top(
{
mem_block_t* block;
mem_block_t* prev_block;
-#ifdef UNIV_MEM_DEBUG
+#if defined UNIV_MEM_DEBUG || defined UNIV_DEBUG
ibool error;
ulint total_size;
ulint size;
-#endif
ut_ad(mem_heap_check(heap));
-#ifdef UNIV_MEM_DEBUG
-
/* Validate the heap and get its total allocated size */
mem_heap_validate_or_print(heap, NULL, FALSE, &error, &total_size,
NULL, NULL);
@@ -294,9 +291,9 @@ mem_heap_free_heap_top(
/* Set the free field of block */
mem_block_set_free(block, old_top - (byte*) block);
-#ifdef UNIV_MEM_DEBUG
ut_ad(mem_block_get_start(block) <= mem_block_get_free(block));
-
+ UNIV_MEM_ASSERT_W(old_top, (byte*) block + block->len - old_top);
+#if defined UNIV_MEM_DEBUG
/* In the debug version erase block from top up */
mem_erase_buf(old_top, (byte*) block + block->len - old_top);
@@ -304,8 +301,6 @@ mem_heap_free_heap_top(
mutex_enter(&mem_hash_mutex);
mem_current_allocated_memory -= (total_size - size);
mutex_exit(&mem_hash_mutex);
-#else /* UNIV_MEM_DEBUG */
- UNIV_MEM_ASSERT_W(old_top, (byte*) block + block->len - old_top);
#endif /* UNIV_MEM_DEBUG */
UNIV_MEM_ALLOC(old_top, (byte*) block + block->len - old_top);
diff --git a/storage/innobase/include/mem0pool.h b/storage/innobase/include/mem0pool.h
index 451055e857f..a65ba50fdf9 100644
--- a/storage/innobase/include/mem0pool.h
+++ b/storage/innobase/include/mem0pool.h
@@ -30,17 +30,14 @@ Created 6/9/1994 Heikki Tuuri
#include "os0file.h"
#include "ut0lst.h"
-/** Memory area header */
-typedef struct mem_area_struct mem_area_t;
/** Memory pool */
-typedef struct mem_pool_struct mem_pool_t;
+struct mem_pool_t;
/** The common memory pool */
extern mem_pool_t* mem_comm_pool;
/** Memory area header */
-
-struct mem_area_struct{
+struct mem_area_t{
ulint size_and_free; /*!< memory area size is obtained by
anding with ~MEM_AREA_FREE; area in
a free list if ANDing with
@@ -50,7 +47,7 @@ struct mem_area_struct{
};
/** Each memory area takes this many extra bytes for control information */
-#define MEM_AREA_EXTRA_SIZE (ut_calc_align(sizeof(struct mem_area_struct),\
+#define MEM_AREA_EXTRA_SIZE (ut_calc_align(sizeof(struct mem_area_t),\
UNIV_MEM_ALIGNMENT))
/********************************************************************//**
diff --git a/storage/innobase/include/mtr0log.h b/storage/innobase/include/mtr0log.h
index 1427a981bef..18a345d050f 100644
--- a/storage/innobase/include/mtr0log.h
+++ b/storage/innobase/include/mtr0log.h
@@ -32,8 +32,8 @@ Created 12/7/1995 Heikki Tuuri
#ifndef UNIV_HOTBACKUP
/********************************************************//**
-Writes 1 - 4 bytes to a file page buffered in the buffer pool.
-Writes the corresponding log record to the mini-transaction log. */
+Writes 1, 2 or 4 bytes to a file page. Writes the corresponding log
+record to the mini-transaction log if mtr is not NULL. */
UNIV_INTERN
void
mlog_write_ulint(
@@ -43,8 +43,8 @@ mlog_write_ulint(
byte type, /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
mtr_t* mtr); /*!< in: mini-transaction handle */
/********************************************************//**
-Writes 8 bytes to a file page buffered in the buffer pool.
-Writes the corresponding log record to the mini-transaction log. */
+Writes 8 bytes to a file page. Writes the corresponding log
+record to the mini-transaction log, only if mtr is not NULL */
UNIV_INTERN
void
mlog_write_ull(
@@ -217,12 +217,13 @@ UNIV_INTERN
byte*
mlog_open_and_write_index(
/*======================*/
- mtr_t* mtr, /*!< in: mtr */
- const byte* rec, /*!< in: index record or page */
- dict_index_t* index, /*!< in: record descriptor */
- byte type, /*!< in: log item type */
- ulint size); /*!< in: requested buffer size in bytes
- (if 0, calls mlog_close() and returns NULL) */
+ mtr_t* mtr, /*!< in: mtr */
+ const byte* rec, /*!< in: index record or page */
+ const dict_index_t* index, /*!< in: record descriptor */
+ byte type, /*!< in: log item type */
+ ulint size); /*!< in: requested buffer size in bytes
+ (if 0, calls mlog_close() and
+ returns NULL) */
#endif /* !UNIV_HOTBACKUP */
/********************************************************//**
diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h
index fd84f1119cc..f8c1874412c 100644
--- a/storage/innobase/include/mtr0mtr.h
+++ b/storage/innobase/include/mtr0mtr.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -39,6 +40,7 @@ Created 11/26/1995 Heikki Tuuri
#define MTR_LOG_ALL 21 /* default mode: log all operations
modifying disk-based data */
#define MTR_LOG_NONE 22 /* log no operations */
+#define MTR_LOG_NO_REDO 23 /* Don't generate REDO */
/*#define MTR_LOG_SPACE 23 */ /* log only operations modifying
file space page allocation data
(operations in fsp0fsp.* ) */
@@ -180,7 +182,11 @@ For 1 - 8 bytes, the flag value must give the length also! @{ */
#define MLOG_ZIP_WRITE_HEADER ((byte)50) /*!< write to compressed page
header */
#define MLOG_ZIP_PAGE_COMPRESS ((byte)51) /*!< compress an index page */
-#define MLOG_BIGGEST_TYPE ((byte)51) /*!< biggest value (used in
+#define MLOG_ZIP_PAGE_COMPRESS_NO_DATA ((byte)52)/*!< compress an index page
+ without logging it's image */
+#define MLOG_ZIP_PAGE_REORGANIZE ((byte)53) /*!< reorganize a compressed
+ page */
+#define MLOG_BIGGEST_TYPE ((byte)53) /*!< biggest value (used in
assertions) */
/* @} */
@@ -358,15 +364,14 @@ mtr_memo_push(
void* object, /*!< in: object */
ulint type); /*!< in: object type: MTR_MEMO_S_LOCK, ... */
-/* Type definition of a mini-transaction memo stack slot. */
-typedef struct mtr_memo_slot_struct mtr_memo_slot_t;
-struct mtr_memo_slot_struct{
+/** Mini-transaction memo stack slot. */
+struct mtr_memo_slot_t{
ulint type; /*!< type of the stored object (MTR_MEMO_S_LOCK, ...) */
void* object; /*!< pointer to the object */
};
/* Mini-transaction handle and buffer */
-struct mtr_struct{
+struct mtr_t{
#ifdef UNIV_DEBUG
ulint state; /*!< MTR_ACTIVE, MTR_COMMITTING, MTR_COMMITTED */
#endif
diff --git a/storage/innobase/include/mtr0mtr.ic b/storage/innobase/include/mtr0mtr.ic
index dcd9826b380..bb24734c9bb 100644
--- a/storage/innobase/include/mtr0mtr.ic
+++ b/storage/innobase/include/mtr0mtr.ic
@@ -269,7 +269,7 @@ mtr_s_lock_func(
ut_ad(mtr);
ut_ad(lock);
- rw_lock_s_lock_func(lock, 0, file, line);
+ rw_lock_s_lock_inline(lock, 0, file, line);
mtr_memo_push(mtr, lock, MTR_MEMO_S_LOCK);
}
@@ -288,7 +288,7 @@ mtr_x_lock_func(
ut_ad(mtr);
ut_ad(lock);
- rw_lock_x_lock_func(lock, 0, file, line);
+ rw_lock_x_lock_inline(lock, 0, file, line);
mtr_memo_push(mtr, lock, MTR_MEMO_X_LOCK);
}
diff --git a/storage/innobase/include/mtr0types.h b/storage/innobase/include/mtr0types.h
index 7a2bcefadb9..43368c0b726 100644
--- a/storage/innobase/include/mtr0types.h
+++ b/storage/innobase/include/mtr0types.h
@@ -26,6 +26,6 @@ Created 11/26/1995 Heikki Tuuri
#ifndef mtr0types_h
#define mtr0types_h
-typedef struct mtr_struct mtr_t;
+struct mtr_t;
#endif
diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
index 8f84193cb0f..4a744c1b268 100644
--- a/storage/innobase/include/os0file.h
+++ b/storage/innobase/include/os0file.h
@@ -1,6 +1,6 @@
/***********************************************************************
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
Portions of this file contain modifications contributed and copyrighted
@@ -44,7 +44,7 @@ Created 10/21/1995 Heikki Tuuri
#endif
/** File node of a tablespace or the log data space */
-typedef struct fil_node_struct fil_node_t;
+struct fil_node_t;
extern ibool os_has_said_disk_full;
/** Flag: enable debug printout for asynchronous i/o */
@@ -102,7 +102,7 @@ log. */
#define OS_FILE_LOG_BLOCK_SIZE 512
/** Options for os_file_create_func @{ */
-typedef enum os_file_create_enum {
+enum os_file_create_t {
OS_FILE_OPEN = 51, /*!< to open an existing file (if
doesn't exist, error) */
OS_FILE_CREATE, /*!< to create new file (if
@@ -122,7 +122,7 @@ typedef enum os_file_create_enum {
the log unless it is a fatal error,
this flag is only used if
ON_ERROR_NO_EXIT is set */
-} os_file_create_t;
+};
#define OS_FILE_READ_ONLY 333
#define OS_FILE_READ_WRITE 444
@@ -217,10 +217,10 @@ used to register actual file read, write and flush */
# define register_pfs_file_open_begin(state, locker, key, op, name, \
src_file, src_line) \
do { \
- locker = PSI_CALL(get_thread_file_name_locker)( \
+ locker = PSI_FILE_CALL(get_thread_file_name_locker)( \
state, key, op, name, &locker); \
if (UNIV_LIKELY(locker != NULL)) { \
- PSI_CALL(start_file_open_wait)( \
+ PSI_FILE_CALL(start_file_open_wait)( \
locker, src_file, src_line); \
} \
} while (0)
@@ -228,7 +228,7 @@ do { \
# define register_pfs_file_open_end(locker, file) \
do { \
if (UNIV_LIKELY(locker != NULL)) { \
- PSI_CALL(end_file_open_wait_and_bind_to_descriptor)( \
+ PSI_FILE_CALL(end_file_open_wait_and_bind_to_descriptor)(\
locker, file); \
} \
} while (0)
@@ -236,10 +236,10 @@ do { \
# define register_pfs_file_io_begin(state, locker, file, count, op, \
src_file, src_line) \
do { \
- locker = PSI_CALL(get_thread_file_descriptor_locker)( \
+ locker = PSI_FILE_CALL(get_thread_file_descriptor_locker)( \
state, file, op); \
if (UNIV_LIKELY(locker != NULL)) { \
- PSI_CALL(start_file_wait)( \
+ PSI_FILE_CALL(start_file_wait)( \
locker, count, src_file, src_line); \
} \
} while (0)
@@ -247,7 +247,7 @@ do { \
# define register_pfs_file_io_end(locker, count) \
do { \
if (UNIV_LIKELY(locker != NULL)) { \
- PSI_CALL(end_file_wait)(locker, count); \
+ PSI_FILE_CALL(end_file_wait)(locker, count); \
} \
} while (0)
#endif /* UNIV_PFS_IO */
@@ -345,13 +345,12 @@ to original un-instrumented file I/O APIs */
/* File types for directory entry data type */
-enum os_file_type_enum{
+enum os_file_type_t {
OS_FILE_TYPE_UNKNOWN = 0,
OS_FILE_TYPE_FILE, /* regular file */
OS_FILE_TYPE_DIR, /* directory */
OS_FILE_TYPE_LINK /* symbolic link */
};
-typedef enum os_file_type_enum os_file_type_t;
/* Maximum path string length in bytes when referring to tables with in the
'./databasename/tablename.ibd' path format; we can allocate at least 2 buffers
@@ -359,16 +358,18 @@ of this size from the thread stack; that is why this should not be made much
bigger than 4000 bytes */
#define OS_FILE_MAX_PATH 4000
-/* Struct used in fetching information of a file in a directory */
-struct os_file_stat_struct{
+/** Struct used in fetching information of a file in a directory */
+struct os_file_stat_t {
char name[OS_FILE_MAX_PATH]; /*!< path to a file */
os_file_type_t type; /*!< file type */
ib_int64_t size; /*!< file size */
time_t ctime; /*!< creation time */
time_t mtime; /*!< modification time */
time_t atime; /*!< access time */
+ bool rw_perm; /*!< true if can be opened
+ in read-write mode. Only valid
+ if type == OS_FILE_TYPE_FILE */
};
-typedef struct os_file_stat_struct os_file_stat_t;
#ifdef __WIN__
typedef HANDLE os_file_dir_t; /*!< directory stream */
@@ -525,7 +526,7 @@ os_file_create_func(
Deletes a file. The file has to be closed before calling this.
@return TRUE if success */
UNIV_INTERN
-ibool
+bool
os_file_delete(
/*===========*/
const char* name); /*!< in: file path as a null-terminated
@@ -535,7 +536,7 @@ os_file_delete(
Deletes a file if it exists. The file has to be closed before calling this.
@return TRUE if success */
UNIV_INTERN
-ibool
+bool
os_file_delete_if_exists(
/*=====================*/
const char* name); /*!< in: file path as a null-terminated
@@ -826,7 +827,7 @@ UNIV_INTERN
ulint
os_file_get_last_error(
/*===================*/
- ibool report_all_errors); /*!< in: TRUE if we want an error message
+ bool report_all_errors); /*!< in: TRUE if we want an error message
printed of all errors */
/*******************************************************************//**
NOTE! Use the corresponding macro os_file_read(), not directly this function!
@@ -925,6 +926,60 @@ os_file_dirname(
/*============*/
const char* path); /*!< in: pathname */
/****************************************************************//**
+This function returns a new path name after replacing the basename
+in an old path with a new basename. The old_path is a full path
+name including the extension. The tablename is in the normal
+form "databasename/tablename". The new base name is found after
+the forward slash. Both input strings are null terminated.
+
+This function allocates memory to be returned. It is the callers
+responsibility to free the return value after it is no longer needed.
+
+@return own: new full pathname */
+UNIV_INTERN
+char*
+os_file_make_new_pathname(
+/*======================*/
+ const char* old_path, /*!< in: pathname */
+ const char* new_name); /*!< in: new file name */
+/****************************************************************//**
+This function returns a remote path name by combining a data directory
+path provided in a DATA DIRECTORY clause with the tablename which is
+in the form 'database/tablename'. It strips the file basename (which
+is the tablename) found after the last directory in the path provided.
+The full filepath created will include the database name as a directory
+under the path provided. The filename is the tablename with the '.ibd'
+extension. All input and output strings are null-terminated.
+
+This function allocates memory to be returned. It is the callers
+responsibility to free the return value after it is no longer needed.
+
+@return own: A full pathname; data_dir_path/databasename/tablename.ibd */
+UNIV_INTERN
+char*
+os_file_make_remote_pathname(
+/*=========================*/
+ const char* data_dir_path, /*!< in: pathname */
+ const char* tablename, /*!< in: tablename */
+ const char* extention); /*!< in: file extention; ibd,cfg*/
+/****************************************************************//**
+This function reduces a null-terminated full remote path name into
+the path that is sent by MySQL for DATA DIRECTORY clause. It replaces
+the 'databasename/tablename.ibd' found at the end of the path with just
+'tablename'.
+
+Since the result is always smaller than the path sent in, no new memory
+is allocated. The caller should allocate memory for the path sent in.
+This function manipulates that path in place.
+
+If the path format is not as expected, just return. The result is used
+to inform a SHOW CREATE TABLE command. */
+UNIV_INTERN
+void
+os_file_make_data_dir_path(
+/*========================*/
+ char* data_dir_path); /*!< in/out: full path/data_dir_path */
+/****************************************************************//**
Creates all missing subdirectories along the given path.
@return TRUE if call succeeded FALSE otherwise */
UNIV_INTERN
@@ -1108,14 +1163,16 @@ os_aio_all_slots_free(void);
/*******************************************************************//**
This function returns information about the specified file
-@return TRUE if stat information found */
+@return DB_SUCCESS if all OK */
UNIV_INTERN
-ibool
+dberr_t
os_file_get_status(
/*===============*/
- const char* path, /*!< in: pathname of the file */
- os_file_stat_t* stat_info); /*!< information of a file in a
+ const char* path, /*!< in: pathname of the file */
+ os_file_stat_t* stat_info, /*!< information of a file in a
directory */
+ bool check_rw_perm); /*!< in: for testing whether the
+ file can be opened in RW mode */
#if !defined(UNIV_HOTBACKUP)
/*********************************************************************//**
diff --git a/storage/innobase/include/os0sync.h b/storage/innobase/include/os0sync.h
index d68823b72ca..d3ce68253ec 100644
--- a/storage/innobase/include/os0sync.h
+++ b/storage/innobase/include/os0sync.h
@@ -54,22 +54,19 @@ typedef pthread_cond_t os_cond_t;
/** Structure that includes Performance Schema Probe pfs_psi
in the os_fast_mutex structure if UNIV_PFS_MUTEX is defined */
-typedef struct os_fast_mutex_struct {
+struct os_fast_mutex_t {
fast_mutex_t mutex; /*!< os_fast_mutex */
#ifdef UNIV_PFS_MUTEX
struct PSI_mutex* pfs_psi;/*!< The performance schema
instrumentation hook */
#endif
-} os_fast_mutex_t;
-
+};
-/** Operating system event */
-typedef struct os_event_struct os_event_struct_t;
/** Operating system event handle */
-typedef os_event_struct_t* os_event_t;
+typedef struct os_event* os_event_t;
/** An asynchronous signal sent between threads */
-struct os_event_struct {
+struct os_event {
#ifdef __WIN__
HANDLE handle; /*!< kernel event object, slow,
used on older Windows */
@@ -84,7 +81,7 @@ struct os_event_struct {
the event becomes signaled */
os_cond_t cond_var; /*!< condition variable is used in
waiting for the event */
- UT_LIST_NODE_T(os_event_struct_t) os_event_list;
+ UT_LIST_NODE_T(os_event_t) os_event_list;
/*!< list of all created events */
};
@@ -94,16 +91,11 @@ struct os_event_struct {
/** Return value of os_event_wait_time() when the time is exceeded */
#define OS_SYNC_TIME_EXCEEDED 1
-/** Operating system mutex */
-typedef struct os_mutex_struct os_mutex_str_t;
/** Operating system mutex handle */
-typedef os_mutex_str_t* os_mutex_t;
-
-/** Return value of os_event_wait_time() when the time is exceeded */
-#define OS_SYNC_TIME_EXCEEDED 1
+typedef struct os_mutex_t* os_ib_mutex_t;
/** Mutex protecting counts and the event and OS 'slow' mutex lists */
-extern os_mutex_t os_sync_mutex;
+extern os_ib_mutex_t os_sync_mutex;
/** This is incremented by 1 in os_thread_create and decremented by 1 in
os_thread_exit */
@@ -132,10 +124,8 @@ explicitly by calling sync_os_reset_event.
@return the event handle */
UNIV_INTERN
os_event_t
-os_event_create(
-/*============*/
- const char* name); /*!< in: the name of the event, if NULL
- the event is created without a name */
+os_event_create(void);
+/*==================*/
/**********************************************************//**
Sets an event semaphore to the signaled state: lets waiting threads
proceed. */
@@ -191,7 +181,7 @@ os_event_wait_low(
os_event_reset(). */
#define os_event_wait(event) os_event_wait_low(event, 0)
-#define os_event_wait_time(e, t) os_event_wait_time_low(event, t, 0)
+#define os_event_wait_time(event, t) os_event_wait_time_low(event, t, 0)
/**********************************************************//**
Waits for an event object until it is in the signaled state or
@@ -210,10 +200,10 @@ os_event_wait_time_low(
os_event_reset(). */
/*********************************************************//**
Creates an operating system mutex semaphore. Because these are slow, the
-mutex semaphore of InnoDB itself (mutex_t) should be used where possible.
+mutex semaphore of InnoDB itself (ib_mutex_t) should be used where possible.
@return the mutex handle */
UNIV_INTERN
-os_mutex_t
+os_ib_mutex_t
os_mutex_create(void);
/*=================*/
/**********************************************************//**
@@ -222,21 +212,21 @@ UNIV_INTERN
void
os_mutex_enter(
/*===========*/
- os_mutex_t mutex); /*!< in: mutex to acquire */
+ os_ib_mutex_t mutex); /*!< in: mutex to acquire */
/**********************************************************//**
Releases ownership of a mutex. */
UNIV_INTERN
void
os_mutex_exit(
/*==========*/
- os_mutex_t mutex); /*!< in: mutex to release */
+ os_ib_mutex_t mutex); /*!< in: mutex to release */
/**********************************************************//**
Frees an mutex object. */
UNIV_INTERN
void
os_mutex_free(
/*==========*/
- os_mutex_t mutex); /*!< in: mutex to free */
+ os_ib_mutex_t mutex); /*!< in: mutex to free */
/**********************************************************//**
Acquires ownership of a fast mutex. Currently in Windows this is the same
as os_fast_mutex_lock!
@@ -365,7 +355,11 @@ Atomic compare-and-swap and increment for InnoDB. */
#if defined(HAVE_IB_GCC_ATOMIC_BUILTINS)
-#define HAVE_ATOMIC_BUILTINS
+# define HAVE_ATOMIC_BUILTINS
+
+# ifdef HAVE_IB_GCC_ATOMIC_BUILTINS_64
+# define HAVE_ATOMIC_BUILTINS_64
+# endif
/**********************************************************//**
Returns true if swapped, ptr is pointer to target, old_val is value to
@@ -419,6 +413,9 @@ amount to decrement. */
# define os_atomic_decrement_ulint(ptr, amount) \
os_atomic_decrement(ptr, amount)
+# define os_atomic_decrement_uint64(ptr, amount) \
+ os_atomic_decrement(ptr, amount)
+
/**********************************************************//**
Returns the old value of *ptr, atomically sets *ptr to new_val */
@@ -430,12 +427,13 @@ Returns the old value of *ptr, atomically sets *ptr to new_val */
#elif defined(HAVE_IB_SOLARIS_ATOMICS)
-#define HAVE_ATOMIC_BUILTINS
+# define HAVE_ATOMIC_BUILTINS
+# define HAVE_ATOMIC_BUILTINS_64
/* If not compiling with GCC or GCC doesn't support the atomic
intrinsics and running on Solaris >= 10 use Solaris atomics */
-#include <atomic.h>
+# include <atomic.h>
/**********************************************************//**
Returns true if swapped, ptr is pointer to target, old_val is value to
@@ -487,6 +485,9 @@ amount to decrement. */
# define os_atomic_decrement_ulint(ptr, amount) \
os_atomic_increment_ulint(ptr, -(amount))
+# define os_atomic_decrement_uint64(ptr, amount) \
+ os_atomic_increment_uint64(ptr, -(amount))
+
/**********************************************************//**
Returns the old value of *ptr, atomically sets *ptr to new_val */
@@ -498,7 +499,11 @@ Returns the old value of *ptr, atomically sets *ptr to new_val */
#elif defined(HAVE_WINDOWS_ATOMICS)
-#define HAVE_ATOMIC_BUILTINS
+# define HAVE_ATOMIC_BUILTINS
+
+# ifndef _WIN32
+# define HAVE_ATOMIC_BUILTINS_64
+# endif
/**********************************************************//**
Atomic compare and exchange of signed integers (both 32 and 64 bit).
@@ -574,8 +579,10 @@ amount of increment. */
# define os_atomic_increment_ulint(ptr, amount) \
((ulint) (win_xchg_and_add((lint*) ptr, (lint) amount) + amount))
-# define os_atomic_increment_uint64(ptr, amount) \
- ((ulint) (win_xchg_and_add(ptr, (lint) amount) + amount))
+# define os_atomic_increment_uint64(ptr, amount) \
+ ((ib_uint64_t) (InterlockedExchangeAdd64( \
+ (ib_int64_t*) ptr, \
+ (ib_int64_t) amount) + amount))
/**********************************************************//**
Returns the resulting value, ptr is pointer to target, amount is the
@@ -587,6 +594,11 @@ amount to decrement. There is no atomic substract function on Windows */
# define os_atomic_decrement_ulint(ptr, amount) \
((ulint) (win_xchg_and_add((lint*) ptr, -(lint) amount) - amount))
+# define os_atomic_decrement_uint64(ptr, amount) \
+ ((ib_uint64_t) (InterlockedExchangeAdd64( \
+ (ib_int64_t*) ptr, \
+ -(ib_int64_t) amount) - amount))
+
/**********************************************************//**
Returns the old value of *ptr, atomically sets *ptr to new_val.
InterlockedExchange() operates on LONG, and the LONG will be
diff --git a/storage/innobase/include/os0sync.ic b/storage/innobase/include/os0sync.ic
index 0d907b31366..33c238ceb47 100644
--- a/storage/innobase/include/os0sync.ic
+++ b/storage/innobase/include/os0sync.ic
@@ -66,7 +66,7 @@ pfs_os_fast_mutex_init(
os_fast_mutex_t* fast_mutex) /*!< out: fast mutex */
{
#ifdef HAVE_PSI_MUTEX_INTERFACE
- fast_mutex->pfs_psi = PSI_CALL(init_mutex)(key, &fast_mutex->mutex);
+ fast_mutex->pfs_psi = PSI_MUTEX_CALL(init_mutex)(key, &fast_mutex->mutex);
#else
fast_mutex->pfs_psi = NULL;
#endif
@@ -86,7 +86,7 @@ pfs_os_fast_mutex_free(
{
#ifdef HAVE_PSI_MUTEX_INTERFACE
if (fast_mutex->pfs_psi != NULL)
- PSI_CALL(destroy_mutex)(fast_mutex->pfs_psi);
+ PSI_MUTEX_CALL(destroy_mutex)(fast_mutex->pfs_psi);
#endif
fast_mutex->pfs_psi = NULL;
@@ -112,13 +112,13 @@ pfs_os_fast_mutex_lock(
PSI_mutex_locker* locker;
PSI_mutex_locker_state state;
- locker = PSI_CALL(start_mutex_wait)(&state, fast_mutex->pfs_psi,
+ locker = PSI_MUTEX_CALL(start_mutex_wait)(&state, fast_mutex->pfs_psi,
PSI_MUTEX_LOCK, file_name, line);
os_fast_mutex_lock_func(&fast_mutex->mutex);
if (locker != NULL)
- PSI_CALL(end_mutex_wait)(locker, 0);
+ PSI_MUTEX_CALL(end_mutex_wait)(locker, 0);
}
else
#endif
@@ -141,7 +141,7 @@ pfs_os_fast_mutex_unlock(
{
#ifdef HAVE_PSI_MUTEX_INTERFACE
if (fast_mutex->pfs_psi != NULL)
- PSI_CALL(unlock_mutex)(fast_mutex->pfs_psi);
+ PSI_MUTEX_CALL(unlock_mutex)(fast_mutex->pfs_psi);
#endif
os_fast_mutex_unlock_func(&fast_mutex->mutex);
diff --git a/storage/innobase/include/page0cur.h b/storage/innobase/include/page0cur.h
index 52f5c5de58a..038a05edbd0 100644
--- a/storage/innobase/include/page0cur.h
+++ b/storage/innobase/include/page0cur.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -170,8 +170,11 @@ page_cur_tuple_insert(
page_cur_t* cursor, /*!< in/out: a page cursor */
const dtuple_t* tuple, /*!< in: pointer to a data tuple */
dict_index_t* index, /*!< in: record descriptor */
+ ulint** offsets,/*!< out: offsets on *rec */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
ulint n_ext, /*!< in: number of externally stored columns */
- mtr_t* mtr); /*!< in: mini-transaction handle, or NULL */
+ mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
+ __attribute__((nonnull(1,2,3,4,5), warn_unused_result));
#endif /* !UNIV_HOTBACKUP */
/***********************************************************//**
Inserts a record next to page cursor. Returns pointer to inserted record if
@@ -238,10 +241,11 @@ UNIV_INTERN
void
page_cur_delete_rec(
/*================*/
- page_cur_t* cursor, /*!< in/out: a page cursor */
- dict_index_t* index, /*!< in: record descriptor */
- const ulint* offsets,/*!< in: rec_get_offsets(cursor->rec, index) */
- mtr_t* mtr); /*!< in: mini-transaction handle */
+ page_cur_t* cursor, /*!< in/out: a page cursor */
+ const dict_index_t* index, /*!< in: record descriptor */
+ const ulint* offsets,/*!< in: rec_get_offsets(
+ cursor->rec, index) */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
#ifndef UNIV_HOTBACKUP
/****************************************************************//**
Searches the right position for a page cursor.
@@ -331,10 +335,24 @@ page_cur_parse_delete_rec(
buf_block_t* block, /*!< in: page or NULL */
dict_index_t* index, /*!< in: record descriptor */
mtr_t* mtr); /*!< in: mtr or NULL */
+/*******************************************************//**
+Removes the record from a leaf page. This function does not log
+any changes. It is used by the IMPORT tablespace functions.
+@return true if success, i.e., the page did not become too empty */
+UNIV_INTERN
+bool
+page_delete_rec(
+/*============*/
+ const dict_index_t* index, /*!< in: The index that the record
+ belongs to */
+ page_cur_t* pcur, /*!< in/out: page cursor on record
+ to delete */
+ page_zip_des_t* page_zip,/*!< in: compressed page descriptor */
+ const ulint* offsets);/*!< in: offsets for record */
/** Index page cursor */
-struct page_cur_struct{
+struct page_cur_t{
byte* rec; /*!< pointer to a record on page */
buf_block_t* block; /*!< pointer to the block containing rec */
};
diff --git a/storage/innobase/include/page0cur.ic b/storage/innobase/include/page0cur.ic
index a065f9ff30d..90a5a690487 100644
--- a/storage/innobase/include/page0cur.ic
+++ b/storage/innobase/include/page0cur.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -245,33 +245,36 @@ page_cur_tuple_insert(
page_cur_t* cursor, /*!< in/out: a page cursor */
const dtuple_t* tuple, /*!< in: pointer to a data tuple */
dict_index_t* index, /*!< in: record descriptor */
+ ulint** offsets,/*!< out: offsets on *rec */
+ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
ulint n_ext, /*!< in: number of externally stored columns */
mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
{
- mem_heap_t* heap;
- ulint* offsets;
ulint size
= rec_get_converted_size(index, tuple, n_ext);
rec_t* rec;
- heap = mem_heap_create(size
- + (4 + REC_OFFS_HEADER_SIZE
- + dtuple_get_n_fields(tuple))
- * sizeof *offsets);
- rec = rec_convert_dtuple_to_rec((byte*) mem_heap_alloc(heap, size),
+ if (!*heap) {
+ *heap = mem_heap_create(size
+ + (4 + REC_OFFS_HEADER_SIZE
+ + dtuple_get_n_fields(tuple))
+ * sizeof **offsets);
+ }
+
+ rec = rec_convert_dtuple_to_rec((byte*) mem_heap_alloc(*heap, size),
index, tuple, n_ext);
- offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
+ *offsets = rec_get_offsets(
+ rec, index, *offsets, ULINT_UNDEFINED, heap);
if (buf_block_get_page_zip(cursor->block)) {
rec = page_cur_insert_rec_zip(&cursor->rec, cursor->block,
- index, rec, offsets, mtr);
+ index, rec, *offsets, mtr);
} else {
rec = page_cur_insert_rec_low(cursor->rec,
- index, rec, offsets, mtr);
+ index, rec, *offsets, mtr);
}
- ut_ad(!rec || !cmp_dtuple_rec(tuple, rec, offsets));
- mem_heap_free(heap);
+ ut_ad(!rec || !cmp_dtuple_rec(tuple, rec, *offsets));
return(rec);
}
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/page0page.h b/storage/innobase/include/page0page.h
index e4571b69376..773ec4c2177 100644
--- a/storage/innobase/include/page0page.h
+++ b/storage/innobase/include/page0page.h
@@ -551,6 +551,16 @@ page_rec_get_next_const(
/*====================*/
const rec_t* rec); /*!< in: pointer to record */
/************************************************************//**
+Gets the pointer to the next non delete-marked record on the page.
+If all subsequent records are delete-marked, then this function
+will return the supremum record.
+@return pointer to next non delete-marked record or pointer to supremum */
+UNIV_INLINE
+const rec_t*
+page_rec_get_next_non_del_marked(
+/*=============================*/
+ const rec_t* rec); /*!< in: pointer to record */
+/************************************************************//**
Sets the pointer to the next record on the page. */
UNIV_INLINE
void
@@ -737,11 +747,14 @@ UNIV_INLINE
void
page_mem_free(
/*==========*/
- page_t* page, /*!< in/out: index page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- rec_t* rec, /*!< in: pointer to the (origin of) record */
- dict_index_t* index, /*!< in: index of rec */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ page_t* page, /*!< in/out: index page */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page,
+ or NULL */
+ rec_t* rec, /*!< in: pointer to the (origin of)
+ record */
+ const dict_index_t* index, /*!< in: index of rec */
+ const ulint* offsets);/*!< in: array returned by
+ rec_get_offsets() */
/**********************************************************//**
Create an uncompressed B-tree index page.
@return pointer to the page */
@@ -1031,7 +1044,6 @@ page_find_rec_with_heap_no(
/*=======================*/
const page_t* page, /*!< in: index page */
ulint heap_no);/*!< in: heap number */
-
#ifdef UNIV_MATERIALIZE
#undef UNIV_INLINE
#define UNIV_INLINE UNIV_INLINE_ORIGINAL
diff --git a/storage/innobase/include/page0page.ic b/storage/innobase/include/page0page.ic
index e73e547e92b..c2e20d81a29 100644
--- a/storage/innobase/include/page0page.ic
+++ b/storage/innobase/include/page0page.ic
@@ -776,6 +776,30 @@ page_rec_get_next_const(
}
/************************************************************//**
+Gets the pointer to the next non delete-marked record on the page.
+If all subsequent records are delete-marked, then this function
+will return the supremum record.
+@return pointer to next non delete-marked record or pointer to supremum */
+UNIV_INLINE
+const rec_t*
+page_rec_get_next_non_del_marked(
+/*=============================*/
+ const rec_t* rec) /*!< in: pointer to record */
+{
+ const rec_t* r;
+ ulint page_is_compact = page_rec_is_comp(rec);
+
+ for (r = page_rec_get_next_const(rec);
+ !page_rec_is_supremum(r)
+ && rec_get_deleted_flag(r, page_is_compact);
+ r = page_rec_get_next_const(r)) {
+ /* noop */
+ }
+
+ return(r);
+}
+
+/************************************************************//**
Sets the pointer to the next record on the page. */
UNIV_INLINE
void
@@ -1085,11 +1109,14 @@ UNIV_INLINE
void
page_mem_free(
/*==========*/
- page_t* page, /*!< in/out: index page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- rec_t* rec, /*!< in: pointer to the (origin of) record */
- dict_index_t* index, /*!< in: index of rec */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ page_t* page, /*!< in/out: index page */
+ page_zip_des_t* page_zip, /*!< in/out: compressed page,
+ or NULL */
+ rec_t* rec, /*!< in: pointer to the
+ (origin of) record */
+ const dict_index_t* index, /*!< in: index of rec */
+ const ulint* offsets) /*!< in: array returned by
+ rec_get_offsets() */
{
rec_t* free;
ulint garbage;
diff --git a/storage/innobase/include/page0types.h b/storage/innobase/include/page0types.h
index da2ac1c7de2..533b0d3cf98 100644
--- a/storage/innobase/include/page0types.h
+++ b/storage/innobase/include/page0types.h
@@ -26,6 +26,10 @@ Created 2/2/1994 Heikki Tuuri
#ifndef page0types_h
#define page0types_h
+using namespace std;
+
+#include <map>
+
#include "univ.i"
#include "dict0types.h"
#include "mtr0types.h"
@@ -35,12 +39,12 @@ Created 2/2/1994 Heikki Tuuri
/** Type of the index page */
typedef byte page_t;
/** Index page cursor */
-typedef struct page_cur_struct page_cur_t;
+struct page_cur_t;
/** Compressed index page */
typedef byte page_zip_t;
/** Compressed page descriptor */
-typedef struct page_zip_des_struct page_zip_des_t;
+struct page_zip_des_t;
/* The following definitions would better belong to page0zip.h,
but we cannot include page0zip.h from rem0rec.ic, because
@@ -60,12 +64,14 @@ ssize, which is the number of shifts from 512. */
#endif
/** Compressed page descriptor */
-struct page_zip_des_struct
+struct page_zip_des_t
{
page_zip_t* data; /*!< compressed page data */
#ifdef UNIV_DEBUG
unsigned m_start:16; /*!< start offset of modification log */
+ bool m_external; /*!< Allocated externally, not from the
+ buffer pool */
#endif /* UNIV_DEBUG */
unsigned m_end:16; /*!< end offset of modification log */
unsigned m_nonempty:1; /*!< TRUE if the modification log
@@ -80,7 +86,7 @@ struct page_zip_des_struct
};
/** Compression statistics for a given page size */
-struct page_zip_stat_struct {
+struct page_zip_stat_t {
/** Number of page compressions */
ulint compressed;
/** Number of successful page compressions */
@@ -91,13 +97,29 @@ struct page_zip_stat_struct {
ib_uint64_t compressed_usec;
/** Duration of page decompressions in microseconds */
ib_uint64_t decompressed_usec;
+ page_zip_stat_t() :
+ /* Initialize members to 0 so that when we do
+ stlmap[key].compressed++ and element with "key" does not
+ exist it gets inserted with zeroed members. */
+ compressed(0),
+ compressed_ok(0),
+ decompressed(0),
+ compressed_usec(0),
+ decompressed_usec(0)
+ { }
};
-/** Compression statistics */
-typedef struct page_zip_stat_struct page_zip_stat_t;
-
-/** Statistics on compression, indexed by page_zip_des_struct::ssize - 1 */
-extern page_zip_stat_t page_zip_stat[PAGE_ZIP_SSIZE_MAX];
+/** Compression statistics types */
+typedef map<index_id_t, page_zip_stat_t> page_zip_stat_per_index_t;
+
+/** Statistics on compression, indexed by page_zip_des_t::ssize - 1 */
+extern page_zip_stat_t page_zip_stat[PAGE_ZIP_SSIZE_MAX];
+/** Statistics on compression, indexed by dict_index_t::id */
+extern page_zip_stat_per_index_t page_zip_stat_per_index;
+extern ib_mutex_t page_zip_stat_per_index_mutex;
+#ifdef HAVE_PSI_INTERFACE
+extern mysql_pfs_key_t page_zip_stat_per_index_mutex_key;
+#endif /* HAVE_PSI_INTERFACE */
/**********************************************************************//**
Write the "deleted" flag of a record on a compressed page. The flag must
diff --git a/storage/innobase/include/page0zip.h b/storage/innobase/include/page0zip.h
index 2c84f75b2ab..12781bd61b8 100644
--- a/storage/innobase/include/page0zip.h
+++ b/storage/innobase/include/page0zip.h
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 2005, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2005, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -39,6 +40,16 @@ Created June 2005 by Marko Makela
#include "trx0types.h"
#include "mem0mem.h"
+/* Compression level to be used by zlib. Settable by user. */
+extern ulint page_compression_level;
+
+/* Default compression level. */
+#define DEFAULT_COMPRESSION_LEVEL 6
+
+/* Whether or not to log compressed page images to avoid possible
+compression algorithm changes in zlib. */
+extern bool page_log_compressed_pages;
+
/**********************************************************************//**
Determine the size of a compressed page in bytes.
@return size in bytes */
@@ -114,6 +125,7 @@ page_zip_compress(
m_start, m_end, m_nonempty */
const page_t* page, /*!< in: uncompressed page */
dict_index_t* index, /*!< in: index of the B-tree node */
+ ulint level, /*!< in: commpression level */
mtr_t* mtr) /*!< in: mini-transaction, or NULL */
__attribute__((nonnull(1,2,3)));
@@ -337,11 +349,12 @@ UNIV_INTERN
void
page_zip_dir_delete(
/*================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page */
- byte* rec, /*!< in: deleted record */
- dict_index_t* index, /*!< in: index of rec */
- const ulint* offsets,/*!< in: rec_get_offsets(rec) */
- const byte* free) /*!< in: previous start of the free list */
+ page_zip_des_t* page_zip, /*!< in/out: compressed page */
+ byte* rec, /*!< in: deleted record */
+ const dict_index_t* index, /*!< in: index of rec */
+ const ulint* offsets, /*!< in: rec_get_offsets(rec) */
+ const byte* free) /*!< in: previous start of
+ the free list */
__attribute__((nonnull(1,2,3,4)));
/**********************************************************************//**
@@ -461,14 +474,49 @@ page_zip_verify_checksum(
/*=====================*/
const void* data, /*!< in: compressed page */
ulint size); /*!< in: size of compressed page */
+/**********************************************************************//**
+Write a log record of compressing an index page without the data on the page. */
+UNIV_INLINE
+void
+page_zip_compress_write_log_no_data(
+/*================================*/
+ ulint level, /*!< in: compression level */
+ const page_t* page, /*!< in: page that is compressed */
+ dict_index_t* index, /*!< in: index */
+ mtr_t* mtr); /*!< in: mtr */
+/**********************************************************************//**
+Parses a log record of compressing an index page without the data.
+@return end of log record or NULL */
+UNIV_INLINE
+byte*
+page_zip_parse_compress_no_data(
+/*============================*/
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr, /*!< in: buffer end */
+ page_t* page, /*!< in: uncompressed page */
+ page_zip_des_t* page_zip, /*!< out: compressed page */
+ dict_index_t* index) /*!< in: index */
+ __attribute__((nonnull(1,2)));
+
+/**********************************************************************//**
+Reset the counters used for filling
+INFORMATION_SCHEMA.innodb_cmp_per_index. */
+UNIV_INLINE
+void
+page_zip_reset_stat_per_index();
+/*===========================*/
#ifndef UNIV_HOTBACKUP
/** Check if a pointer to an uncompressed page matches a compressed page.
+When we IMPORT a tablespace the blocks and accompanying frames are allocted
+from outside the buffer pool.
@param ptr pointer to an uncompressed page frame
@param page_zip compressed page descriptor
@return TRUE if ptr and page_zip refer to the same block */
-# define PAGE_ZIP_MATCH(ptr, page_zip) \
- (buf_frame_get_page_zip(ptr) == (page_zip))
+# define PAGE_ZIP_MATCH(ptr, page_zip) \
+ (((page_zip)->m_external \
+ && (page_align(ptr) + UNIV_PAGE_SIZE == (page_zip)->data)) \
+ || buf_frame_get_page_zip(ptr) == (page_zip))
#else /* !UNIV_HOTBACKUP */
/** Check if a pointer to an uncompressed page matches a compressed page.
@param ptr pointer to an uncompressed page frame
diff --git a/storage/innobase/include/page0zip.ic b/storage/innobase/include/page0zip.ic
index c9300aa4e9f..0062e1cb39f 100644
--- a/storage/innobase/include/page0zip.ic
+++ b/storage/innobase/include/page0zip.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 2005, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2005, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -29,6 +30,7 @@ Created June 2005 by Marko Makela
#endif
#include "page0zip.h"
+#include "mtr0log.h"
#include "page0page.h"
/* The format of compressed pages is as follows.
@@ -389,6 +391,75 @@ page_zip_write_header(
}
}
+/**********************************************************************//**
+Write a log record of compressing an index page without the data on the page. */
+UNIV_INLINE
+void
+page_zip_compress_write_log_no_data(
+/*================================*/
+ ulint level, /*!< in: compression level */
+ const page_t* page, /*!< in: page that is compressed */
+ dict_index_t* index, /*!< in: index */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ byte* log_ptr = mlog_open_and_write_index(
+ mtr, page, index, MLOG_ZIP_PAGE_COMPRESS_NO_DATA, 1);
+
+ if (log_ptr) {
+ mach_write_to_1(log_ptr, level);
+ mlog_close(mtr, log_ptr + 1);
+ }
+}
+
+/**********************************************************************//**
+Parses a log record of compressing an index page without the data.
+@return end of log record or NULL */
+UNIV_INLINE
+byte*
+page_zip_parse_compress_no_data(
+/*============================*/
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr, /*!< in: buffer end */
+ page_t* page, /*!< in: uncompressed page */
+ page_zip_des_t* page_zip, /*!< out: compressed page */
+ dict_index_t* index) /*!< in: index */
+{
+ ulint level;
+ if (end_ptr == ptr) {
+ return(NULL);
+ }
+
+ level = mach_read_from_1(ptr);
+
+ /* If page compression fails then there must be something wrong
+ because a compress log record is logged only if the compression
+ was successful. Crash in this case. */
+
+ if (page
+ && !page_zip_compress(page_zip, page, index, level, NULL)) {
+ ut_error;
+ }
+
+ return(ptr + 1);
+}
+
+/**********************************************************************//**
+Reset the counters used for filling
+INFORMATION_SCHEMA.innodb_cmp_per_index. */
+UNIV_INLINE
+void
+page_zip_reset_stat_per_index()
+/*===========================*/
+{
+ mutex_enter(&page_zip_stat_per_index_mutex);
+
+ page_zip_stat_per_index.erase(
+ page_zip_stat_per_index.begin(),
+ page_zip_stat_per_index.end());
+
+ mutex_exit(&page_zip_stat_per_index_mutex);
+}
+
#ifdef UNIV_MATERIALIZE
# undef UNIV_INLINE
# define UNIV_INLINE UNIV_INLINE_ORIGINAL
diff --git a/storage/innobase/include/pars0pars.h b/storage/innobase/include/pars0pars.h
index 9eb8aeb747f..65ff7533828 100644
--- a/storage/innobase/include/pars0pars.h
+++ b/storage/innobase/include/pars0pars.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -469,9 +469,10 @@ que_thr_t*
pars_complete_graph_for_exec(
/*=========================*/
que_node_t* node, /*!< in: root node for an incomplete
- query graph */
+ query graph, or NULL for dummy graph */
trx_t* trx, /*!< in: transaction handle */
- mem_heap_t* heap); /*!< in: memory heap from which allocated */
+ mem_heap_t* heap) /*!< in: memory heap from which allocated */
+ __attribute__((nonnull(2,3), warn_unused_result));
/****************************************************************//**
Create parser info struct.
@@ -618,6 +619,18 @@ pars_info_add_ull_literal(
ib_uint64_t val); /*!< in: value */
/****************************************************************//**
+If the literal value already exists then it rebinds otherwise it
+creates a new entry. */
+UNIV_INTERN
+void
+pars_info_bind_ull_literal(
+/*=======================*/
+ pars_info_t* info, /*!< in: info struct */
+ const char* name, /*!< in: name */
+ const ib_uint64_t* val) /*!< in: value */
+ __attribute__((nonnull));
+
+/****************************************************************//**
Add bound id. */
UNIV_INTERN
void
@@ -628,16 +641,6 @@ pars_info_add_id(
const char* id); /*!< in: id */
/****************************************************************//**
-Get user function with the given name.
-@return user func, or NULL if not found */
-UNIV_INTERN
-pars_user_func_t*
-pars_info_get_user_func(
-/*====================*/
- pars_info_t* info, /*!< in: info struct */
- const char* name); /*!< in: function name to find*/
-
-/****************************************************************//**
Get bound literal with the given name.
@return bound literal, or NULL if not found */
UNIV_INTERN
@@ -665,7 +668,7 @@ pars_lexer_close(void);
/*==================*/
/** Extra information supplied for pars_sql(). */
-struct pars_info_struct {
+struct pars_info_t {
mem_heap_t* heap; /*!< our own memory heap */
ib_vector_t* funcs; /*!< user functions, or NUll
@@ -680,14 +683,14 @@ struct pars_info_struct {
};
/** User-supplied function and argument. */
-struct pars_user_func_struct {
+struct pars_user_func_t {
const char* name; /*!< function name */
pars_user_func_cb_t func; /*!< function address */
void* arg; /*!< user-supplied argument */
};
/** Bound literal. */
-struct pars_bound_lit_struct {
+struct pars_bound_lit_t {
const char* name; /*!< name */
const void* address; /*!< address */
ulint length; /*!< length of data */
@@ -697,20 +700,20 @@ struct pars_bound_lit_struct {
};
/** Bound identifier. */
-struct pars_bound_id_struct {
+struct pars_bound_id_t {
const char* name; /*!< name */
const char* id; /*!< identifier */
};
/** Struct used to denote a reserved word in a parsing tree */
-struct pars_res_word_struct{
+struct pars_res_word_t{
int code; /*!< the token code for the reserved word from
pars0grm.h */
};
/** A predefined function or operator node in a parsing tree; this construct
is also used for some non-functions like the assignment ':=' */
-struct func_node_struct{
+struct func_node_t{
que_common_t common; /*!< type: QUE_NODE_FUNC */
int func; /*!< token code of the function name */
ulint fclass; /*!< class of the function */
@@ -725,14 +728,14 @@ struct func_node_struct{
};
/** An order-by node in a select */
-struct order_node_struct{
+struct order_node_t{
que_common_t common; /*!< type: QUE_NODE_ORDER */
sym_node_t* column; /*!< order-by column */
ibool asc; /*!< TRUE if ascending, FALSE if descending */
};
/** Procedure definition node */
-struct proc_node_struct{
+struct proc_node_t{
que_common_t common; /*!< type: QUE_NODE_PROC */
sym_node_t* proc_id; /*!< procedure name symbol in the symbol
table of this same procedure */
@@ -742,14 +745,14 @@ struct proc_node_struct{
};
/** elsif-element node */
-struct elsif_node_struct{
+struct elsif_node_t{
que_common_t common; /*!< type: QUE_NODE_ELSIF */
que_node_t* cond; /*!< if condition */
que_node_t* stat_list; /*!< statement list */
};
/** if-statement node */
-struct if_node_struct{
+struct if_node_t{
que_common_t common; /*!< type: QUE_NODE_IF */
que_node_t* cond; /*!< if condition */
que_node_t* stat_list; /*!< statement list */
@@ -758,14 +761,14 @@ struct if_node_struct{
};
/** while-statement node */
-struct while_node_struct{
+struct while_node_t{
que_common_t common; /*!< type: QUE_NODE_WHILE */
que_node_t* cond; /*!< while condition */
que_node_t* stat_list; /*!< statement list */
};
/** for-loop-statement node */
-struct for_node_struct{
+struct for_node_t{
que_common_t common; /*!< type: QUE_NODE_FOR */
sym_node_t* loop_var; /*!< loop variable: this is the
dereferenced symbol from the
@@ -782,24 +785,24 @@ struct for_node_struct{
};
/** exit statement node */
-struct exit_node_struct{
+struct exit_node_t{
que_common_t common; /*!< type: QUE_NODE_EXIT */
};
/** return-statement node */
-struct return_node_struct{
+struct return_node_t{
que_common_t common; /*!< type: QUE_NODE_RETURN */
};
/** Assignment statement node */
-struct assign_node_struct{
+struct assign_node_t{
que_common_t common; /*!< type: QUE_NODE_ASSIGNMENT */
sym_node_t* var; /*!< variable to set */
que_node_t* val; /*!< value to assign */
};
/** Column assignment node */
-struct col_assign_node_struct{
+struct col_assign_node_t{
que_common_t common; /*!< type: QUE_NODE_COL_ASSIGN */
sym_node_t* col; /*!< column to set */
que_node_t* val; /*!< value to assign */
diff --git a/storage/innobase/include/pars0sym.h b/storage/innobase/include/pars0sym.h
index 4b3b342a533..bcf73639228 100644
--- a/storage/innobase/include/pars0sym.h
+++ b/storage/innobase/include/pars0sym.h
@@ -119,9 +119,9 @@ sym_tab_add_bound_id(
sym_tab_t* sym_tab, /*!< in: symbol table */
const char* name); /*!< in: name of bound id */
-/** Index of sym_node_struct::field_nos corresponding to the clustered index */
+/** Index of sym_node_t::field_nos corresponding to the clustered index */
#define SYM_CLUST_FIELD_NO 0
-/** Index of sym_node_struct::field_nos corresponding to a secondary index */
+/** Index of sym_node_t::field_nos corresponding to a secondary index */
#define SYM_SEC_FIELD_NO 1
/** Types of a symbol table node */
@@ -143,7 +143,7 @@ enum sym_tab_entry {
};
/** Symbol table node */
-struct sym_node_struct{
+struct sym_node_t{
que_common_t common; /*!< node type:
QUE_NODE_SYMBOL */
/* NOTE: if the data field in 'common.val' is not NULL and the symbol
@@ -227,7 +227,7 @@ struct sym_node_struct{
};
/** Symbol table */
-struct sym_tab_struct{
+struct sym_tab_t{
que_t* query_graph;
/*!< query graph generated by the
parser */
diff --git a/storage/innobase/include/pars0types.h b/storage/innobase/include/pars0types.h
index 13ae53f3fd6..47f4b432d20 100644
--- a/storage/innobase/include/pars0types.h
+++ b/storage/innobase/include/pars0types.h
@@ -26,24 +26,24 @@ Created 1/11/1998 Heikki Tuuri
#ifndef pars0types_h
#define pars0types_h
-typedef struct pars_info_struct pars_info_t;
-typedef struct pars_user_func_struct pars_user_func_t;
-typedef struct pars_bound_lit_struct pars_bound_lit_t;
-typedef struct pars_bound_id_struct pars_bound_id_t;
-typedef struct sym_node_struct sym_node_t;
-typedef struct sym_tab_struct sym_tab_t;
-typedef struct pars_res_word_struct pars_res_word_t;
-typedef struct func_node_struct func_node_t;
-typedef struct order_node_struct order_node_t;
-typedef struct proc_node_struct proc_node_t;
-typedef struct elsif_node_struct elsif_node_t;
-typedef struct if_node_struct if_node_t;
-typedef struct while_node_struct while_node_t;
-typedef struct for_node_struct for_node_t;
-typedef struct exit_node_struct exit_node_t;
-typedef struct return_node_struct return_node_t;
-typedef struct assign_node_struct assign_node_t;
-typedef struct col_assign_node_struct col_assign_node_t;
+struct pars_info_t;
+struct pars_user_func_t;
+struct pars_bound_lit_t;
+struct pars_bound_id_t;
+struct sym_node_t;
+struct sym_tab_t;
+struct pars_res_word_t;
+struct func_node_t;
+struct order_node_t;
+struct proc_node_t;
+struct elsif_node_t;
+struct if_node_t;
+struct while_node_t;
+struct for_node_t;
+struct exit_node_t;
+struct return_node_t;
+struct assign_node_t;
+struct col_assign_node_t;
typedef UT_LIST_BASE_NODE_T(sym_node_t) sym_node_list_t;
diff --git a/storage/innobase/include/que0que.h b/storage/innobase/include/que0que.h
index 531794ce688..ba8828623af 100644
--- a/storage/innobase/include/que0que.h
+++ b/storage/innobase/include/que0que.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -42,7 +42,7 @@ of SQL execution in the UNIV_SQL_DEBUG version */
extern ibool que_trace_on;
/** Mutex protecting the query threads. */
-extern mutex_t que_thr_mutex;
+extern ib_mutex_t que_thr_mutex;
/***********************************************************************//**
Creates a query graph fork node.
@@ -310,7 +310,7 @@ que_node_print_info(
Evaluate the given SQL
@return error code or DB_SUCCESS */
UNIV_INTERN
-enum db_err
+dberr_t
que_eval_sql(
/*=========*/
pars_info_t* info, /*!< in: info struct, or NULL */
@@ -349,7 +349,7 @@ que_close(void);
/* Query graph query thread node: the fields are protected by the
trx_t::mutex with the exceptions named below */
-struct que_thr_struct{
+struct que_thr_t{
que_common_t common; /*!< type: QUE_NODE_THR */
ulint magic_n; /*!< magic number to catch memory
corruption */
@@ -374,7 +374,7 @@ struct que_thr_struct{
thus far */
ulint lock_state; /*!< lock state of thread (table or
row) */
- struct srv_slot_struct*
+ struct srv_slot_t*
slot; /* The thread slot in the wait
array in srv_sys_t */
/*------------------------------*/
@@ -398,7 +398,7 @@ struct que_thr_struct{
#define QUE_THR_MAGIC_FREED 123461526
/* Query graph fork node: its fields are protected by the query thread mutex */
-struct que_fork_struct{
+struct que_fork_t{
que_common_t common; /*!< type: QUE_NODE_FORK */
que_t* graph; /*!< query graph of this node */
ulint fork_type; /*!< fork type */
diff --git a/storage/innobase/include/que0types.h b/storage/innobase/include/que0types.h
index b165b817d87..0f11cad301a 100644
--- a/storage/innobase/include/que0types.h
+++ b/storage/innobase/include/que0types.h
@@ -32,18 +32,15 @@ Created 5/27/1996 Heikki Tuuri
/* Pseudotype for all graph nodes */
typedef void que_node_t;
-typedef struct que_fork_struct que_fork_t;
-
/* Query graph root is a fork node */
-typedef que_fork_t que_t;
+typedef struct que_fork_t que_t;
-typedef struct que_thr_struct que_thr_t;
-typedef struct que_common_struct que_common_t;
+struct que_thr_t;
/* Common struct at the beginning of each query graph node; the name of this
substruct must be 'common' */
-struct que_common_struct{
+struct que_common_t{
ulint type; /*!< query node type */
que_node_t* parent; /*!< back pointer to parent node, or NULL */
que_node_t* brother;/* pointer to a possible brother node */
diff --git a/storage/innobase/include/read0read.h b/storage/innobase/include/read0read.h
index 6ea57fffcd2..980faddf98e 100644
--- a/storage/innobase/include/read0read.h
+++ b/storage/innobase/include/read0read.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -57,12 +57,14 @@ read_view_purge_open(
mem_heap_t* heap); /*!< in: memory heap from which
allocated */
/*********************************************************************//**
-Remove read view from the trx_sys->view_list. */
-UNIV_INTERN
+Remove a read view from the trx_sys->view_list. */
+UNIV_INLINE
void
read_view_remove(
/*=============*/
- read_view_t* view); /*!< in: read view */
+ read_view_t* view, /*!< in: read view, can be 0 */
+ bool own_mutex); /*!< in: true if caller owns the
+ trx_sys_t::mutex */
/*********************************************************************//**
Closes a consistent read view for MySQL. This function is called at an SQL
statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED. */
@@ -73,13 +75,14 @@ read_view_close_for_mysql(
trx_t* trx); /*!< in: trx which has a read view */
/*********************************************************************//**
Checks if a read view sees the specified transaction.
-@return TRUE if sees */
+@return true if sees */
UNIV_INLINE
-ibool
+bool
read_view_sees_trx_id(
/*==================*/
const read_view_t* view, /*!< in: read view */
- trx_id_t trx_id);/*!< in: trx id */
+ trx_id_t trx_id) /*!< in: trx id */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Prints a read view to stderr. */
UNIV_INTERN
@@ -119,7 +122,7 @@ read_cursor_set_for_mysql(
/** Read view lists the trx ids of those transactions for which a consistent
read should not see the modifications to the database. */
-struct read_view_struct{
+struct read_view_t{
ulint type; /*!< VIEW_NORMAL, VIEW_HIGH_GRANULARITY */
undo_no_t undo_no;/*!< 0 or if type is
VIEW_HIGH_GRANULARITY
@@ -145,7 +148,7 @@ struct read_view_struct{
trx_id_t* trx_ids;/*!< Additional trx ids which the read should
not see: typically, these are the read-write
active transactions at the time when the read
- is serialized, except the reading transaction
+ is serialized, except the reading transaction
itself; the trx ids in this array are in a
descending order. These trx_ids should be
between the "low" and "high" water marks,
@@ -173,7 +176,7 @@ struct read_view_struct{
cursors. This struct holds both heap where consistent read view
is allocated and pointer to a read view. */
-struct cursor_view_struct{
+struct cursor_view_t{
mem_heap_t* heap;
/*!< Memory heap for the cursor view */
read_view_t* read_view;
diff --git a/storage/innobase/include/read0read.ic b/storage/innobase/include/read0read.ic
index 436800e1585..82c1028f12e 100644
--- a/storage/innobase/include/read0read.ic
+++ b/storage/innobase/include/read0read.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -23,11 +23,64 @@ Cursor read
Created 2/16/1997 Heikki Tuuri
*******************************************************/
+#include "trx0sys.h"
+
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Validates a read view object. */
+static
+bool
+read_view_validate(
+/*===============*/
+ const read_view_t* view) /*!< in: view to validate */
+{
+ ut_ad(mutex_own(&trx_sys->mutex));
+
+ /* Check that the view->trx_ids array is in descending order. */
+ for (ulint i = 1; i < view->n_trx_ids; ++i) {
+
+ ut_a(view->trx_ids[i] < view->trx_ids[i - 1]);
+ }
+
+ return(true);
+}
+
+/** Functor to validate the view list. */
+struct ViewCheck {
+
+ ViewCheck() : m_prev_view(0) { }
+
+ void operator()(const read_view_t* view)
+ {
+ ut_a(m_prev_view == NULL
+ || m_prev_view->low_limit_no >= view->low_limit_no);
+
+ m_prev_view = view;
+ }
+
+ const read_view_t* m_prev_view;
+};
+
+/*********************************************************************//**
+Validates a read view list. */
+static
+bool
+read_view_list_validate(void)
+/*=========================*/
+{
+ ut_ad(mutex_own(&trx_sys->mutex));
+
+ ut_list_map(trx_sys->view_list, &read_view_t::view_list, ViewCheck());
+
+ return(true);
+}
+#endif /* UNIV_DEBUG */
+
/*********************************************************************//**
Checks if a read view sees the specified transaction.
-@return TRUE if sees */
+@return true if sees */
UNIV_INLINE
-ibool
+bool
read_view_sees_trx_id(
/*==================*/
const read_view_t* view, /*!< in: read view */
@@ -35,10 +88,10 @@ read_view_sees_trx_id(
{
if (trx_id < view->up_limit_id) {
- return(TRUE);
+ return(true);
} else if (trx_id >= view->low_limit_id) {
- return(FALSE);
+ return(false);
} else {
ulint lower = 0;
ulint upper = view->n_trx_ids - 1;
@@ -63,5 +116,33 @@ read_view_sees_trx_id(
} while (lower <= upper);
}
- return(TRUE);
+ return(true);
+}
+
+/*********************************************************************//**
+Remove a read view from the trx_sys->view_list. */
+UNIV_INLINE
+void
+read_view_remove(
+/*=============*/
+ read_view_t* view, /*!< in: read view, can be 0 */
+ bool own_mutex) /*!< in: true if caller owns the
+ trx_sys_t::mutex */
+{
+ if (view != 0) {
+ if (!own_mutex) {
+ mutex_enter(&trx_sys->mutex);
+ }
+
+ ut_ad(read_view_validate(view));
+
+ UT_LIST_REMOVE(view_list, trx_sys->view_list, view);
+
+ ut_ad(read_view_list_validate());
+
+ if (!own_mutex) {
+ mutex_exit(&trx_sys->mutex);
+ }
+ }
}
+
diff --git a/storage/innobase/include/read0types.h b/storage/innobase/include/read0types.h
index 0b6aa132b88..969f4ebb637 100644
--- a/storage/innobase/include/read0types.h
+++ b/storage/innobase/include/read0types.h
@@ -26,7 +26,7 @@ Created 2/16/1997 Heikki Tuuri
#ifndef read0types_h
#define read0types_h
-typedef struct read_view_struct read_view_t;
-typedef struct cursor_view_struct cursor_view_t;
+struct read_view_t;
+struct cursor_view_t;
#endif
diff --git a/storage/innobase/include/rem0cmp.h b/storage/innobase/include/rem0cmp.h
index ed6486aa603..cb3c85ac2c8 100644
--- a/storage/innobase/include/rem0cmp.h
+++ b/storage/innobase/include/rem0cmp.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -156,21 +156,28 @@ respectively, when only the common first fields are compared, or until
the first externally stored field in rec */
UNIV_INTERN
int
-cmp_dtuple_rec_with_match(
-/*======================*/
+cmp_dtuple_rec_with_match_low(
+/*==========================*/
const dtuple_t* dtuple, /*!< in: data tuple */
const rec_t* rec, /*!< in: physical record which differs from
dtuple in some of the common fields, or which
has an equal number or more fields than
dtuple */
const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint* matched_fields, /*!< in/out: number of already completely
+ ulint n_cmp, /*!< in: number of fields to compare */
+ ulint* matched_fields,
+ /*!< in/out: number of already completely
matched fields; when function returns,
contains the value for current comparison */
- ulint* matched_bytes); /*!< in/out: number of already matched
+ ulint* matched_bytes)
+ /*!< in/out: number of already matched
bytes within the first field not completely
matched; when function returns, contains the
value for current comparison */
+ __attribute__((nonnull));
+#define cmp_dtuple_rec_with_match(tuple,rec,offsets,fields,bytes) \
+ cmp_dtuple_rec_with_match_low( \
+ tuple,rec,offsets,dtuple_get_n_fields_cmp(tuple),fields,bytes)
/**************************************************************//**
Compares a data tuple to a physical record.
@see cmp_dtuple_rec_with_match
@@ -196,7 +203,9 @@ cmp_dtuple_is_prefix_of_rec(
/*************************************************************//**
Compare two physical records that contain the same number of columns,
none of which are stored externally.
-@return 1, 0, -1 if rec1 is greater, equal, less, respectively, than rec2 */
+@retval 1 if rec1 (including non-ordering columns) is greater than rec2
+@retval -1 if rec1 (including non-ordering columns) is less than rec2
+@retval 0 if rec1 is a duplicate of rec2 */
UNIV_INTERN
int
cmp_rec_rec_simple(
@@ -206,8 +215,10 @@ cmp_rec_rec_simple(
const ulint* offsets1,/*!< in: rec_get_offsets(rec1, ...) */
const ulint* offsets2,/*!< in: rec_get_offsets(rec2, ...) */
const dict_index_t* index, /*!< in: data dictionary index */
- ibool* null_eq);/*!< out: set to TRUE if
- found matching null values */
+ struct TABLE* table) /*!< in: MySQL table, for reporting
+ duplicate key value if applicable,
+ or NULL */
+ __attribute__((nonnull(1,2,3,4), warn_unused_result));
/*************************************************************//**
This function is used to compare two physical records. Only the common
first fields are compared, and if an externally stored field is
diff --git a/storage/innobase/include/rem0rec.h b/storage/innobase/include/rem0rec.h
index c6c70bb5f09..2a84aee7a6f 100644
--- a/storage/innobase/include/rem0rec.h
+++ b/storage/innobase/include/rem0rec.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -66,6 +66,15 @@ The status is stored in the low-order bits. */
/* Length of a B-tree node pointer, in bytes */
#define REC_NODE_PTR_SIZE 4
+/** SQL null flag in a 1-byte offset of ROW_FORMAT=REDUNDANT records */
+#define REC_1BYTE_SQL_NULL_MASK 0x80UL
+/** SQL null flag in a 2-byte offset of ROW_FORMAT=REDUNDANT records */
+#define REC_2BYTE_SQL_NULL_MASK 0x8000UL
+
+/** In a 2-byte offset of ROW_FORMAT=REDUNDANT records, the second most
+significant bit denotes that the tail of a field is stored off-page. */
+#define REC_2BYTE_EXTERN_MASK 0x4000UL
+
#ifdef UNIV_DEBUG
/* Length of the rec_get_offsets() header */
# define REC_OFFS_HEADER_SIZE 4
@@ -88,7 +97,8 @@ const rec_t*
rec_get_next_ptr_const(
/*===================*/
const rec_t* rec, /*!< in: physical record */
- ulint comp); /*!< in: nonzero=compact page format */
+ ulint comp) /*!< in: nonzero=compact page format */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to get the pointer of the next chained record
on the same page.
@@ -98,7 +108,8 @@ rec_t*
rec_get_next_ptr(
/*=============*/
rec_t* rec, /*!< in: physical record */
- ulint comp); /*!< in: nonzero=compact page format */
+ ulint comp) /*!< in: nonzero=compact page format */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to get the offset of the
next chained record on the same page.
@@ -108,7 +119,8 @@ ulint
rec_get_next_offs(
/*==============*/
const rec_t* rec, /*!< in: physical record */
- ulint comp); /*!< in: nonzero=compact page format */
+ ulint comp) /*!< in: nonzero=compact page format */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to set the next record offset field
of an old-style record. */
@@ -117,7 +129,8 @@ void
rec_set_next_offs_old(
/*==================*/
rec_t* rec, /*!< in: old-style physical record */
- ulint next); /*!< in: offset of the next record */
+ ulint next) /*!< in: offset of the next record */
+ __attribute__((nonnull));
/******************************************************//**
The following function is used to set the next record offset field
of a new-style record. */
@@ -126,7 +139,8 @@ void
rec_set_next_offs_new(
/*==================*/
rec_t* rec, /*!< in/out: new-style physical record */
- ulint next); /*!< in: offset of the next record */
+ ulint next) /*!< in: offset of the next record */
+ __attribute__((nonnull));
/******************************************************//**
The following function is used to get the number of fields
in an old-style record.
@@ -135,7 +149,8 @@ UNIV_INLINE
ulint
rec_get_n_fields_old(
/*=================*/
- const rec_t* rec); /*!< in: physical record */
+ const rec_t* rec) /*!< in: physical record */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to get the number of fields
in a record.
@@ -145,7 +160,8 @@ ulint
rec_get_n_fields(
/*=============*/
const rec_t* rec, /*!< in: physical record */
- const dict_index_t* index); /*!< in: record descriptor */
+ const dict_index_t* index) /*!< in: record descriptor */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to get the number of records owned by the
previous directory record.
@@ -154,7 +170,8 @@ UNIV_INLINE
ulint
rec_get_n_owned_old(
/*================*/
- const rec_t* rec); /*!< in: old-style physical record */
+ const rec_t* rec) /*!< in: old-style physical record */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to set the number of owned records. */
UNIV_INLINE
@@ -162,7 +179,8 @@ void
rec_set_n_owned_old(
/*================*/
rec_t* rec, /*!< in: old-style physical record */
- ulint n_owned); /*!< in: the number of owned */
+ ulint n_owned) /*!< in: the number of owned */
+ __attribute__((nonnull));
/******************************************************//**
The following function is used to get the number of records owned by the
previous directory record.
@@ -171,7 +189,8 @@ UNIV_INLINE
ulint
rec_get_n_owned_new(
/*================*/
- const rec_t* rec); /*!< in: new-style physical record */
+ const rec_t* rec) /*!< in: new-style physical record */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to set the number of owned records. */
UNIV_INLINE
@@ -180,7 +199,8 @@ rec_set_n_owned_new(
/*================*/
rec_t* rec, /*!< in/out: new-style physical record */
page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- ulint n_owned);/*!< in: the number of owned */
+ ulint n_owned)/*!< in: the number of owned */
+ __attribute__((nonnull(1)));
/******************************************************//**
The following function is used to retrieve the info bits of
a record.
@@ -190,7 +210,8 @@ ulint
rec_get_info_bits(
/*==============*/
const rec_t* rec, /*!< in: physical record */
- ulint comp); /*!< in: nonzero=compact page format */
+ ulint comp) /*!< in: nonzero=compact page format */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to set the info bits of a record. */
UNIV_INLINE
@@ -198,7 +219,8 @@ void
rec_set_info_bits_old(
/*==================*/
rec_t* rec, /*!< in: old-style physical record */
- ulint bits); /*!< in: info bits */
+ ulint bits) /*!< in: info bits */
+ __attribute__((nonnull));
/******************************************************//**
The following function is used to set the info bits of a record. */
UNIV_INLINE
@@ -206,7 +228,8 @@ void
rec_set_info_bits_new(
/*==================*/
rec_t* rec, /*!< in/out: new-style physical record */
- ulint bits); /*!< in: info bits */
+ ulint bits) /*!< in: info bits */
+ __attribute__((nonnull));
/******************************************************//**
The following function retrieves the status bits of a new-style record.
@return status bits */
@@ -214,7 +237,8 @@ UNIV_INLINE
ulint
rec_get_status(
/*===========*/
- const rec_t* rec); /*!< in: physical record */
+ const rec_t* rec) /*!< in: physical record */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to set the status bits of a new-style record. */
@@ -223,7 +247,8 @@ void
rec_set_status(
/*===========*/
rec_t* rec, /*!< in/out: physical record */
- ulint bits); /*!< in: info bits */
+ ulint bits) /*!< in: info bits */
+ __attribute__((nonnull));
/******************************************************//**
The following function is used to retrieve the info and status
@@ -234,7 +259,8 @@ ulint
rec_get_info_and_status_bits(
/*=========================*/
const rec_t* rec, /*!< in: physical record */
- ulint comp); /*!< in: nonzero=compact page format */
+ ulint comp) /*!< in: nonzero=compact page format */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to set the info and status
bits of a record. (Only compact records have status bits.) */
@@ -243,7 +269,8 @@ void
rec_set_info_and_status_bits(
/*=========================*/
rec_t* rec, /*!< in/out: compact physical record */
- ulint bits); /*!< in: info bits */
+ ulint bits) /*!< in: info bits */
+ __attribute__((nonnull));
/******************************************************//**
The following function tells if record is delete marked.
@@ -253,7 +280,8 @@ ulint
rec_get_deleted_flag(
/*=================*/
const rec_t* rec, /*!< in: physical record */
- ulint comp); /*!< in: nonzero=compact page format */
+ ulint comp) /*!< in: nonzero=compact page format */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to set the deleted bit. */
UNIV_INLINE
@@ -261,7 +289,8 @@ void
rec_set_deleted_flag_old(
/*=====================*/
rec_t* rec, /*!< in: old-style physical record */
- ulint flag); /*!< in: nonzero if delete marked */
+ ulint flag) /*!< in: nonzero if delete marked */
+ __attribute__((nonnull));
/******************************************************//**
The following function is used to set the deleted bit. */
UNIV_INLINE
@@ -270,7 +299,8 @@ rec_set_deleted_flag_new(
/*=====================*/
rec_t* rec, /*!< in/out: new-style physical record */
page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- ulint flag); /*!< in: nonzero if delete marked */
+ ulint flag) /*!< in: nonzero if delete marked */
+ __attribute__((nonnull(1)));
/******************************************************//**
The following function tells if a new-style record is a node pointer.
@return TRUE if node pointer */
@@ -278,7 +308,8 @@ UNIV_INLINE
ibool
rec_get_node_ptr_flag(
/*==================*/
- const rec_t* rec); /*!< in: physical record */
+ const rec_t* rec) /*!< in: physical record */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to get the order number
of an old-style record in the heap of the index page.
@@ -287,7 +318,8 @@ UNIV_INLINE
ulint
rec_get_heap_no_old(
/*================*/
- const rec_t* rec); /*!< in: physical record */
+ const rec_t* rec) /*!< in: physical record */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to set the heap number
field in an old-style record. */
@@ -296,7 +328,8 @@ void
rec_set_heap_no_old(
/*================*/
rec_t* rec, /*!< in: physical record */
- ulint heap_no);/*!< in: the heap number */
+ ulint heap_no)/*!< in: the heap number */
+ __attribute__((nonnull));
/******************************************************//**
The following function is used to get the order number
of a new-style record in the heap of the index page.
@@ -305,7 +338,8 @@ UNIV_INLINE
ulint
rec_get_heap_no_new(
/*================*/
- const rec_t* rec); /*!< in: physical record */
+ const rec_t* rec) /*!< in: physical record */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
The following function is used to set the heap number
field in a new-style record. */
@@ -314,7 +348,8 @@ void
rec_set_heap_no_new(
/*================*/
rec_t* rec, /*!< in/out: physical record */
- ulint heap_no);/*!< in: the heap number */
+ ulint heap_no)/*!< in: the heap number */
+ __attribute__((nonnull));
/******************************************************//**
The following function is used to test whether the data offsets
in the record are stored in one-byte or two-byte format.
@@ -323,7 +358,57 @@ UNIV_INLINE
ibool
rec_get_1byte_offs_flag(
/*====================*/
- const rec_t* rec); /*!< in: physical record */
+ const rec_t* rec) /*!< in: physical record */
+ __attribute__((nonnull, pure, warn_unused_result));
+
+/******************************************************//**
+The following function is used to set the 1-byte offsets flag. */
+UNIV_INLINE
+void
+rec_set_1byte_offs_flag(
+/*====================*/
+ rec_t* rec, /*!< in: physical record */
+ ibool flag) /*!< in: TRUE if 1byte form */
+ __attribute__((nonnull));
+
+/******************************************************//**
+Returns the offset of nth field end if the record is stored in the 1-byte
+offsets form. If the field is SQL null, the flag is ORed in the returned
+value.
+@return offset of the start of the field, SQL null flag ORed */
+UNIV_INLINE
+ulint
+rec_1_get_field_end_info(
+/*=====================*/
+ const rec_t* rec, /*!< in: record */
+ ulint n) /*!< in: field index */
+ __attribute__((nonnull, pure, warn_unused_result));
+
+/******************************************************//**
+Returns the offset of nth field end if the record is stored in the 2-byte
+offsets form. If the field is SQL null, the flag is ORed in the returned
+value.
+@return offset of the start of the field, SQL null flag and extern
+storage flag ORed */
+UNIV_INLINE
+ulint
+rec_2_get_field_end_info(
+/*=====================*/
+ const rec_t* rec, /*!< in: record */
+ ulint n) /*!< in: field index */
+ __attribute__((nonnull, pure, warn_unused_result));
+
+/******************************************************//**
+Returns nonzero if the field is stored off-page.
+@retval 0 if the field is stored in-page
+@retval REC_2BYTE_EXTERN_MASK if the field is stored externally */
+UNIV_INLINE
+ulint
+rec_2_is_field_extern(
+/*==================*/
+ const rec_t* rec, /*!< in: record */
+ ulint n) /*!< in: field index */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
Determine how many of the first n columns in a compact
@@ -333,9 +418,10 @@ UNIV_INTERN
ulint
rec_get_n_extern_new(
/*=================*/
- const rec_t* rec, /*!< in: compact physical record */
- dict_index_t* index, /*!< in: record descriptor */
- ulint n); /*!< in: number of columns to scan */
+ const rec_t* rec, /*!< in: compact physical record */
+ const dict_index_t* index, /*!< in: record descriptor */
+ ulint n) /*!< in: number of columns to scan */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************//**
The following function determines the offsets to each field
@@ -356,30 +442,13 @@ rec_get_offsets_func(
(ULINT_UNDEFINED if all fields) */
mem_heap_t** heap, /*!< in/out: memory heap */
const char* file, /*!< in: file name where called */
- ulint line); /*!< in: line number where called */
+ ulint line) /*!< in: line number where called */
+ __attribute__((nonnull(1,2,5,6),warn_unused_result));
#define rec_get_offsets(rec,index,offsets,n,heap) \
rec_get_offsets_func(rec,index,offsets,n,heap,__FILE__,__LINE__)
/******************************************************//**
-Determine the offset to each field in a leaf-page record
-in ROW_FORMAT=COMPACT. This is a special case of
-rec_init_offsets() and rec_get_offsets_func(). */
-UNIV_INTERN
-void
-rec_init_offsets_comp_ordinary(
-/*===========================*/
- const rec_t* rec, /*!< in: physical record in
- ROW_FORMAT=COMPACT */
- ulint extra, /*!< in: number of bytes to reserve
- between the record header and
- the data payload
- (usually REC_N_NEW_EXTRA_BYTES) */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets);/*!< in/out: array of offsets;
- in: n=rec_offs_n_fields(offsets) */
-
-/******************************************************//**
The following function determines the offsets to each field
in the record. It can reuse a previously allocated array. */
UNIV_INTERN
@@ -393,9 +462,10 @@ rec_get_offsets_reverse(
const dict_index_t* index, /*!< in: record descriptor */
ulint node_ptr,/*!< in: nonzero=node pointer,
0=leaf node */
- ulint* offsets);/*!< in/out: array consisting of
+ ulint* offsets)/*!< in/out: array consisting of
offsets[0] allocated elements */
-
+ __attribute__((nonnull));
+#ifdef UNIV_DEBUG
/************************************************************//**
Validates offsets returned by rec_get_offsets().
@return TRUE if valid */
@@ -405,9 +475,9 @@ rec_offs_validate(
/*==============*/
const rec_t* rec, /*!< in: record or NULL */
const dict_index_t* index, /*!< in: record descriptor or NULL */
- const ulint* offsets);/*!< in: array returned by
+ const ulint* offsets)/*!< in: array returned by
rec_get_offsets() */
-#ifdef UNIV_DEBUG
+ __attribute__((nonnull(3), warn_unused_result));
/************************************************************//**
Updates debug data in offsets, in order to avoid bogus
rec_offs_validate() failures. */
@@ -417,8 +487,9 @@ rec_offs_make_valid(
/*================*/
const rec_t* rec, /*!< in: record */
const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets);/*!< in: array returned by
+ ulint* offsets)/*!< in: array returned by
rec_get_offsets() */
+ __attribute__((nonnull));
#else
# define rec_offs_make_valid(rec, index, offsets) ((void) 0)
#endif /* UNIV_DEBUG */
@@ -433,8 +504,9 @@ rec_get_nth_field_offs_old(
/*=======================*/
const rec_t* rec, /*!< in: record */
ulint n, /*!< in: index of the field */
- ulint* len); /*!< out: length of the field; UNIV_SQL_NULL
+ ulint* len) /*!< out: length of the field; UNIV_SQL_NULL
if SQL null */
+ __attribute__((nonnull));
#define rec_get_nth_field_old(rec, n, len) \
((rec) + rec_get_nth_field_offs_old(rec, n, len))
/************************************************************//**
@@ -447,7 +519,8 @@ ulint
rec_get_nth_field_size(
/*===================*/
const rec_t* rec, /*!< in: record */
- ulint n); /*!< in: index of the field */
+ ulint n) /*!< in: index of the field */
+ __attribute__((nonnull, pure, warn_unused_result));
/************************************************************//**
The following function is used to get an offset to the nth
data field in a record.
@@ -458,8 +531,9 @@ rec_get_nth_field_offs(
/*===================*/
const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
ulint n, /*!< in: index of the field */
- ulint* len); /*!< out: length of the field; UNIV_SQL_NULL
+ ulint* len) /*!< out: length of the field; UNIV_SQL_NULL
if SQL null */
+ __attribute__((nonnull));
#define rec_get_nth_field(rec, offsets, n, len) \
((rec) + rec_get_nth_field_offs(offsets, n, len))
/******************************************************//**
@@ -470,7 +544,8 @@ UNIV_INLINE
ulint
rec_offs_comp(
/*==========*/
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
Determine if the offsets are for a record containing
externally stored columns.
@@ -479,8 +554,8 @@ UNIV_INLINE
ulint
rec_offs_any_extern(
/*================*/
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
-#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
Determine if the offsets are for a record containing null BLOB pointers.
@return first field containing a null BLOB pointer, or NULL if none found */
@@ -490,8 +565,7 @@ rec_offs_any_null_extern(
/*=====================*/
const rec_t* rec, /*!< in: record */
const ulint* offsets) /*!< in: rec_get_offsets(rec) */
- __attribute__((nonnull, warn_unused_result));
-#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
Returns nonzero if the extern bit is set in nth field of rec.
@return nonzero if externally stored */
@@ -500,7 +574,8 @@ ulint
rec_offs_nth_extern(
/*================*/
const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n); /*!< in: nth field */
+ ulint n) /*!< in: nth field */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
Returns nonzero if the SQL NULL bit is set in nth field of rec.
@return nonzero if SQL NULL */
@@ -509,7 +584,8 @@ ulint
rec_offs_nth_sql_null(
/*==================*/
const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n); /*!< in: nth field */
+ ulint n) /*!< in: nth field */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
Gets the physical size of a field.
@return length of field */
@@ -518,7 +594,8 @@ ulint
rec_offs_nth_size(
/*==============*/
const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n); /*!< in: nth field */
+ ulint n) /*!< in: nth field */
+ __attribute__((nonnull, pure, warn_unused_result));
/******************************************************//**
Returns the number of extern bits set in a record.
@@ -527,7 +604,8 @@ UNIV_INLINE
ulint
rec_offs_n_extern(
/*==============*/
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull, pure, warn_unused_result));
/***********************************************************//**
This is used to modify the value of an already existing field in a record.
The previous value must have exactly the same size as the new value. If len
@@ -542,11 +620,12 @@ rec_set_nth_field(
const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
ulint n, /*!< in: index number of the field */
const void* data, /*!< in: pointer to the data if not SQL null */
- ulint len); /*!< in: length of the data or UNIV_SQL_NULL.
+ ulint len) /*!< in: length of the data or UNIV_SQL_NULL.
If not SQL null, must have the same
length as the previous value.
If SQL null, previous value must be
SQL null. */
+ __attribute__((nonnull(1,2)));
/**********************************************************//**
The following function returns the data size of an old-style physical
record, that is the sum of field lengths. SQL null fields
@@ -557,7 +636,8 @@ UNIV_INLINE
ulint
rec_get_data_size_old(
/*==================*/
- const rec_t* rec); /*!< in: physical record */
+ const rec_t* rec) /*!< in: physical record */
+ __attribute__((nonnull, pure, warn_unused_result));
/**********************************************************//**
The following function returns the number of allocated elements
for an array of offsets.
@@ -566,7 +646,8 @@ UNIV_INLINE
ulint
rec_offs_get_n_alloc(
/*=================*/
- const ulint* offsets);/*!< in: array for rec_get_offsets() */
+ const ulint* offsets)/*!< in: array for rec_get_offsets() */
+ __attribute__((nonnull, pure, warn_unused_result));
/**********************************************************//**
The following function sets the number of allocated elements
for an array of offsets. */
@@ -576,7 +657,8 @@ rec_offs_set_n_alloc(
/*=================*/
ulint* offsets, /*!< out: array for rec_get_offsets(),
must be allocated */
- ulint n_alloc); /*!< in: number of elements */
+ ulint n_alloc) /*!< in: number of elements */
+ __attribute__((nonnull));
#define rec_offs_init(offsets) \
rec_offs_set_n_alloc(offsets, (sizeof offsets) / sizeof *offsets)
/**********************************************************//**
@@ -586,7 +668,8 @@ UNIV_INLINE
ulint
rec_offs_n_fields(
/*==============*/
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull, pure, warn_unused_result));
/**********************************************************//**
The following function returns the data size of a physical
record, that is the sum of field lengths. SQL null fields
@@ -597,7 +680,8 @@ UNIV_INLINE
ulint
rec_offs_data_size(
/*===============*/
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull, pure, warn_unused_result));
/**********************************************************//**
Returns the total size of record minus data size of record.
The value returned by the function is the distance from record
@@ -607,7 +691,8 @@ UNIV_INLINE
ulint
rec_offs_extra_size(
/*================*/
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull, pure, warn_unused_result));
/**********************************************************//**
Returns the total size of a physical record.
@return size */
@@ -615,7 +700,8 @@ UNIV_INLINE
ulint
rec_offs_size(
/*==========*/
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull, pure, warn_unused_result));
#ifdef UNIV_DEBUG
/**********************************************************//**
Returns a pointer to the start of the record.
@@ -625,7 +711,8 @@ byte*
rec_get_start(
/*==========*/
const rec_t* rec, /*!< in: pointer to record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull, pure, warn_unused_result));
/**********************************************************//**
Returns a pointer to the end of the record.
@return pointer to end */
@@ -634,7 +721,8 @@ byte*
rec_get_end(
/*========*/
const rec_t* rec, /*!< in: pointer to record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull, pure, warn_unused_result));
#else /* UNIV_DEBUG */
# define rec_get_start(rec, offsets) ((rec) - rec_offs_extra_size(offsets))
# define rec_get_end(rec, offsets) ((rec) + rec_offs_data_size(offsets))
@@ -648,8 +736,48 @@ rec_copy(
/*=====*/
void* buf, /*!< in: buffer */
const rec_t* rec, /*!< in: physical record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull));
#ifndef UNIV_HOTBACKUP
+/**********************************************************//**
+Determines the size of a data tuple prefix in a temporary file.
+@return total size */
+UNIV_INTERN
+ulint
+rec_get_converted_size_temp(
+/*========================*/
+ const dict_index_t* index, /*!< in: record descriptor */
+ const dfield_t* fields, /*!< in: array of data fields */
+ ulint n_fields,/*!< in: number of data fields */
+ ulint* extra) /*!< out: extra size */
+ __attribute__((warn_unused_result, nonnull));
+
+/******************************************************//**
+Determine the offset to each field in temporary file.
+@see rec_convert_dtuple_to_temp() */
+UNIV_INTERN
+void
+rec_init_offsets_temp(
+/*==================*/
+ const rec_t* rec, /*!< in: temporary file record */
+ const dict_index_t* index, /*!< in: record descriptor */
+ ulint* offsets)/*!< in/out: array of offsets;
+ in: n=rec_offs_n_fields(offsets) */
+ __attribute__((nonnull));
+
+/*********************************************************//**
+Builds a temporary file record out of a data tuple.
+@see rec_init_offsets_temp() */
+UNIV_INTERN
+void
+rec_convert_dtuple_to_temp(
+/*=======================*/
+ rec_t* rec, /*!< out: record */
+ const dict_index_t* index, /*!< in: record descriptor */
+ const dfield_t* fields, /*!< in: array of data fields */
+ ulint n_fields) /*!< in: number of fields */
+ __attribute__((nonnull));
+
/**************************************************************//**
Copies the first n fields of a physical record to a new physical record in
a buffer.
@@ -665,7 +793,8 @@ rec_copy_prefix_to_buf(
byte** buf, /*!< in/out: memory buffer
for the copied prefix,
or NULL */
- ulint* buf_size); /*!< in/out: buffer size */
+ ulint* buf_size) /*!< in/out: buffer size */
+ __attribute__((nonnull));
/************************************************************//**
Folds a prefix of a physical record to a ulint.
@return the folded value */
@@ -681,24 +810,9 @@ rec_fold(
ulint n_bytes, /*!< in: number of bytes to fold
in an incomplete last field */
index_id_t tree_id) /*!< in: index tree id */
- __attribute__((pure));
+ __attribute__((nonnull, pure, warn_unused_result));
#endif /* !UNIV_HOTBACKUP */
/*********************************************************//**
-Builds a ROW_FORMAT=COMPACT record out of a data tuple. */
-UNIV_INTERN
-void
-rec_convert_dtuple_to_rec_comp(
-/*===========================*/
- rec_t* rec, /*!< in: origin of record */
- ulint extra, /*!< in: number of bytes to
- reserve between the record
- header and the data payload
- (normally REC_N_NEW_EXTRA_BYTES) */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint status, /*!< in: status bits of the record */
- const dfield_t* fields, /*!< in: array of data fields */
- ulint n_fields);/*!< in: number of data fields */
-/*********************************************************//**
Builds a physical record out of a data tuple and
stores it into the given buffer.
@return pointer to the origin of physical record */
@@ -710,8 +824,9 @@ rec_convert_dtuple_to_rec(
physical record */
const dict_index_t* index, /*!< in: record descriptor */
const dtuple_t* dtuple, /*!< in: data tuple */
- ulint n_ext); /*!< in: number of
+ ulint n_ext) /*!< in: number of
externally stored columns */
+ __attribute__((nonnull, warn_unused_result));
/**********************************************************//**
Returns the extra size of an old-style physical record if we know its
data size and number of fields.
@@ -723,7 +838,7 @@ rec_get_converted_extra_size(
ulint data_size, /*!< in: data size */
ulint n_fields, /*!< in: number of fields */
ulint n_ext) /*!< in: number of externally stored columns */
- __attribute__((const));
+ __attribute__((const));
/**********************************************************//**
Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT.
@return total size */
@@ -731,13 +846,11 @@ UNIV_INTERN
ulint
rec_get_converted_size_comp_prefix(
/*===============================*/
- const dict_index_t* index, /*!< in: record descriptor;
- dict_table_is_comp() is
- assumed to hold, even if
- it does not */
+ const dict_index_t* index, /*!< in: record descriptor */
const dfield_t* fields, /*!< in: array of data fields */
ulint n_fields,/*!< in: number of data fields */
- ulint* extra); /*!< out: extra size */
+ ulint* extra) /*!< out: extra size */
+ __attribute__((warn_unused_result, nonnull(1,2)));
/**********************************************************//**
Determines the size of a data tuple in ROW_FORMAT=COMPACT.
@return total size */
@@ -752,7 +865,8 @@ rec_get_converted_size_comp(
ulint status, /*!< in: status bits of the record */
const dfield_t* fields, /*!< in: array of data fields */
ulint n_fields,/*!< in: number of data fields */
- ulint* extra); /*!< out: extra size */
+ ulint* extra) /*!< out: extra size */
+ __attribute__((nonnull(1,3)));
/**********************************************************//**
The following function returns the size of a data tuple when converted to
a physical record.
@@ -763,7 +877,8 @@ rec_get_converted_size(
/*===================*/
dict_index_t* index, /*!< in: record descriptor */
const dtuple_t* dtuple, /*!< in: data tuple */
- ulint n_ext); /*!< in: number of externally stored columns */
+ ulint n_ext) /*!< in: number of externally stored columns */
+ __attribute__((warn_unused_result, nonnull));
#ifndef UNIV_HOTBACKUP
/**************************************************************//**
Copies the first n fields of a physical record to a data tuple.
@@ -777,7 +892,8 @@ rec_copy_prefix_to_dtuple(
const dict_index_t* index, /*!< in: record descriptor */
ulint n_fields, /*!< in: number of fields
to copy */
- mem_heap_t* heap); /*!< in: memory heap */
+ mem_heap_t* heap) /*!< in: memory heap */
+ __attribute__((nonnull));
#endif /* !UNIV_HOTBACKUP */
/***************************************************************//**
Validates the consistency of a physical record.
@@ -787,7 +903,8 @@ ibool
rec_validate(
/*=========*/
const rec_t* rec, /*!< in: physical record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull));
/***************************************************************//**
Prints an old-style physical record. */
UNIV_INTERN
@@ -795,7 +912,8 @@ void
rec_print_old(
/*==========*/
FILE* file, /*!< in: file where to print */
- const rec_t* rec); /*!< in: physical record */
+ const rec_t* rec) /*!< in: physical record */
+ __attribute__((nonnull));
#ifndef UNIV_HOTBACKUP
/***************************************************************//**
Prints a physical record in ROW_FORMAT=COMPACT. Ignores the
@@ -806,7 +924,8 @@ rec_print_comp(
/*===========*/
FILE* file, /*!< in: file where to print */
const rec_t* rec, /*!< in: physical record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull));
/***************************************************************//**
Prints a physical record. */
UNIV_INTERN
@@ -815,7 +934,8 @@ rec_print_new(
/*==========*/
FILE* file, /*!< in: file where to print */
const rec_t* rec, /*!< in: physical record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull));
/***************************************************************//**
Prints a physical record. */
UNIV_INTERN
@@ -824,7 +944,21 @@ rec_print(
/*======*/
FILE* file, /*!< in: file where to print */
const rec_t* rec, /*!< in: physical record */
- const dict_index_t* index); /*!< in: record descriptor */
+ const dict_index_t* index) /*!< in: record descriptor */
+ __attribute__((nonnull));
+
+# ifdef UNIV_DEBUG
+/************************************************************//**
+Reads the DB_TRX_ID of a clustered index record.
+@return the value of DB_TRX_ID */
+UNIV_INTERN
+trx_id_t
+rec_get_trx_id(
+/*===========*/
+ const rec_t* rec, /*!< in: record */
+ const dict_index_t* index) /*!< in: clustered index */
+ __attribute__((nonnull, warn_unused_result));
+# endif /* UNIV_DEBUG */
#endif /* UNIV_HOTBACKUP */
/* Maximum lengths for the data in a physical record if the offsets
diff --git a/storage/innobase/include/rem0rec.ic b/storage/innobase/include/rem0rec.ic
index 6950263fe81..18a7deb9d26 100644
--- a/storage/innobase/include/rem0rec.ic
+++ b/storage/innobase/include/rem0rec.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -118,17 +118,6 @@ and the shift needed to obtain each bit-field of the record. */
#define REC_INFO_BITS_MASK 0xF0UL
#define REC_INFO_BITS_SHIFT 0
-/* The following masks are used to filter the SQL null bit from
-one-byte and two-byte offsets */
-
-#define REC_1BYTE_SQL_NULL_MASK 0x80UL
-#define REC_2BYTE_SQL_NULL_MASK 0x8000UL
-
-/* In a 2-byte offset the second most significant bit denotes
-a field stored to another page: */
-
-#define REC_2BYTE_EXTERN_MASK 0x4000UL
-
#if REC_OLD_SHORT_MASK << (8 * (REC_OLD_SHORT - 3)) \
^ REC_OLD_N_FIELDS_MASK << (8 * (REC_OLD_N_FIELDS - 4)) \
^ REC_HEAP_NO_MASK << (8 * (REC_OLD_HEAP_NO - 4)) \
@@ -883,6 +872,20 @@ rec_2_get_field_end_info(
return(mach_read_from_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n + 2)));
}
+/******************************************************//**
+Returns nonzero if the field is stored off-page.
+@retval 0 if the field is stored in-page
+@retval REC_2BYTE_EXTERN_MASK if the field is stored externally */
+UNIV_INLINE
+ulint
+rec_2_is_field_extern(
+/*==================*/
+ const rec_t* rec, /*!< in: record */
+ ulint n) /*!< in: field index */
+{
+ return(rec_2_get_field_end_info(rec, n) & REC_2BYTE_EXTERN_MASK);
+}
+
/* Get the base address of offsets. The extra_size is stored at
this position, and following positions hold the end offsets of
the fields. */
@@ -1084,7 +1087,6 @@ rec_offs_any_extern(
return(*rec_offs_base(offsets) & REC_OFFS_EXTERNAL);
}
-#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
/******************************************************//**
Determine if the offsets are for a record containing null BLOB pointers.
@return first field containing a null BLOB pointer, or NULL if none found */
@@ -1120,7 +1122,6 @@ rec_offs_any_null_extern(
return(NULL);
}
-#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
/******************************************************//**
Returns nonzero if the extern bit is set in nth field of rec.
diff --git a/storage/innobase/include/rem0types.h b/storage/innobase/include/rem0types.h
index 2f1ead43c07..f8133f77466 100644
--- a/storage/innobase/include/rem0types.h
+++ b/storage/innobase/include/rem0types.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -34,6 +34,15 @@ typedef byte rec_t;
#define REC_MAX_HEAP_NO (2 * 8192 - 1)
#define REC_MAX_N_OWNED (16 - 1)
+/* Maximum number of user defined fields/columns. The reserved columns
+are the ones InnoDB adds internally: DB_ROW_ID, DB_TRX_ID, DB_ROLL_PTR.
+We need "* 2" because mlog_parse_index() creates a dummy table object
+possibly, with some of the system columns in it, and then adds the 3
+system columns (again) using dict_table_add_system_columns(). The problem
+is that mlog_parse_index() cannot recognize the system columns by
+just having n_fields, n_uniq and the lengths of the columns. */
+#define REC_MAX_N_USER_FIELDS (REC_MAX_N_FIELDS - DATA_N_SYS_COLS * 2)
+
/* REC_ANTELOPE_MAX_INDEX_COL_LEN is measured in bytes and is the maximum
indexed field length (or indexed prefix length) for indexes on tables of
ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT format.
diff --git a/storage/innobase/include/row0ext.h b/storage/innobase/include/row0ext.h
index 60aaf16c09a..a098e2f9b29 100644
--- a/storage/innobase/include/row0ext.h
+++ b/storage/innobase/include/row0ext.h
@@ -84,7 +84,7 @@ row_ext_lookup(
DICT_MAX_FIELD_LEN_BY_FORMAT() */
/** Prefixes of externally stored columns */
-struct row_ext_struct{
+struct row_ext_t{
ulint n_ext; /*!< number of externally stored columns */
const ulint* ext; /*!< col_no's of externally stored columns */
byte* buf; /*!< backing store of the column prefix cache */
diff --git a/storage/innobase/include/row0ftsort.h b/storage/innobase/include/row0ftsort.h
index cc5efea026f..4a486450efc 100644
--- a/storage/innobase/include/row0ftsort.h
+++ b/storage/innobase/include/row0ftsort.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2010, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2010, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -56,16 +56,16 @@ typedef UT_LIST_BASE_NODE_T(fts_doc_item_t) fts_doc_list_t;
#define FTS_PLL_MERGE 1
/** Sort information passed to each individual parallel sort thread */
-typedef struct fts_psort_struct fts_psort_t;
+struct fts_psort_t;
/** Common info passed to each parallel sort thread */
-struct fts_psort_common_struct {
- struct TABLE* table; /*!< MySQL table */
+struct fts_psort_common_t {
+ row_merge_dup_t* dup; /*!< descriptor of FTS index */
dict_table_t* new_table; /*!< source table */
trx_t* trx; /*!< transaction */
- dict_index_t* sort_index; /*!< FTS index */
fts_psort_t* all_info; /*!< all parallel sort info */
os_event_t sort_event; /*!< sort event */
+ os_event_t merge_event; /*!< merge event */
ibool opt_doc_id_size;/*!< whether to use 4 bytes
instead of 8 bytes integer to
store Doc ID during sort, if
@@ -73,9 +73,7 @@ struct fts_psort_common_struct {
to use 8 bytes value */
};
-typedef struct fts_psort_common_struct fts_psort_common_t;
-
-struct fts_psort_struct {
+struct fts_psort_t {
ulint psort_id; /*!< Parallel sort ID */
row_merge_buf_t* merge_buf[FTS_NUM_AUX_INDEX];
/*!< sort buffer */
@@ -89,6 +87,7 @@ struct fts_psort_struct {
ulint state; /*!< child thread state */
fts_doc_list_t fts_doc_list; /*!< doc list to process */
fts_psort_common_t* psort_common; /*!< ptr to all psort info */
+ os_thread_t thread_hdl; /*!< thread handler */
};
/** Structure stores information from string tokenization operation */
@@ -126,6 +125,7 @@ typedef struct fts_psort_insert fts_psort_insert_t;
/** status bit used for communication between parent and child thread */
#define FTS_PARENT_COMPLETE 1
#define FTS_CHILD_COMPLETE 1
+#define FTS_CHILD_EXITING 2
/** Print some debug information */
#define FTSORT_PRINT
@@ -171,18 +171,19 @@ ibool
row_fts_psort_info_init(
/*====================*/
trx_t* trx, /*!< in: transaction */
- struct TABLE* table, /*!< in: MySQL table object */
+ row_merge_dup_t* dup, /*!< in,own: descriptor of
+ FTS index being created */
const dict_table_t* new_table,/*!< in: table where indexes are
created */
- dict_index_t* index, /*!< in: FTS index to be created */
ibool opt_doc_id_size,
/*!< in: whether to use 4 bytes
instead of 8 bytes integer to
store Doc ID during sort */
fts_psort_t** psort, /*!< out: parallel sort info to be
instantiated */
- fts_psort_t** merge); /*!< out: parallel merge info
+ fts_psort_t** merge) /*!< out: parallel merge info
to be instantiated */
+ __attribute__((nonnull));
/********************************************************************//**
Clean up and deallocate FTS parallel sort structures, and close
temparary merge sort files */
@@ -231,19 +232,6 @@ row_fts_start_parallel_merge(
/*=========================*/
fts_psort_t* merge_info); /*!< in: parallel sort info */
/********************************************************************//**
-Insert processed FTS data to the auxillary tables.
-@return DB_SUCCESS if insertion runs fine */
-UNIV_INTERN
-ulint
-row_merge_write_fts_word(
-/*=====================*/
- trx_t* trx, /*!< in: transaction */
- que_t** ins_graph, /*!< in: Insert query graphs */
- fts_tokenizer_word_t*word, /*!< in: sorted and tokenized
- word */
- fts_table_t* fts_table, /*!< in: fts aux table instance */
- CHARSET_INFO* charset); /*!< in: charset */
-/********************************************************************//**
Read sorted FTS data files and insert data tuples to auxillary tables.
@return DB_SUCCESS or error number */
UNIV_INTERN
@@ -275,13 +263,13 @@ Read sorted file containing index data tuples and insert these data
tuples to the index
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
row_fts_merge_insert(
/*=================*/
dict_index_t* index, /*!< in: index */
dict_table_t* table, /*!< in: new table */
fts_psort_t* psort_info, /*!< parallel sort info */
- ulint id); /* !< in: which auxiliary table's data
+ ulint id) /* !< in: which auxiliary table's data
to insert to */
-
+ __attribute__((nonnull));
#endif /* row0ftsort_h */
diff --git a/storage/innobase/include/row0import.h b/storage/innobase/include/row0import.h
new file mode 100644
index 00000000000..aa46fdb7c27
--- /dev/null
+++ b/storage/innobase/include/row0import.h
@@ -0,0 +1,91 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0import.h
+Header file for import tablespace functions.
+
+Created 2012-02-08 by Sunny Bains
+*******************************************************/
+
+#ifndef row0import_h
+#define row0import_h
+
+#include "univ.i"
+#include "db0err.h"
+#include "dict0types.h"
+
+// Forward declarations
+struct trx_t;
+struct dict_table_t;
+struct row_prebuilt_t;
+
+/*****************************************************************//**
+Imports a tablespace. The space id in the .ibd file must match the space id
+of the table in the data dictionary.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+row_import_for_mysql(
+/*=================*/
+ dict_table_t* table, /*!< in/out: table */
+ row_prebuilt_t* prebuilt) /*!< in: prebuilt struct
+ in MySQL */
+ __attribute__((nonnull, warn_unused_result));
+
+/*****************************************************************//**
+Update the DICT_TF2_DISCARDED flag in SYS_TABLES.
+@return DB_SUCCESS or error code. */
+UNIV_INTERN
+dberr_t
+row_import_update_discarded_flag(
+/*=============================*/
+ trx_t* trx, /*!< in/out: transaction that
+ covers the update */
+ table_id_t table_id, /*!< in: Table for which we want
+ to set the root table->flags2 */
+ bool discarded, /*!< in: set MIX_LEN column bit
+ to discarded, if true */
+ bool dict_locked) /*!< in: Set to true if the
+ caller already owns the
+ dict_sys_t:: mutex. */
+ __attribute__((nonnull, warn_unused_result));
+
+/*****************************************************************//**
+Update the (space, root page) of a table's indexes from the values
+in the data dictionary.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+row_import_update_index_root(
+/*=========================*/
+ trx_t* trx, /*!< in/out: transaction that
+ covers the update */
+ const dict_table_t* table, /*!< in: Table for which we want
+ to set the root page_no */
+ bool reset, /*!< in: if true then set to
+ FIL_NUL */
+ bool dict_locked) /*!< in: Set to true if the
+ caller already owns the
+ dict_sys_t:: mutex. */
+ __attribute__((nonnull, warn_unused_result));
+#ifndef UNIV_NONINL
+#include "row0import.ic"
+#endif
+
+#endif /* row0import_h */
diff --git a/storage/innobase/include/row0import.ic b/storage/innobase/include/row0import.ic
new file mode 100644
index 00000000000..c5bbab49f6f
--- /dev/null
+++ b/storage/innobase/include/row0import.ic
@@ -0,0 +1,25 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0import.ic
+
+Import tablespace inline functions.
+
+Created 2012-02-08 Sunny Bains
+*******************************************************/
diff --git a/storage/innobase/include/row0ins.h b/storage/innobase/include/row0ins.h
index 54ad7241a4f..2a892d2f5df 100644
--- a/storage/innobase/include/row0ins.h
+++ b/storage/innobase/include/row0ins.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -40,7 +40,7 @@ the caller must have a shared latch on dict_foreign_key_check_lock.
@return DB_SUCCESS, DB_LOCK_WAIT, DB_NO_REFERENCED_ROW, or
DB_ROW_IS_REFERENCED */
UNIV_INTERN
-ulint
+dberr_t
row_ins_check_foreign_constraint(
/*=============================*/
ibool check_ref,/*!< in: TRUE If we want to check that
@@ -52,7 +52,8 @@ row_ins_check_foreign_constraint(
dict_table_t* table, /*!< in: if check_ref is TRUE, then the foreign
table, else the referenced table */
dtuple_t* entry, /*!< in: index entry for index */
- que_thr_t* thr); /*!< in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Creates an insert node struct.
@return own: insert node struct */
@@ -74,21 +75,110 @@ ins_node_set_new_row(
ins_node_t* node, /*!< in: insert node */
dtuple_t* row); /*!< in: new row (or first row) for the node */
/***************************************************************//**
-Inserts an index entry to index. Tries first optimistic, then pessimistic
-descent down the tree. If the entry matches enough to a delete marked record,
-performs the insert by updating or delete unmarking the delete marked
-record.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
+Tries to insert an entry into a clustered index, ignoring foreign key
+constraints. If a record with the same unique key is found, the other
+record is necessarily marked deleted by a committed transaction, or a
+unique key violation error occurs. The delete marked record is then
+updated to an existing record, and we must write an undo log record on
+the delete marked record.
+@retval DB_SUCCESS on success
+@retval DB_LOCK_WAIT on lock wait when !(flags & BTR_NO_LOCKING_FLAG)
+@retval DB_FAIL if retry with BTR_MODIFY_TREE is needed
+@return error code */
UNIV_INTERN
-ulint
-row_ins_index_entry(
-/*================*/
- dict_index_t* index, /*!< in: index */
+dberr_t
+row_ins_clust_index_entry_low(
+/*==========================*/
+ ulint flags, /*!< in: undo logging and locking flags */
+ ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
+ depending on whether we wish optimistic or
+ pessimistic descent down the index tree */
+ dict_index_t* index, /*!< in: clustered index */
+ ulint n_uniq, /*!< in: 0 or index->n_uniq */
dtuple_t* entry, /*!< in/out: index entry to insert */
ulint n_ext, /*!< in: number of externally stored columns */
- ibool foreign,/*!< in: TRUE=check foreign key constraints
- (foreign=FALSE only during CREATE INDEX) */
- que_thr_t* thr); /*!< in: query thread */
+ que_thr_t* thr) /*!< in: query thread or NULL */
+ __attribute__((nonnull, warn_unused_result));
+/***************************************************************//**
+Tries to insert an entry into a secondary index. If a record with exactly the
+same fields is found, the other record is necessarily marked deleted.
+It is then unmarked. Otherwise, the entry is just inserted to the index.
+@retval DB_SUCCESS on success
+@retval DB_LOCK_WAIT on lock wait when !(flags & BTR_NO_LOCKING_FLAG)
+@retval DB_FAIL if retry with BTR_MODIFY_TREE is needed
+@return error code */
+UNIV_INTERN
+dberr_t
+row_ins_sec_index_entry_low(
+/*========================*/
+ ulint flags, /*!< in: undo logging and locking flags */
+ ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
+ depending on whether we wish optimistic or
+ pessimistic descent down the index tree */
+ dict_index_t* index, /*!< in: secondary index */
+ mem_heap_t* offsets_heap,
+ /*!< in/out: memory heap that can be emptied */
+ mem_heap_t* heap, /*!< in/out: memory heap */
+ dtuple_t* entry, /*!< in/out: index entry to insert */
+ trx_id_t trx_id, /*!< in: PAGE_MAX_TRX_ID during
+ row_log_table_apply(), or 0 */
+ que_thr_t* thr) /*!< in: query thread */
+ __attribute__((nonnull, warn_unused_result));
+/***************************************************************//**
+Tries to insert the externally stored fields (off-page columns)
+of a clustered index entry.
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+UNIV_INTERN
+dberr_t
+row_ins_index_entry_big_rec_func(
+/*=============================*/
+ const dtuple_t* entry, /*!< in/out: index entry to insert */
+ const big_rec_t* big_rec,/*!< in: externally stored fields */
+ ulint* offsets,/*!< in/out: rec offsets */
+ mem_heap_t** heap, /*!< in/out: memory heap */
+ dict_index_t* index, /*!< in: index */
+ const char* file, /*!< in: file name of caller */
+#ifndef DBUG_OFF
+ const void* thd, /*!< in: connection, or NULL */
+#endif /* DBUG_OFF */
+ ulint line) /*!< in: line number of caller */
+ __attribute__((nonnull(1,2,3,4,5,6), warn_unused_result));
+#ifdef DBUG_OFF
+# define row_ins_index_entry_big_rec(e,big,ofs,heap,index,thd,file,line) \
+ row_ins_index_entry_big_rec_func(e,big,ofs,heap,index,file,line)
+#else /* DBUG_OFF */
+# define row_ins_index_entry_big_rec(e,big,ofs,heap,index,thd,file,line) \
+ row_ins_index_entry_big_rec_func(e,big,ofs,heap,index,file,thd,line)
+#endif /* DBUG_OFF */
+/***************************************************************//**
+Inserts an entry into a clustered index. Tries first optimistic,
+then pessimistic descent down the tree. If the entry matches enough
+to a delete marked record, performs the insert by updating or delete
+unmarking the delete marked record.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
+UNIV_INTERN
+dberr_t
+row_ins_clust_index_entry(
+/*======================*/
+ dict_index_t* index, /*!< in: clustered index */
+ dtuple_t* entry, /*!< in/out: index entry to insert */
+ que_thr_t* thr, /*!< in: query thread */
+ ulint n_ext) /*!< in: number of externally stored columns */
+ __attribute__((nonnull, warn_unused_result));
+/***************************************************************//**
+Inserts an entry into a secondary index. Tries first optimistic,
+then pessimistic descent down the tree. If the entry matches enough
+to a delete marked record, performs the insert by updating or delete
+unmarking the delete marked record.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
+UNIV_INTERN
+dberr_t
+row_ins_sec_index_entry(
+/*====================*/
+ dict_index_t* index, /*!< in: secondary index */
+ dtuple_t* entry, /*!< in/out: index entry to insert */
+ que_thr_t* thr) /*!< in: query thread */
+ __attribute__((nonnull, warn_unused_result));
/***********************************************************//**
Inserts a row to a table. This is a high-level function used in
SQL execution graphs.
@@ -98,17 +188,10 @@ que_thr_t*
row_ins_step(
/*=========*/
que_thr_t* thr); /*!< in: query thread */
-/***********************************************************//**
-Creates an entry template for each index of a table. */
-UNIV_INTERN
-void
-ins_node_create_entry_list(
-/*=======================*/
- ins_node_t* node); /*!< in: row insert node */
/* Insert node structure */
-struct ins_node_struct{
+struct ins_node_t{
que_common_t common; /*!< node type: QUE_NODE_INSERT */
ulint ins_type;/* INS_VALUES, INS_SEARCHED, or INS_DIRECT */
dtuple_t* row; /*!< row to insert */
diff --git a/storage/innobase/include/row0log.h b/storage/innobase/include/row0log.h
new file mode 100644
index 00000000000..984d907d390
--- /dev/null
+++ b/storage/innobase/include/row0log.h
@@ -0,0 +1,241 @@
+/*****************************************************************************
+
+Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0log.h
+Modification log for online index creation and online table rebuild
+
+Created 2011-05-26 Marko Makela
+*******************************************************/
+
+#ifndef row0log_h
+#define row0log_h
+
+#include "univ.i"
+#include "mtr0types.h"
+#include "row0types.h"
+#include "rem0types.h"
+#include "data0types.h"
+#include "dict0types.h"
+#include "trx0types.h"
+#include "que0types.h"
+
+/******************************************************//**
+Allocate the row log for an index and flag the index
+for online creation.
+@retval true if success, false if not */
+UNIV_INTERN
+bool
+row_log_allocate(
+/*=============*/
+ dict_index_t* index, /*!< in/out: index */
+ dict_table_t* table, /*!< in/out: new table being rebuilt,
+ or NULL when creating a secondary index */
+ bool same_pk,/*!< in: whether the definition of the
+ PRIMARY KEY has remained the same */
+ const dtuple_t* add_cols,
+ /*!< in: default values of
+ added columns, or NULL */
+ const ulint* col_map)/*!< in: mapping of old column
+ numbers to new ones, or NULL if !table */
+ __attribute__((nonnull(1), warn_unused_result));
+
+/******************************************************//**
+Free the row log for an index that was being created online. */
+UNIV_INTERN
+void
+row_log_free(
+/*=========*/
+ row_log_t*& log) /*!< in,own: row log */
+ __attribute__((nonnull));
+
+/******************************************************//**
+Free the row log for an index on which online creation was aborted. */
+UNIV_INLINE
+void
+row_log_abort_sec(
+/*==============*/
+ dict_index_t* index) /*!< in/out: index (x-latched) */
+ __attribute__((nonnull));
+
+/******************************************************//**
+Try to log an operation to a secondary index that is
+(or was) being created.
+@retval true if the operation was logged or can be ignored
+@retval false if online index creation is not taking place */
+UNIV_INLINE
+bool
+row_log_online_op_try(
+/*==================*/
+ dict_index_t* index, /*!< in/out: index, S or X latched */
+ const dtuple_t* tuple, /*!< in: index tuple */
+ trx_id_t trx_id) /*!< in: transaction ID for insert,
+ or 0 for delete */
+ __attribute__((nonnull, warn_unused_result));
+/******************************************************//**
+Logs an operation to a secondary index that is (or was) being created. */
+UNIV_INTERN
+void
+row_log_online_op(
+/*==============*/
+ dict_index_t* index, /*!< in/out: index, S or X latched */
+ const dtuple_t* tuple, /*!< in: index tuple */
+ trx_id_t trx_id) /*!< in: transaction ID for insert,
+ or 0 for delete */
+ UNIV_COLD __attribute__((nonnull));
+
+/******************************************************//**
+Gets the error status of the online index rebuild log.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+row_log_table_get_error(
+/*====================*/
+ const dict_index_t* index) /*!< in: clustered index of a table
+ that is being rebuilt online */
+ __attribute__((nonnull, warn_unused_result));
+
+/******************************************************//**
+Logs a delete operation to a table that is being rebuilt.
+This will be merged in row_log_table_apply_delete(). */
+UNIV_INTERN
+void
+row_log_table_delete(
+/*=================*/
+ const rec_t* rec, /*!< in: clustered index leaf page record,
+ page X-latched */
+ dict_index_t* index, /*!< in/out: clustered index, S-latched
+ or X-latched */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */
+ trx_id_t trx_id) /*!< in: DB_TRX_ID of the record before
+ it was deleted */
+ UNIV_COLD __attribute__((nonnull));
+
+/******************************************************//**
+Logs an update operation to a table that is being rebuilt.
+This will be merged in row_log_table_apply_update(). */
+UNIV_INTERN
+void
+row_log_table_update(
+/*=================*/
+ const rec_t* rec, /*!< in: clustered index leaf page record,
+ page X-latched */
+ dict_index_t* index, /*!< in/out: clustered index, S-latched
+ or X-latched */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */
+ const dtuple_t* old_pk) /*!< in: row_log_table_get_pk()
+ before the update */
+ UNIV_COLD __attribute__((nonnull(1,2,3)));
+
+/******************************************************//**
+Constructs the old PRIMARY KEY and DB_TRX_ID,DB_ROLL_PTR
+of a table that is being rebuilt.
+@return tuple of PRIMARY KEY,DB_TRX_ID,DB_ROLL_PTR in the rebuilt table,
+or NULL if the PRIMARY KEY definition does not change */
+UNIV_INTERN
+const dtuple_t*
+row_log_table_get_pk(
+/*=================*/
+ const rec_t* rec, /*!< in: clustered index leaf page record,
+ page X-latched */
+ dict_index_t* index, /*!< in/out: clustered index, S-latched
+ or X-latched */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec,index),
+ or NULL */
+ mem_heap_t** heap) /*!< in/out: memory heap where allocated */
+ UNIV_COLD __attribute__((nonnull(1,2,4), warn_unused_result));
+
+/******************************************************//**
+Logs an insert to a table that is being rebuilt.
+This will be merged in row_log_table_apply_insert(). */
+UNIV_INTERN
+void
+row_log_table_insert(
+/*=================*/
+ const rec_t* rec, /*!< in: clustered index leaf page record,
+ page X-latched */
+ dict_index_t* index, /*!< in/out: clustered index, S-latched
+ or X-latched */
+ const ulint* offsets)/*!< in: rec_get_offsets(rec,index) */
+ UNIV_COLD __attribute__((nonnull));
+
+/******************************************************//**
+Notes that a transaction is being rolled back. */
+UNIV_INTERN
+void
+row_log_table_rollback(
+/*===================*/
+ dict_index_t* index, /*!< in/out: clustered index */
+ trx_id_t trx_id) /*!< in: transaction being rolled back */
+ UNIV_COLD __attribute__((nonnull));
+
+/******************************************************//**
+Check if a transaction rollback has been initiated.
+@return true if inserts of this transaction were rolled back */
+UNIV_INTERN
+bool
+row_log_table_is_rollback(
+/*======================*/
+ const dict_index_t* index, /*!< in: clustered index */
+ trx_id_t trx_id) /*!< in: transaction id */
+ __attribute__((nonnull));
+
+/******************************************************//**
+Apply the row_log_table log to a table upon completing rebuild.
+@return DB_SUCCESS, or error code on failure */
+UNIV_INTERN
+dberr_t
+row_log_table_apply(
+/*================*/
+ que_thr_t* thr, /*!< in: query graph */
+ dict_table_t* old_table,
+ /*!< in: old table */
+ struct TABLE* table) /*!< in/out: MySQL table
+ (for reporting duplicates) */
+ __attribute__((nonnull, warn_unused_result));
+
+/******************************************************//**
+Get the latest transaction ID that has invoked row_log_online_op()
+during online creation.
+@return latest transaction ID, or 0 if nothing was logged */
+UNIV_INTERN
+trx_id_t
+row_log_get_max_trx(
+/*================*/
+ dict_index_t* index) /*!< in: index, must be locked */
+ __attribute__((nonnull, warn_unused_result));
+
+/******************************************************//**
+Merge the row log to the index upon completing index creation.
+@return DB_SUCCESS, or error code on failure */
+UNIV_INTERN
+dberr_t
+row_log_apply(
+/*==========*/
+ trx_t* trx, /*!< in: transaction (for checking if
+ the operation was interrupted) */
+ dict_index_t* index, /*!< in/out: secondary index */
+ struct TABLE* table) /*!< in/out: MySQL table
+ (for reporting duplicates) */
+ __attribute__((nonnull, warn_unused_result));
+
+#ifndef UNIV_NONINL
+#include "row0log.ic"
+#endif
+
+#endif /* row0log.h */
diff --git a/storage/innobase/include/row0log.ic b/storage/innobase/include/row0log.ic
new file mode 100644
index 00000000000..b0f37dbd8e7
--- /dev/null
+++ b/storage/innobase/include/row0log.ic
@@ -0,0 +1,84 @@
+/*****************************************************************************
+
+Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0log.ic
+Modification log for online index creation and online table rebuild
+
+Created 2012-10-18 Marko Makela
+*******************************************************/
+
+#include "dict0dict.h"
+
+/******************************************************//**
+Free the row log for an index on which online creation was aborted. */
+UNIV_INLINE
+void
+row_log_abort_sec(
+/*===============*/
+ dict_index_t* index) /*!< in/out: index (x-latched) */
+{
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+ ut_ad(!dict_index_is_clust(index));
+ dict_index_set_online_status(index, ONLINE_INDEX_ABORTED);
+ row_log_free(index->online_log);
+}
+
+/******************************************************//**
+Try to log an operation to a secondary index that is
+(or was) being created.
+@retval true if the operation was logged or can be ignored
+@retval false if online index creation is not taking place */
+UNIV_INLINE
+bool
+row_log_online_op_try(
+/*==================*/
+ dict_index_t* index, /*!< in/out: index, S or X latched */
+ const dtuple_t* tuple, /*!< in: index tuple */
+ trx_id_t trx_id) /*!< in: transaction ID for insert,
+ or 0 for delete */
+{
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_SHARED)
+ || rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+ switch (dict_index_get_online_status(index)) {
+ case ONLINE_INDEX_COMPLETE:
+ /* This is a normal index. Do not log anything.
+ The caller must perform the operation on the
+ index tree directly. */
+ return(false);
+ case ONLINE_INDEX_CREATION:
+ /* The index is being created online. Log the
+ operation. */
+ row_log_online_op(index, tuple, trx_id);
+ break;
+ case ONLINE_INDEX_ABORTED:
+ case ONLINE_INDEX_ABORTED_DROPPED:
+ /* The index was created online, but the operation was
+ aborted. Do not log the operation and tell the caller
+ to skip the operation. */
+ break;
+ }
+
+ return(true);
+}
diff --git a/storage/innobase/include/row0merge.h b/storage/innobase/include/row0merge.h
index c4e2f5ddf41..f464e46ae5b 100644
--- a/storage/innobase/include/row0merge.h
+++ b/storage/innobase/include/row0merge.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2005, 2010, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2005, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -40,15 +40,17 @@ Created 13/06/2005 Jan Lindstrom
#include "lock0types.h"
#include "srv0srv.h"
+// Forward declaration
+struct ib_sequence_t;
+
/** @brief Block size for I/O operations in merge sort.
The minimum is UNIV_PAGE_SIZE, or page_get_free_space_of_empty()
rounded to a power of 2.
When not creating a PRIMARY KEY that contains column prefixes, this
-can be set as small as UNIV_PAGE_SIZE / 2. See the comment above
-ut_ad(data_size < sizeof(row_merge_block_t)). */
-typedef byte row_merge_block_t;
+can be set as small as UNIV_PAGE_SIZE / 2. */
+typedef byte row_merge_block_t;
/** @brief Secondary buffer for I/O operations of merge records.
@@ -64,114 +66,146 @@ The format is the same as a record in ROW_FORMAT=COMPACT with the
exception that the REC_N_NEW_EXTRA_BYTES are omitted. */
typedef byte mrec_t;
+/** Merge record in row_merge_buf_t */
+struct mtuple_t {
+ dfield_t* fields; /*!< data fields */
+};
+
/** Buffer for sorting in main memory. */
-struct row_merge_buf_struct {
+struct row_merge_buf_t {
mem_heap_t* heap; /*!< memory heap where allocated */
dict_index_t* index; /*!< the index the tuples belong to */
ulint total_size; /*!< total amount of data bytes */
ulint n_tuples; /*!< number of data tuples */
ulint max_tuples; /*!< maximum number of data tuples */
- const dfield_t**tuples; /*!< array of pointers to
- arrays of fields that form
- the data tuples */
- const dfield_t**tmp_tuples; /*!< temporary copy of tuples,
+ mtuple_t* tuples; /*!< array of data tuples */
+ mtuple_t* tmp_tuples; /*!< temporary copy of tuples,
for sorting */
};
-/** Buffer for sorting in main memory. */
-typedef struct row_merge_buf_struct row_merge_buf_t;
-
/** Information about temporary files used in merge sort */
-struct merge_file_struct {
+struct merge_file_t {
int fd; /*!< file descriptor */
ulint offset; /*!< file offset (end of file) */
ib_uint64_t n_rec; /*!< number of records in the file */
};
-/** Information about temporary files used in merge sort */
-typedef struct merge_file_struct merge_file_t;
-
/** Index field definition */
-struct merge_index_field_struct {
+struct index_field_t {
+ ulint col_no; /*!< column offset */
ulint prefix_len; /*!< column prefix length, or 0
if indexing the whole column */
- const char* field_name; /*!< field name */
};
-/** Index field definition */
-typedef struct merge_index_field_struct merge_index_field_t;
-
/** Definition of an index being created */
-struct merge_index_def_struct {
- const char* name; /*!< index name */
- ulint ind_type; /*!< 0, DICT_UNIQUE,
- or DICT_CLUSTERED */
- ulint n_fields; /*!< number of fields
- in index */
- merge_index_field_t* fields; /*!< field definitions */
+struct index_def_t {
+ const char* name; /*!< index name */
+ ulint ind_type; /*!< 0, DICT_UNIQUE,
+ or DICT_CLUSTERED */
+ ulint key_number; /*!< MySQL key number,
+ or ULINT_UNDEFINED if none */
+ ulint n_fields; /*!< number of fields in index */
+ index_field_t* fields; /*!< field definitions */
};
-/** Definition of an index being created */
-typedef struct merge_index_def_struct merge_index_def_t;
-
/** Structure for reporting duplicate records. */
-struct row_merge_dup_struct {
- const dict_index_t* index; /*!< index being sorted */
- struct TABLE* table; /*!< MySQL table object */
- ulint n_dup; /*!< number of duplicates */
+struct row_merge_dup_t {
+ dict_index_t* index; /*!< index being sorted */
+ struct TABLE* table; /*!< MySQL table object */
+ const ulint* col_map;/*!< mapping of column numbers
+ in table to the rebuilt table
+ (index->table), or NULL if not
+ rebuilding table */
+ ulint n_dup; /*!< number of duplicates */
};
-/** Structure for reporting duplicate records. */
-typedef struct row_merge_dup_struct row_merge_dup_t;
-
+/*************************************************************//**
+Report a duplicate key. */
+UNIV_INTERN
+void
+row_merge_dup_report(
+/*=================*/
+ row_merge_dup_t* dup, /*!< in/out: for reporting duplicates */
+ const dfield_t* entry) /*!< in: duplicate index entry */
+ __attribute__((nonnull));
/*********************************************************************//**
Sets an exclusive lock on a table, for the duration of creating indexes.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
row_merge_lock_table(
/*=================*/
trx_t* trx, /*!< in/out: transaction */
dict_table_t* table, /*!< in: table to lock */
- enum lock_mode mode); /*!< in: LOCK_X or LOCK_S */
+ enum lock_mode mode) /*!< in: LOCK_X or LOCK_S */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
-Drop an index from the InnoDB system tables. The data dictionary must
-have been locked exclusively by the caller, because the transaction
-will not be committed. */
+Drop indexes that were created before an error occurred.
+The data dictionary must have been locked exclusively by the caller,
+because the transaction will not be committed. */
UNIV_INTERN
void
-row_merge_drop_index(
-/*=================*/
- dict_index_t* index, /*!< in: index to be removed */
- dict_table_t* table, /*!< in: table */
- trx_t* trx); /*!< in: transaction handle */
+row_merge_drop_indexes_dict(
+/*========================*/
+ trx_t* trx, /*!< in/out: dictionary transaction */
+ table_id_t table_id)/*!< in: table identifier */
+ __attribute__((nonnull));
/*********************************************************************//**
-Drop those indexes which were created before an error occurred when
-building an index. The data dictionary must have been locked
-exclusively by the caller, because the transaction will not be
-committed. */
+Drop those indexes which were created before an error occurred.
+The data dictionary must have been locked exclusively by the caller,
+because the transaction will not be committed. */
UNIV_INTERN
void
row_merge_drop_indexes(
/*===================*/
- trx_t* trx, /*!< in: transaction */
- dict_table_t* table, /*!< in: table containing the indexes */
- dict_index_t** index, /*!< in: indexes to drop */
- ulint num_created); /*!< in: number of elements in
- index[] */
+ trx_t* trx, /*!< in/out: transaction */
+ dict_table_t* table, /*!< in/out: table containing the indexes */
+ ibool locked) /*!< in: TRUE=table locked,
+ FALSE=may need to do a lazy drop */
+ __attribute__((nonnull));
/*********************************************************************//**
Drop all partially created indexes during crash recovery. */
UNIV_INTERN
void
row_merge_drop_temp_indexes(void);
/*=============================*/
+
+/*********************************************************************//**
+Creates temporary merge files, and if UNIV_PFS_IO defined, register
+the file descriptor with Performance Schema.
+@return File descriptor */
+UNIV_INTERN
+int
+row_merge_file_create_low(void)
+/*===========================*/
+ __attribute__((warn_unused_result));
+/*********************************************************************//**
+Destroy a merge file. And de-register the file from Performance Schema
+if UNIV_PFS_IO is defined. */
+UNIV_INTERN
+void
+row_merge_file_destroy_low(
+/*=======================*/
+ int fd); /*!< in: merge file descriptor */
+
+/*********************************************************************//**
+Provide a new pathname for a table that is being renamed if it belongs to
+a file-per-table tablespace. The caller is responsible for freeing the
+memory allocated for the return value.
+@return new pathname of tablespace file, or NULL if space = 0 */
+UNIV_INTERN
+char*
+row_make_new_pathname(
+/*==================*/
+ dict_table_t* table, /*!< in: table to be renamed */
+ const char* new_name); /*!< in: new name */
/*********************************************************************//**
Rename the tables in the data dictionary. The data dictionary must
have been locked exclusively by the caller, because the transaction
will not be committed.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
row_merge_rename_tables(
/*====================*/
dict_table_t* old_table, /*!< in/out: old table, renamed to
@@ -179,32 +213,35 @@ row_merge_rename_tables(
dict_table_t* new_table, /*!< in/out: new table, renamed to
old_table->name */
const char* tmp_name, /*!< in: new name for old_table */
- trx_t* trx); /*!< in: transaction handle */
+ trx_t* trx) /*!< in: transaction handle */
+ __attribute__((nonnull, warn_unused_result));
+
/*********************************************************************//**
-Create a temporary table for creating a primary key, using the definition
-of an existing table.
-@return table, or NULL on error */
+Rename an index in the dictionary that was created. The data
+dictionary must have been locked exclusively by the caller, because
+the transaction will not be committed.
+@return DB_SUCCESS if all OK */
UNIV_INTERN
-dict_table_t*
-row_merge_create_temporary_table(
-/*=============================*/
- const char* table_name, /*!< in: new table name */
- const merge_index_def_t*index_def, /*!< in: the index definition
- of the primary key */
- const dict_table_t* table, /*!< in: old table definition */
- trx_t* trx); /*!< in/out: transaction
- (sets error_state) */
+dberr_t
+row_merge_rename_index_to_add(
+/*==========================*/
+ trx_t* trx, /*!< in/out: transaction */
+ table_id_t table_id, /*!< in: table identifier */
+ index_id_t index_id) /*!< in: index identifier */
+ __attribute__((nonnull));
/*********************************************************************//**
-Rename the temporary indexes in the dictionary to permanent ones. The
-data dictionary must have been locked exclusively by the caller,
-because the transaction will not be committed.
+Rename an index in the dictionary that is to be dropped. The data
+dictionary must have been locked exclusively by the caller, because
+the transaction will not be committed.
@return DB_SUCCESS if all OK */
UNIV_INTERN
-ulint
-row_merge_rename_indexes(
-/*=====================*/
+dberr_t
+row_merge_rename_index_to_drop(
+/*===========================*/
trx_t* trx, /*!< in/out: transaction */
- dict_table_t* table); /*!< in/out: table with new indexes */
+ table_id_t table_id, /*!< in: table identifier */
+ index_id_t index_id) /*!< in: index identifier */
+ __attribute__((nonnull));
/*********************************************************************//**
Create the index and load in to the dictionary.
@return index, or NULL on error */
@@ -214,7 +251,7 @@ row_merge_create_index(
/*===================*/
trx_t* trx, /*!< in/out: trx (sets error_state) */
dict_table_t* table, /*!< in: the index is on this table */
- const merge_index_def_t*index_def);
+ const index_def_t* index_def);
/*!< in: the index definition */
/*********************************************************************//**
Check if a transaction can use an index.
@@ -226,22 +263,25 @@ row_merge_is_index_usable(
const trx_t* trx, /*!< in: transaction */
const dict_index_t* index); /*!< in: index to check */
/*********************************************************************//**
-If there are views that refer to the old table name then we "attach" to
-the new instance of the table else we drop it immediately.
+Drop a table. The caller must have ensured that the background stats
+thread is not processing the table. This can be done by calling
+dict_stats_wait_bg_to_stop_using_tables() after locking the dictionary and
+before calling this function.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
row_merge_drop_table(
/*=================*/
trx_t* trx, /*!< in: transaction */
- dict_table_t* table); /*!< in: table instance to drop */
+ dict_table_t* table) /*!< in: table instance to drop */
+ __attribute__((nonnull));
/*********************************************************************//**
Build indexes on a table by reading a clustered index,
creating a temporary file containing index entries, merge sorting
these index entries and inserting sorted index entries to indexes.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
row_merge_build_indexes(
/*====================*/
trx_t* trx, /*!< in: transaction */
@@ -250,11 +290,24 @@ row_merge_build_indexes(
dict_table_t* new_table, /*!< in: table where indexes are
created; identical to old_table
unless creating a PRIMARY KEY */
+ bool online, /*!< in: true if creating indexes
+ online */
dict_index_t** indexes, /*!< in: indexes to be created */
+ const ulint* key_numbers, /*!< in: MySQL key numbers */
ulint n_indexes, /*!< in: size of indexes[] */
- struct TABLE* table); /*!< in/out: MySQL table, for
+ struct TABLE* table, /*!< in/out: MySQL table, for
reporting erroneous key value
if applicable */
+ const dtuple_t* add_cols, /*!< in: default values of
+ added columns, or NULL */
+ const ulint* col_map, /*!< in: mapping of old column
+ numbers to new ones, or NULL
+ if old_table == new_table */
+ ulint add_autoinc, /*!< in: number of added
+ AUTO_INCREMENT column, or
+ ULINT_UNDEFINED if none is added */
+ ib_sequence_t& sequence) /*!< in/out: autoinc sequence */
+ __attribute__((nonnull(1,2,3,5,6,8), warn_unused_result));
/********************************************************************//**
Write a buffer to a block. */
UNIV_INTERN
@@ -263,15 +316,18 @@ row_merge_buf_write(
/*================*/
const row_merge_buf_t* buf, /*!< in: sorted buffer */
const merge_file_t* of, /*!< in: output file */
- row_merge_block_t* block); /*!< out: buffer for writing to file */
+ row_merge_block_t* block) /*!< out: buffer for writing to file */
+ __attribute__((nonnull));
/********************************************************************//**
Sort a buffer. */
UNIV_INTERN
void
row_merge_buf_sort(
/*===============*/
- row_merge_buf_t* buf, /*!< in/out: sort buffer */
- row_merge_dup_t* dup); /*!< in/out: for reporting duplicates */
+ row_merge_buf_t* buf, /*!< in/out: sort buffer */
+ row_merge_dup_t* dup) /*!< in/out: reporter of duplicates
+ (NULL if non-unique index) */
+ __attribute__((nonnull(1)));
/********************************************************************//**
Write a merge block to the file system.
@return TRUE if request was successful, FALSE if fail */
@@ -290,30 +346,32 @@ UNIV_INTERN
row_merge_buf_t*
row_merge_buf_empty(
/*================*/
- row_merge_buf_t* buf); /*!< in,own: sort buffer */
+ row_merge_buf_t* buf) /*!< in,own: sort buffer */
+ __attribute__((warn_unused_result, nonnull));
/*********************************************************************//**
-Create a merge file. */
+Create a merge file.
+@return file descriptor, or -1 on failure */
UNIV_INTERN
-void
+int
row_merge_file_create(
/*==================*/
- merge_file_t* merge_file); /*!< out: merge file structure */
+ merge_file_t* merge_file) /*!< out: merge file structure */
+ __attribute__((nonnull));
/*********************************************************************//**
Merge disk files.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
row_merge_sort(
/*===========*/
trx_t* trx, /*!< in: transaction */
- const dict_index_t* index, /*!< in: index being created */
+ const row_merge_dup_t* dup, /*!< in: descriptor of
+ index being created */
merge_file_t* file, /*!< in/out: file containing
index entries */
row_merge_block_t* block, /*!< in/out: 3 buffers */
- int* tmpfd, /*!< in/out: temporary file handle */
- struct TABLE* table); /*!< in/out: MySQL table, for
- reporting erroneous key value
- if applicable */
+ int* tmpfd) /*!< in/out: temporary file handle */
+ __attribute__((nonnull));
/*********************************************************************//**
Allocate a sort buffer.
@return own: sort buffer */
@@ -321,37 +379,24 @@ UNIV_INTERN
row_merge_buf_t*
row_merge_buf_create(
/*=================*/
- dict_index_t* index); /*!< in: secondary index */
+ dict_index_t* index) /*!< in: secondary index */
+ __attribute__((warn_unused_result, nonnull, malloc));
/*********************************************************************//**
Deallocate a sort buffer. */
UNIV_INTERN
void
row_merge_buf_free(
/*===============*/
- row_merge_buf_t* buf); /*!< in,own: sort buffer, to be freed */
+ row_merge_buf_t* buf) /*!< in,own: sort buffer to be freed */
+ __attribute__((nonnull));
/*********************************************************************//**
Destroy a merge file. */
UNIV_INTERN
void
row_merge_file_destroy(
/*===================*/
- merge_file_t* merge_file); /*!< out: merge file structure */
-/*********************************************************************//**
-Compare two merge records.
-@return 1, 0, -1 if mrec1 is greater, equal, less, respectively, than mrec2 */
-UNIV_INTERN
-int
-row_merge_cmp(
-/*==========*/
- const mrec_t* mrec1, /*!< in: first merge
- record to be compared */
- const mrec_t* mrec2, /*!< in: second merge
- record to be compared */
- const ulint* offsets1, /*!< in: first record offsets */
- const ulint* offsets2, /*!< in: second record offsets */
- const dict_index_t* index, /*!< in: index */
- ibool* null_eq); /*!< out: set to TRUE if
- found matching null values */
+ merge_file_t* merge_file) /*!< in/out: merge file structure */
+ __attribute__((nonnull));
/********************************************************************//**
Read a merge block from the file system.
@return TRUE if request was successful, FALSE if fail */
@@ -367,7 +412,7 @@ row_merge_read(
/********************************************************************//**
Read a merge record.
@return pointer to next record, or NULL on I/O error or end of list */
-UNIV_INTERN __attribute__((nonnull))
+UNIV_INTERN
const byte*
row_merge_read_rec(
/*===============*/
@@ -380,5 +425,6 @@ row_merge_read_rec(
const mrec_t** mrec, /*!< out: pointer to merge record,
or NULL on end of list
(non-NULL on I/O error) */
- ulint* offsets);/*!< out: offsets of mrec */
+ ulint* offsets)/*!< out: offsets of mrec */
+ __attribute__((nonnull, warn_unused_result));
#endif /* row0merge.h */
diff --git a/storage/innobase/include/row0mysql.h b/storage/innobase/include/row0mysql.h
index 17a29e38ec7..1e0f3b30f8c 100644
--- a/storage/innobase/include/row0mysql.h
+++ b/storage/innobase/include/row0mysql.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2000, 2010, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2000, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -36,9 +36,12 @@ Created 9/17/2000 Heikki Tuuri
#include "btr0pcur.h"
#include "trx0types.h"
+// Forward declaration
+struct SysIndexCallback;
+
extern ibool row_rollback_on_timeout;
-typedef struct row_prebuilt_struct row_prebuilt_t;
+struct row_prebuilt_t;
/*******************************************************************//**
Frees the blob heap in prebuilt when no longer needed. */
@@ -152,18 +155,19 @@ row_mysql_store_col_in_innobase_format(
ulint comp); /*!< in: nonzero=compact format */
/****************************************************************//**
Handles user errors and lock waits detected by the database engine.
-@return TRUE if it was a lock wait and we should continue running the
+@return true if it was a lock wait and we should continue running the
query thread */
UNIV_INTERN
-ibool
+bool
row_mysql_handle_errors(
/*====================*/
- ulint* new_err,/*!< out: possible new error encountered in
+ dberr_t* new_err,/*!< out: possible new error encountered in
rollback, or the old error which was
during the function entry */
trx_t* trx, /*!< in: transaction */
- que_thr_t* thr, /*!< in: query thread */
- trx_savept_t* savept);/*!< in: savepoint */
+ que_thr_t* thr, /*!< in: query thread, or NULL */
+ trx_savept_t* savept) /*!< in: savepoint, or NULL */
+ __attribute__((nonnull(1,2)));
/********************************************************************//**
Create a prebuilt struct for a MySQL table handle.
@return own: a prebuilt struct */
@@ -200,16 +204,17 @@ It is not compatible with another AUTO_INC or exclusive lock on the
table.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_lock_table_autoinc_for_mysql(
/*=============================*/
- row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in the MySQL
+ row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in the MySQL
table handle */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Sets a table lock on the table mentioned in prebuilt.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_lock_table_for_mysql(
/*=====================*/
row_prebuilt_t* prebuilt, /*!< in: prebuilt struct in the MySQL
@@ -218,19 +223,20 @@ row_lock_table_for_mysql(
if prebuilt->table should be
locked as
prebuilt->select_lock_type */
- ulint mode); /*!< in: lock mode of table
+ ulint mode) /*!< in: lock mode of table
(ignored if table==NULL) */
-
+ __attribute__((nonnull(1)));
/*********************************************************************//**
Does an insert for MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_insert_for_mysql(
/*=================*/
byte* mysql_rec, /*!< in: row in the MySQL format */
- row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in MySQL
+ row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL
handle */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Builds a dummy query graph used in selects. */
UNIV_INTERN
@@ -263,13 +269,14 @@ row_table_got_default_clust_index(
Does an update or delete of a row for MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_update_for_mysql(
/*=================*/
byte* mysql_rec, /*!< in: the row to be updated, in
the MySQL format */
- row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in MySQL
+ row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL
handle */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
This can only be used when srv_locks_unsafe_for_binlog is TRUE or this
session is using a READ COMMITTED or READ UNCOMMITTED isolation level.
@@ -278,19 +285,31 @@ initialized prebuilt->new_rec_locks to store the information which new
record locks really were set. This function removes a newly set
clustered index record lock under prebuilt->pcur or
prebuilt->clust_pcur. Thus, this implements a 'mini-rollback' that
-releases the latest clustered index record lock we set.
-@return error code or DB_SUCCESS */
+releases the latest clustered index record lock we set. */
UNIV_INTERN
-int
+void
row_unlock_for_mysql(
/*=================*/
row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct in MySQL
handle */
- ibool has_latches_on_recs);/*!< in: TRUE if called
+ ibool has_latches_on_recs)/*!< in: TRUE if called
so that we have the latches on
the records under pcur and
clust_pcur, and we do not need
to reposition the cursors. */
+ __attribute__((nonnull));
+/*********************************************************************//**
+Checks if a table name contains the string "/#sql" which denotes temporary
+tables in MySQL.
+@return true if temporary table */
+UNIV_INTERN
+bool
+row_is_mysql_tmp_table_name(
+/*========================*/
+ const char* name) __attribute__((warn_unused_result));
+ /*!< in: table name in the form
+ 'database/tablename' */
+
/*********************************************************************//**
Creates an query graph node of 'update' type to be used in the MySQL
interface.
@@ -305,13 +324,14 @@ row_create_update_node_for_mysql(
Does a cascaded delete or set null in a foreign key operation.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
row_update_cascade_for_mysql(
/*=========================*/
que_thr_t* thr, /*!< in: query thread */
upd_node_t* node, /*!< in: update node used in the cascade
or set null operation */
- dict_table_t* table); /*!< in: table where we do the operation */
+ dict_table_t* table) /*!< in: table where we do the operation */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Locks the data dictionary exclusively for performing a table create or other
data dictionary modification operation. */
@@ -355,33 +375,38 @@ Creates a table for MySQL. If the name of the table ends in
one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
"innodb_table_monitor", then this will also start the printing of monitor
output by the master thread. If the table name ends in "innodb_mem_validate",
-InnoDB will try to invoke mem_validate().
+InnoDB will try to invoke mem_validate(). On failure the transaction will
+be rolled back.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_create_table_for_mysql(
/*=======================*/
- dict_table_t* table, /*!< in, own: table definition
- (will be freed) */
- trx_t* trx); /*!< in: transaction handle */
+ dict_table_t* table, /*!< in, own: table definition
+ (will be freed, or on DB_SUCCESS
+ added to the data dictionary cache) */
+ trx_t* trx, /*!< in/out: transaction */
+ bool commit) /*!< in: if true, commit the transaction */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Does an index creation operation for MySQL. TODO: currently failure
to create an index results in dropping the whole table! This is no problem
currently as all indexes must be created at the same time as the table.
@return error number or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_create_index_for_mysql(
/*=======================*/
dict_index_t* index, /*!< in, own: index definition
(will be freed) */
trx_t* trx, /*!< in: transaction handle */
- const ulint* field_lengths); /*!< in: if not NULL, must contain
+ const ulint* field_lengths) /*!< in: if not NULL, must contain
dict_index_get_n_fields(index)
actual field lengths for the
index columns, which are
then checked for not being too
large. */
+ __attribute__((nonnull(1,2), warn_unused_result));
/*********************************************************************//**
Scans a table create SQL string and adds to the data dictionary
the foreign key constraints declared in the string. This function
@@ -391,7 +416,7 @@ bot participating tables. The indexes are allowed to contain more
fields than mentioned in the constraint.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_table_add_foreign_constraints(
/*==============================*/
trx_t* trx, /*!< in: transaction */
@@ -404,10 +429,10 @@ row_table_add_foreign_constraints(
const char* name, /*!< in: table full name in the
normalized form
database_name/table_name */
- ibool reject_fks); /*!< in: if TRUE, fail with error
+ ibool reject_fks) /*!< in: if TRUE, fail with error
code DB_CANNOT_ADD_CONSTRAINT if
any foreign keys are found. */
-
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
The master thread in srv0srv.cc calls this regularly to drop tables which
we must drop in background after queries to them have ended. Such lazy
@@ -426,14 +451,28 @@ ulint
row_get_background_drop_list_len_low(void);
/*======================================*/
/*********************************************************************//**
+Sets an exclusive lock on a table.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+row_mysql_lock_table(
+/*=================*/
+ trx_t* trx, /*!< in/out: transaction */
+ dict_table_t* table, /*!< in: table to lock */
+ enum lock_mode mode, /*!< in: LOCK_X or LOCK_S */
+ const char* op_info) /*!< in: string for trx->op_info */
+ __attribute__((nonnull, warn_unused_result));
+
+/*********************************************************************//**
Truncates a table for MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_truncate_table_for_mysql(
/*=========================*/
dict_table_t* table, /*!< in: table handle */
- trx_t* trx); /*!< in: transaction handle */
+ trx_t* trx) /*!< in: transaction handle */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Drops a table for MySQL. If the name of the dropped table ends in
one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
@@ -443,12 +482,16 @@ by the transaction, the transaction will be committed. Otherwise, the
data dictionary will remain locked.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_drop_table_for_mysql(
/*=====================*/
const char* name, /*!< in: table name */
- trx_t* trx, /*!< in: transaction handle */
- ibool drop_db);/*!< in: TRUE=dropping whole database */
+ trx_t* trx, /*!< in: dictionary transaction handle */
+ bool drop_db,/*!< in: true=dropping whole database */
+ bool nonatomic = true)
+ /*!< in: whether it is permitted
+ to release and reacquire dict_operation_lock */
+ __attribute__((nonnull));
/*********************************************************************//**
Drop all temporary tables during crash recovery. */
UNIV_INTERN
@@ -462,66 +505,70 @@ means that this function deletes the .ibd file and assigns a new table id for
the table. Also the flag table->ibd_file_missing is set TRUE.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_discard_tablespace_for_mysql(
/*=============================*/
const char* name, /*!< in: table name */
- trx_t* trx); /*!< in: transaction handle */
+ trx_t* trx) /*!< in: transaction handle */
+ __attribute__((nonnull, warn_unused_result));
/*****************************************************************//**
Imports a tablespace. The space id in the .ibd file must match the space id
of the table in the data dictionary.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_import_tablespace_for_mysql(
/*============================*/
- const char* name, /*!< in: table name */
- trx_t* trx); /*!< in: transaction handle */
+ dict_table_t* table, /*!< in/out: table */
+ row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Drops a database for MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_drop_database_for_mysql(
/*========================*/
const char* name, /*!< in: database name which ends to '/' */
- trx_t* trx); /*!< in: transaction handle */
+ trx_t* trx) /*!< in: transaction handle */
+ __attribute__((nonnull));
/*********************************************************************//**
Renames a table for MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
row_rename_table_for_mysql(
/*=======================*/
const char* old_name, /*!< in: old table name */
const char* new_name, /*!< in: new table name */
- trx_t* trx, /*!< in: transaction handle */
- ibool commit); /*!< in: if TRUE then commit trx */
+ trx_t* trx, /*!< in/out: transaction */
+ bool commit) /*!< in: whether to commit trx */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Checks that the index contains entries in an ascending order, unique
constraint is not broken, and calculates the number of index entries
in the read view of the current transaction.
-@return DB_SUCCESS if ok */
+@return true if ok */
UNIV_INTERN
-ulint
+bool
row_check_index_for_mysql(
/*======================*/
row_prebuilt_t* prebuilt, /*!< in: prebuilt struct
in MySQL handle */
const dict_index_t* index, /*!< in: index */
- ulint* n_rows); /*!< out: number of entries
+ ulint* n_rows) /*!< out: number of entries
seen in the consistent read */
-
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Determines if a table is a magic monitor table.
-@return TRUE if monitor table */
+@return true if monitor table */
UNIV_INTERN
-ibool
+bool
row_is_magic_monitor_table(
/*=======================*/
- const char* table_name); /*!< in: name of the table, in the
+ const char* table_name) /*!< in: name of the table, in the
form database/table_name */
-
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Initialize this module */
UNIV_INTERN
@@ -536,13 +583,24 @@ void
row_mysql_close(void);
/*=================*/
+/*********************************************************************//**
+Reassigns the table identifier of a table.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+row_mysql_table_id_reassign(
+/*========================*/
+ dict_table_t* table, /*!< in/out: table */
+ trx_t* trx, /*!< in/out: transaction */
+ table_id_t* new_id) /*!< out: new table id */
+ __attribute__((nonnull, warn_unused_result));
+
/* A struct describing a place for an individual column in the MySQL
row format which is presented to the table handler in ha_innobase.
This template struct is used to speed up row transformations between
Innobase and MySQL. */
-typedef struct mysql_row_templ_struct mysql_row_templ_t;
-struct mysql_row_templ_struct {
+struct mysql_row_templ_t {
ulint col_no; /*!< column number of the column */
ulint rec_field_no; /*!< field number of the column in an
Innobase record in the current index;
@@ -597,7 +655,7 @@ struct mysql_row_templ_struct {
/** A struct for (sometimes lazily) prebuilt structures in an Innobase table
handle used within MySQL; these are used to save CPU time. */
-struct row_prebuilt_struct {
+struct row_prebuilt_t {
ulint magic_n; /*!< this magic number is set to
ROW_PREBUILT_ALLOCATED when created,
or ROW_PREBUILT_FREED when the
@@ -682,8 +740,11 @@ struct row_prebuilt_struct {
columns in the table */
upd_node_t* upd_node; /*!< Innobase SQL update node used
to perform updates and deletes */
+ trx_id_t trx_id; /*!< The table->def_trx_id when
+ ins_graph was built */
que_fork_t* ins_graph; /*!< Innobase SQL query graph used
- in inserts */
+ in inserts. Will be rebuilt on
+ trx_id or n_indexes mismatch. */
que_fork_t* upd_graph; /*!< Innobase SQL query graph used
in updates or deletes */
btr_pcur_t pcur; /*!< persistent cursor used in selects
@@ -780,7 +841,7 @@ struct row_prebuilt_struct {
to this heap */
mem_heap_t* old_vers_heap; /*!< memory heap where a previous
version is built in consistent read */
- fts_result_t* result; /* The result of an FTS query */
+ bool in_fts_query; /*!< Whether we are in a FTS query */
/*----------------------*/
ulonglong autoinc_last_value;
/*!< last value of AUTO-INC interval */
@@ -791,7 +852,7 @@ struct row_prebuilt_struct {
ulonglong autoinc_offset; /*!< The offset passed to
get_auto_increment() by MySQL. Required
to calculate the next value */
- ulint autoinc_error; /*!< The actual error code encountered
+ dberr_t autoinc_error; /*!< The actual error code encountered
while trying to init or read the
autoinc value from the table. We
store it here so that we can return
@@ -806,6 +867,20 @@ struct row_prebuilt_struct {
/*----------------------*/
ulint magic_n2; /*!< this should be the same as
magic_n */
+ /*----------------------*/
+ unsigned innodb_api:1; /*!< whether this is a InnoDB API
+ query */
+ const rec_t* innodb_api_rec; /*!< InnoDB API search result */
+};
+
+/** Callback for row_mysql_sys_index_iterate() */
+struct SysIndexCallback {
+ virtual ~SysIndexCallback() { }
+
+ /** Callback method
+ @param mtr - current mini transaction
+ @param pcur - persistent cursor. */
+ virtual void operator()(mtr_t* mtr, btr_pcur_t* pcur) throw() = 0;
};
#define ROW_PREBUILT_FETCH_MAGIC_N 465765687
@@ -829,4 +904,4 @@ struct row_prebuilt_struct {
#include "row0mysql.ic"
#endif
-#endif
+#endif /* row0mysql.h */
diff --git a/storage/innobase/include/row0purge.h b/storage/innobase/include/row0purge.h
index 740771fa3eb..93dcf9cf49b 100644
--- a/storage/innobase/include/row0purge.h
+++ b/storage/innobase/include/row0purge.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -46,7 +46,8 @@ row_purge_node_create(
/*==================*/
que_thr_t* parent, /*!< in: parent node, i.e., a
thr node */
- mem_heap_t* heap); /*!< in: memory heap where created */
+ mem_heap_t* heap) /*!< in: memory heap where created */
+ __attribute__((nonnull, warn_unused_result));
/***********************************************************//**
Determines if it is possible to remove a secondary index entry.
Removal is possible if the secondary index entry does not refer to any
@@ -56,19 +57,20 @@ is newer than the purge view.
NOTE: This function should only be called by the purge thread, only
while holding a latch on the leaf page of the secondary index entry
(or keeping the buffer pool watch on the page). It is possible that
-this function first returns TRUE and then FALSE, if a user transaction
+this function first returns true and then false, if a user transaction
inserts a record that the secondary index entry would refer to.
However, in that case, the user transaction would also re-insert the
secondary index entry after purge has removed it and released the leaf
page latch.
-@return TRUE if the secondary index record can be purged */
+@return true if the secondary index record can be purged */
UNIV_INTERN
-ibool
+bool
row_purge_poss_sec(
/*===============*/
purge_node_t* node, /*!< in/out: row purge node */
dict_index_t* index, /*!< in: secondary index */
- const dtuple_t* entry); /*!< in: secondary index entry */
+ const dtuple_t* entry) /*!< in: secondary index entry */
+ __attribute__((nonnull, warn_unused_result));
/***************************************************************
Does the purge operation for a single undo log record. This is a high-level
function used in an SQL execution graph.
@@ -77,11 +79,12 @@ UNIV_INTERN
que_thr_t*
row_purge_step(
/*===========*/
- que_thr_t* thr); /*!< in: query thread */
+ que_thr_t* thr) /*!< in: query thread */
+ __attribute__((nonnull, warn_unused_result));
/* Purge node structure */
-struct purge_node_struct{
+struct purge_node_t{
que_common_t common; /*!< node type: QUE_NODE_PURGE */
/*----------------------*/
/* Local storage for this graph node */
diff --git a/storage/innobase/include/row0quiesce.h b/storage/innobase/include/row0quiesce.h
new file mode 100644
index 00000000000..1d6d11291b8
--- /dev/null
+++ b/storage/innobase/include/row0quiesce.h
@@ -0,0 +1,74 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0quiesce.h
+
+Header file for tablespace quiesce functions.
+
+Created 2012-02-08 by Sunny Bains
+*******************************************************/
+
+#ifndef row0quiesce_h
+#define row0quiesce_h
+
+#include "univ.i"
+#include "dict0types.h"
+
+struct trx_t;
+
+/** The version number of the export meta-data text file. */
+#define IB_EXPORT_CFG_VERSION_V1 0x1UL
+
+/*********************************************************************//**
+Quiesce the tablespace that the table resides in. */
+UNIV_INTERN
+void
+row_quiesce_table_start(
+/*====================*/
+ dict_table_t* table, /*!< in: quiesce this table */
+ trx_t* trx) /*!< in/out: transaction/session */
+ __attribute__((nonnull));
+
+/*********************************************************************//**
+Set a table's quiesce state.
+@return DB_SUCCESS or error code. */
+UNIV_INTERN
+dberr_t
+row_quiesce_set_state(
+/*==================*/
+ dict_table_t* table, /*!< in: quiesce this table */
+ ib_quiesce_t state, /*!< in: quiesce state to set */
+ trx_t* trx) /*!< in/out: transaction */
+ __attribute__((nonnull, warn_unused_result));
+
+/*********************************************************************//**
+Cleanup after table quiesce. */
+UNIV_INTERN
+void
+row_quiesce_table_complete(
+/*=======================*/
+ dict_table_t* table, /*!< in: quiesce this table */
+ trx_t* trx) /*!< in/out: transaction/session */
+ __attribute__((nonnull));
+
+#ifndef UNIV_NONINL
+#include "row0quiesce.ic"
+#endif
+
+#endif /* row0quiesce_h */
diff --git a/storage/innobase/include/row0quiesce.ic b/storage/innobase/include/row0quiesce.ic
new file mode 100644
index 00000000000..f570a6aed05
--- /dev/null
+++ b/storage/innobase/include/row0quiesce.ic
@@ -0,0 +1,26 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/row0quiesce.ic
+
+Quiesce a tablespace.
+
+Created 2012-02-08 Sunny Bains
+*******************************************************/
+
diff --git a/storage/innobase/include/row0row.h b/storage/innobase/include/row0row.h
index cf253ab2347..a4e5e0dd2fa 100644
--- a/storage/innobase/include/row0row.h
+++ b/storage/innobase/include/row0row.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -73,20 +73,41 @@ row_get_rec_roll_ptr(
/*****************************************************************//**
When an insert or purge to a table is performed, this function builds
the entry to be inserted into or purged from an index on the table.
+@return index entry which should be inserted or purged
+@retval NULL if the externally stored columns in the clustered index record
+are unavailable and ext != NULL, or row is missing some needed columns. */
+UNIV_INTERN
+dtuple_t*
+row_build_index_entry_low(
+/*======================*/
+ const dtuple_t* row, /*!< in: row which should be
+ inserted or purged */
+ const row_ext_t* ext, /*!< in: externally stored column
+ prefixes, or NULL */
+ dict_index_t* index, /*!< in: index on the table */
+ mem_heap_t* heap) /*!< in: memory heap from which
+ the memory for the index entry
+ is allocated */
+ __attribute__((warn_unused_result, nonnull(1,3,4)));
+/*****************************************************************//**
+When an insert or purge to a table is performed, this function builds
+the entry to be inserted into or purged from an index on the table.
@return index entry which should be inserted or purged, or NULL if the
externally stored columns in the clustered index record are
unavailable and ext != NULL */
-UNIV_INTERN
+UNIV_INLINE
dtuple_t*
row_build_index_entry(
/*==================*/
- const dtuple_t* row, /*!< in: row which should be
- inserted or purged */
- row_ext_t* ext, /*!< in: externally stored column prefixes,
- or NULL */
- dict_index_t* index, /*!< in: index on the table */
- mem_heap_t* heap); /*!< in: memory heap from which the memory for
- the index entry is allocated */
+ const dtuple_t* row, /*!< in: row which should be
+ inserted or purged */
+ const row_ext_t* ext, /*!< in: externally stored column
+ prefixes, or NULL */
+ dict_index_t* index, /*!< in: index on the table */
+ mem_heap_t* heap) /*!< in: memory heap from which
+ the memory for the index entry
+ is allocated */
+ __attribute__((warn_unused_result, nonnull(1,3,4)));
/*******************************************************************//**
An inverse function to row_build_index_entry. Builds a row from a
record in a clustered index.
@@ -124,11 +145,17 @@ row_build(
consulted instead; the user
columns in this table should be
the same columns as in index->table */
+ const dtuple_t* add_cols,
+ /*!< in: default values of
+ added columns, or NULL */
+ const ulint* col_map,/*!< in: mapping of old column
+ numbers to new ones, or NULL */
row_ext_t** ext, /*!< out, own: cache of
externally stored column
prefixes, or NULL */
- mem_heap_t* heap); /*!< in: memory heap from which
+ mem_heap_t* heap) /*!< in: memory heap from which
the memory needed is allocated */
+ __attribute__((nonnull(2,3,9)));
/*******************************************************************//**
Converts an index record to a typed data tuple.
@return index entry built; does not set info_bits, and the data fields
@@ -142,37 +169,25 @@ row_rec_to_index_entry_low(
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
ulint* n_ext, /*!< out: number of externally
stored columns */
- mem_heap_t* heap); /*!< in: memory heap from which
+ mem_heap_t* heap) /*!< in: memory heap from which
the memory needed is allocated */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
Converts an index record to a typed data tuple. NOTE that externally
stored (often big) fields are NOT copied to heap.
-@return own: index entry built; see the NOTE below! */
+@return own: index entry built */
UNIV_INTERN
dtuple_t*
row_rec_to_index_entry(
/*===================*/
- ulint type, /*!< in: ROW_COPY_DATA, or
- ROW_COPY_POINTERS: the former
- copies also the data fields to
- heap as the latter only places
- pointers to data fields on the
- index page */
- const rec_t* rec, /*!< in: record in the index;
- NOTE: in the case
- ROW_COPY_POINTERS the data
- fields in the row will point
- directly into this record,
- therefore, the buffer page of
- this record must be at least
- s-latched and the latch held
- as long as the dtuple is used! */
+ const rec_t* rec, /*!< in: record in the index */
const dict_index_t* index, /*!< in: index */
- ulint* offsets,/*!< in/out: rec_get_offsets(rec) */
+ const ulint* offsets,/*!< in/out: rec_get_offsets(rec) */
ulint* n_ext, /*!< out: number of externally
stored columns */
- mem_heap_t* heap); /*!< in: memory heap from which
+ mem_heap_t* heap) /*!< in: memory heap from which
the memory needed is allocated */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
Builds from a secondary index record a row reference with which we can
search the clustered index record.
@@ -193,8 +208,9 @@ row_build_row_ref(
the buffer page of this record must be
at least s-latched and the latch held
as long as the row reference is used! */
- mem_heap_t* heap); /*!< in: memory heap from which the memory
+ mem_heap_t* heap) /*!< in: memory heap from which the memory
needed is allocated */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
Builds from a secondary index record a row reference with which we can
search the clustered index record. */
@@ -215,7 +231,8 @@ row_build_row_ref_in_tuple(
const dict_index_t* index, /*!< in: secondary index */
ulint* offsets,/*!< in: rec_get_offsets(rec, index)
or NULL */
- trx_t* trx); /*!< in: transaction */
+ trx_t* trx) /*!< in: transaction or NULL */
+ __attribute__((nonnull(1,2,3)));
/*******************************************************************//**
Builds from a secondary index record a row reference with which we can
search the clustered index record. */
@@ -245,7 +262,8 @@ row_search_on_row_ref(
ulint mode, /*!< in: BTR_MODIFY_LEAF, ... */
const dict_table_t* table, /*!< in: table */
const dtuple_t* ref, /*!< in: row reference */
- mtr_t* mtr); /*!< in/out: mtr */
+ mtr_t* mtr) /*!< in/out: mtr */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Fetches the clustered index record for a secondary index record. The latches
on the secondary index record are preserved.
@@ -258,7 +276,8 @@ row_get_clust_rec(
const rec_t* rec, /*!< in: record in a secondary index */
dict_index_t* index, /*!< in: secondary index */
dict_index_t** clust_index,/*!< out: clustered index */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull, warn_unused_result));
/** Result of row_search_index_entry */
enum row_search_result {
@@ -285,8 +304,8 @@ row_search_index_entry(
ulint mode, /*!< in: BTR_MODIFY_LEAF, ... */
btr_pcur_t* pcur, /*!< in/out: persistent cursor, which must
be closed by the caller */
- mtr_t* mtr); /*!< in: mtr */
-
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull, warn_unused_result));
#define ROW_COPY_DATA 1
#define ROW_COPY_POINTERS 2
@@ -313,8 +332,9 @@ row_raw_format(
in bytes */
const dict_field_t* dict_field, /*!< in: index field */
char* buf, /*!< out: output buffer */
- ulint buf_size); /*!< in: output buffer size
+ ulint buf_size) /*!< in: output buffer size
in bytes */
+ __attribute__((nonnull, warn_unused_result));
#ifndef UNIV_NONINL
#include "row0row.ic"
diff --git a/storage/innobase/include/row0row.ic b/storage/innobase/include/row0row.ic
index 8e9f3460519..ac62422be1f 100644
--- a/storage/innobase/include/row0row.ic
+++ b/storage/innobase/include/row0row.ic
@@ -104,6 +104,33 @@ row_get_rec_roll_ptr(
return(trx_read_roll_ptr(rec + offset + DATA_TRX_ID_LEN));
}
+/*****************************************************************//**
+When an insert or purge to a table is performed, this function builds
+the entry to be inserted into or purged from an index on the table.
+@return index entry which should be inserted or purged, or NULL if the
+externally stored columns in the clustered index record are
+unavailable and ext != NULL */
+UNIV_INLINE
+dtuple_t*
+row_build_index_entry(
+/*==================*/
+ const dtuple_t* row, /*!< in: row which should be
+ inserted or purged */
+ const row_ext_t* ext, /*!< in: externally stored column
+ prefixes, or NULL */
+ dict_index_t* index, /*!< in: index on the table */
+ mem_heap_t* heap) /*!< in: memory heap from which
+ the memory for the index entry
+ is allocated */
+{
+ dtuple_t* entry;
+
+ ut_ad(dtuple_check_typed(row));
+ entry = row_build_index_entry_low(row, ext, index, heap);
+ ut_ad(!entry || dtuple_check_typed(entry));
+ return(entry);
+}
+
/*******************************************************************//**
Builds from a secondary index record a row reference with which we can
search the clustered index record. */
diff --git a/storage/innobase/include/row0sel.h b/storage/innobase/include/row0sel.h
index fa3c93b6b9a..c8be80f89d9 100644
--- a/storage/innobase/include/row0sel.h
+++ b/storage/innobase/include/row0sel.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2010, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -148,7 +148,7 @@ position and fetch next or fetch prev must not be tried to the cursor!
@return DB_SUCCESS, DB_RECORD_NOT_FOUND, DB_END_OF_INDEX, DB_DEADLOCK,
DB_LOCK_TABLE_FULL, or DB_TOO_BIG_RECORD */
UNIV_INTERN
-ulint
+dberr_t
row_search_for_mysql(
/*=================*/
byte* buf, /*!< in/out: buffer for the fetched
@@ -163,11 +163,12 @@ row_search_for_mysql(
'mode' */
ulint match_mode, /*!< in: 0 or ROW_SEL_EXACT or
ROW_SEL_EXACT_PREFIX */
- ulint direction); /*!< in: 0 or ROW_SEL_NEXT or
+ ulint direction) /*!< in: 0 or ROW_SEL_NEXT or
ROW_SEL_PREV; NOTE: if this is != 0,
then prebuilt must have a pcur
with stored position! In opening of a
cursor 'direction' should be 0. */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
Checks if MySQL at the moment is allowed for this table to retrieve a
consistent read result, or store it to the query cache.
@@ -179,28 +180,20 @@ row_search_check_if_query_cache_permitted(
trx_t* trx, /*!< in: transaction object */
const char* norm_name); /*!< in: concatenation of database name,
'/' char, table name */
-void
-row_create_key(
-/*===========*/
- dtuple_t* tuple, /* in: tuple where to build;
- NOTE: we assume that the type info
- in the tuple is already according
- to index! */
- dict_index_t* index, /* in: index of the key value */
- doc_id_t* doc_id); /* in: doc id to lookup.*/
/*******************************************************************//**
Read the max AUTOINC value from an index.
@return DB_SUCCESS if all OK else error code */
UNIV_INTERN
-ulint
+dberr_t
row_search_max_autoinc(
/*===================*/
dict_index_t* index, /*!< in: index to search */
const char* col_name, /*!< in: autoinc column name */
- ib_uint64_t* value); /*!< out: AUTOINC value read */
+ ib_uint64_t* value) /*!< out: AUTOINC value read */
+ __attribute__((nonnull, warn_unused_result));
/** A structure for caching column values for prefetched rows */
-struct sel_buf_struct{
+struct sel_buf_t{
byte* data; /*!< data, or NULL; if not NULL, this field
has allocated memory which must be explicitly
freed; can be != NULL even when len is
@@ -213,7 +206,7 @@ struct sel_buf_struct{
};
/** Query plan */
-struct plan_struct{
+struct plan_t{
dict_table_t* table; /*!< table struct in the dictionary
cache */
dict_index_t* index; /*!< table index used in the search */
@@ -299,7 +292,7 @@ enum sel_node_state {
};
/** Select statement node */
-struct sel_node_struct{
+struct sel_node_t{
que_common_t common; /*!< node type: QUE_NODE_SELECT */
enum sel_node_state
state; /*!< node state */
@@ -352,7 +345,7 @@ struct sel_node_struct{
};
/** Fetch statement node */
-struct fetch_node_struct{
+struct fetch_node_t{
que_common_t common; /*!< type: QUE_NODE_FETCH */
sel_node_t* cursor_def; /*!< cursor definition */
sym_node_t* into_list; /*!< variables to set */
@@ -379,7 +372,7 @@ enum open_node_op {
};
/** Open or close cursor statement node */
-struct open_node_struct{
+struct open_node_t{
que_common_t common; /*!< type: QUE_NODE_OPEN */
enum open_node_op
op_type; /*!< operation type: open or
@@ -388,7 +381,7 @@ struct open_node_struct{
};
/** Row printf statement node */
-struct row_printf_node_struct{
+struct row_printf_node_t{
que_common_t common; /*!< type: QUE_NODE_ROW_PRINTF */
sel_node_t* sel_node; /*!< select */
};
diff --git a/storage/innobase/include/row0types.h b/storage/innobase/include/row0types.h
index 463651b43b8..52c89cb01fa 100644
--- a/storage/innobase/include/row0types.h
+++ b/storage/innobase/include/row0types.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2010, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,32 +26,28 @@ Created 12/27/1996 Heikki Tuuri
#ifndef row0types_h
#define row0types_h
-typedef struct plan_struct plan_t;
+struct plan_t;
-typedef struct upd_struct upd_t;
+struct upd_t;
+struct upd_field_t;
+struct upd_node_t;
+struct del_node_t;
+struct ins_node_t;
+struct sel_node_t;
+struct open_node_t;
+struct fetch_node_t;
-typedef struct upd_field_struct upd_field_t;
+struct row_printf_node_t;
+struct sel_buf_t;
-typedef struct upd_node_struct upd_node_t;
+struct undo_node_t;
-typedef struct del_node_struct del_node_t;
+struct purge_node_t;
-typedef struct ins_node_struct ins_node_t;
+struct row_ext_t;
-typedef struct sel_node_struct sel_node_t;
-
-typedef struct open_node_struct open_node_t;
-
-typedef struct fetch_node_struct fetch_node_t;
-
-typedef struct row_printf_node_struct row_printf_node_t;
-typedef struct sel_buf_struct sel_buf_t;
-
-typedef struct undo_node_struct undo_node_t;
-
-typedef struct purge_node_struct purge_node_t;
-
-typedef struct row_ext_struct row_ext_t;
+/** Buffer for logging modifications during online index creation */
+struct row_log_t;
/* MySQL data types */
struct TABLE;
diff --git a/storage/innobase/include/row0uins.h b/storage/innobase/include/row0uins.h
index 5f3a7212ee1..ebf4881208a 100644
--- a/storage/innobase/include/row0uins.h
+++ b/storage/innobase/include/row0uins.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -42,11 +42,11 @@ if it figures out that an index record will be removed in the purge
anyway, it will remove it in the rollback.
@return DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
row_undo_ins(
/*=========*/
- undo_node_t* node); /*!< in: row undo node */
-
+ undo_node_t* node) /*!< in: row undo node */
+ __attribute__((nonnull, warn_unused_result));
#ifndef UNIV_NONINL
#include "row0uins.ic"
#endif
diff --git a/storage/innobase/include/row0umod.h b/storage/innobase/include/row0umod.h
index 84831e59d90..f89d5a334fc 100644
--- a/storage/innobase/include/row0umod.h
+++ b/storage/innobase/include/row0umod.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -38,12 +38,12 @@ Created 2/27/1997 Heikki Tuuri
Undoes a modify operation on a row of a table.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
row_undo_mod(
/*=========*/
undo_node_t* node, /*!< in: row undo node */
- que_thr_t* thr); /*!< in: query thread */
-
+ que_thr_t* thr) /*!< in: query thread */
+ __attribute__((nonnull, warn_unused_result));
#ifndef UNIV_NONINL
#include "row0umod.ic"
diff --git a/storage/innobase/include/row0undo.h b/storage/innobase/include/row0undo.h
index 47f9afdc74a..5dddfb4eae1 100644
--- a/storage/innobase/include/row0undo.h
+++ b/storage/innobase/include/row0undo.h
@@ -95,7 +95,7 @@ enum undo_exec {
};
/** Undo node structure */
-struct undo_node_struct{
+struct undo_node_t{
que_common_t common; /*!< node type: QUE_NODE_UNDO */
enum undo_exec state; /*!< node execution state */
trx_t* trx; /*!< trx for which undo is done */
diff --git a/storage/innobase/include/row0upd.h b/storage/innobase/include/row0upd.h
index a7687bb1ded..27dedeb65a7 100644
--- a/storage/innobase/include/row0upd.h
+++ b/storage/innobase/include/row0upd.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -101,7 +101,7 @@ byte*
row_upd_write_sys_vals_to_log(
/*==========================*/
dict_index_t* index, /*!< in: clustered index */
- trx_t* trx, /*!< in: transaction */
+ trx_id_t trx_id, /*!< in: transaction id */
roll_ptr_t roll_ptr,/*!< in: roll ptr of the undo log record */
byte* log_ptr,/*!< pointer to a buffer of size > 20 opened
in mlog */
@@ -118,8 +118,9 @@ row_upd_rec_sys_fields(
uncompressed part will be updated, or NULL */
dict_index_t* index, /*!< in: clustered index */
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- trx_t* trx, /*!< in: transaction */
- roll_ptr_t roll_ptr);/*!< in: roll ptr of the undo log record */
+ const trx_t* trx, /*!< in: transaction */
+ roll_ptr_t roll_ptr);/*!< in: roll ptr of the undo log record,
+ can be 0 during IMPORT */
/*********************************************************************//**
Sets the trx id or roll ptr field of a clustered index entry. */
UNIV_INTERN
@@ -165,6 +166,15 @@ row_upd_changes_field_size_or_external(
dict_index_t* index, /*!< in: index */
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
const upd_t* update);/*!< in: update vector */
+/***********************************************************//**
+Returns true if row update contains disowned external fields.
+@return true if the update contains disowned external fields. */
+UNIV_INTERN
+bool
+row_upd_changes_disowned_external(
+/*==============================*/
+ const upd_t* update) /*!< in: update vector */
+ __attribute__((nonnull, warn_unused_result));
#endif /* !UNIV_HOTBACKUP */
/***********************************************************//**
Replaces the new column values stored in the update vector to the
@@ -192,11 +202,12 @@ UNIV_INTERN
upd_t*
row_upd_build_sec_rec_difference_binary(
/*====================================*/
+ const rec_t* rec, /*!< in: secondary index record */
dict_index_t* index, /*!< in: index */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
const dtuple_t* entry, /*!< in: entry to insert */
- const rec_t* rec, /*!< in: secondary index record */
- trx_t* trx, /*!< in: transaction */
- mem_heap_t* heap); /*!< in: memory heap from which allocated */
+ mem_heap_t* heap) /*!< in: memory heap from which allocated */
+ __attribute__((warn_unused_result, nonnull));
/***************************************************************//**
Builds an update vector from those fields, excluding the roll ptr and
trx id fields, which in an index entry differ from a record that has
@@ -204,14 +215,19 @@ the equal ordering fields. NOTE: we compare the fields as binary strings!
@return own: update vector of differing fields, excluding roll ptr and
trx id */
UNIV_INTERN
-upd_t*
+const upd_t*
row_upd_build_difference_binary(
/*============================*/
dict_index_t* index, /*!< in: clustered index */
const dtuple_t* entry, /*!< in: entry to insert */
const rec_t* rec, /*!< in: clustered index record */
- trx_t* trx, /*!< in: transaction */
- mem_heap_t* heap); /*!< in: memory heap from which allocated */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec,index), or NULL */
+ bool no_sys, /*!< in: skip the system columns
+ DB_TRX_ID and DB_ROLL_PTR */
+ trx_t* trx, /*!< in: transaction (for diagnostics),
+ or NULL */
+ mem_heap_t* heap) /*!< in: memory heap from which allocated */
+ __attribute__((nonnull(1,2,3,7), warn_unused_result));
/***********************************************************//**
Replaces the new column values stored in the update vector to the index entry
given. */
@@ -315,25 +331,14 @@ row_upd_changes_fts_column(
upd_field_t* upd_field); /*!< in: field to check */
/***********************************************************//**
Checks if an FTS Doc ID column is affected by an UPDATE.
-@return TRUE if Doc ID column is affected */
+@return whether Doc ID column is affected */
UNIV_INTERN
-ulint
+bool
row_upd_changes_doc_id(
/*===================*/
dict_table_t* table, /*!< in: table */
- upd_field_t* upd_field); /*!< in: field to check */
-/***********************************************************//**
-Checks if an update vector changes the table's FTS-indexed columns.
-NOTE: must not be called for tables which do not have an FTS-index.
-Also, the vector returned must be explicitly freed as it's allocated
-using the ut_malloc() allocator.
-@return vector of FTS indexes that were affected by the update else NULL */
-UNIV_INTERN
-ib_vector_t*
-row_upd_changes_fts_columns(
-/*========================*/
- dict_table_t* table, /*!< in: table */
- upd_t* update); /*!< in: update vector for the row */
+ upd_field_t* upd_field) /*!< in: field to check */
+ __attribute__((nonnull, warn_unused_result));
/***********************************************************//**
Checks if an update vector changes an ordering field of an index record.
This function is fast if the update vector is short or the number of ordering
@@ -397,7 +402,7 @@ row_upd_index_parse(
/* Update vector field */
-struct upd_field_struct{
+struct upd_field_t{
unsigned field_no:16; /*!< field number in an index, usually
the clustered index, but in updating
a secondary index record in btr0cur.cc
@@ -416,7 +421,7 @@ struct upd_field_struct{
};
/* Update vector structure */
-struct upd_struct{
+struct upd_t{
ulint info_bits; /*!< new value of info bits to record;
default is 0 */
ulint n_fields; /*!< number of update fields */
@@ -427,7 +432,7 @@ struct upd_struct{
/* Update node structure which also implements the delete operation
of a row */
-struct upd_node_struct{
+struct upd_node_t{
que_common_t common; /*!< node type: QUE_NODE_UPDATE */
ibool is_delete;/* TRUE if delete, FALSE if update */
ibool searched_update;
diff --git a/storage/innobase/include/row0upd.ic b/storage/innobase/include/row0upd.ic
index d054662c080..618a77fa4bf 100644
--- a/storage/innobase/include/row0upd.ic
+++ b/storage/innobase/include/row0upd.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -46,7 +46,6 @@ upd_create(
update = (upd_t*) mem_heap_zalloc(heap, sizeof(upd_t));
- update->info_bits = 0;
update->n_fields = n;
update->fields = (upd_field_t*)
mem_heap_zalloc(heap, sizeof(upd_field_t) * n);
@@ -111,6 +110,7 @@ upd_field_set_field_no(
fprintf(stderr, "\n"
"InnoDB: but index only has %lu fields\n",
(ulong) dict_index_get_n_fields(index));
+ ut_ad(0);
}
dict_col_copy_type(dict_index_get_nth_col(index, field_no),
@@ -152,8 +152,9 @@ row_upd_rec_sys_fields(
uncompressed part will be updated, or NULL */
dict_index_t* index, /*!< in: clustered index */
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- trx_t* trx, /*!< in: transaction */
- roll_ptr_t roll_ptr)/*!< in: roll ptr of the undo log record */
+ const trx_t* trx, /*!< in: transaction */
+ roll_ptr_t roll_ptr)/*!< in: roll ptr of the undo log record,
+ can be 0 during IMPORT */
{
ut_ad(dict_index_is_clust(index));
ut_ad(rec_offs_validate(rec, index, offsets));
@@ -172,8 +173,14 @@ row_upd_rec_sys_fields(
#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
# error "DATA_TRX_ID + 1 != DATA_ROLL_PTR"
#endif
- ut_ad(lock_check_trx_id_sanity(trx_read_trx_id(rec + offset),
- rec, index, offsets));
+ /* During IMPORT the trx id in the record can be in the
+ future, if the .ibd file is being imported from another
+ instance. During IMPORT roll_ptr will be 0. */
+ ut_ad(roll_ptr == 0
+ || lock_check_trx_id_sanity(
+ trx_read_trx_id(rec + offset),
+ rec, index, offsets));
+
trx_write_trx_id(rec + offset, trx->id);
trx_write_roll_ptr(rec + offset + DATA_TRX_ID_LEN, roll_ptr);
}
diff --git a/storage/innobase/include/row0vers.h b/storage/innobase/include/row0vers.h
index d9e3471b3dc..1df5b4d3e98 100644
--- a/storage/innobase/include/row0vers.h
+++ b/storage/innobase/include/row0vers.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -87,7 +87,7 @@ read should see. We assume that the trx id stored in rec is such that
the consistent read should not see rec in its present version.
@return DB_SUCCESS or DB_MISSING_HISTORY */
UNIV_INTERN
-ulint
+dberr_t
row_vers_build_for_consistent_read(
/*===============================*/
const rec_t* rec, /*!< in: record in a clustered index; the
@@ -106,16 +106,17 @@ row_vers_build_for_consistent_read(
*old_vers is allocated; memory for possible
intermediate versions is allocated and freed
locally within the function */
- rec_t** old_vers);/*!< out, own: old version, or NULL if the
- record does not exist in the view, that is,
+ rec_t** old_vers)/*!< out, own: old version, or NULL
+ if the history is missing or the record
+ does not exist in the view, that is,
it was freshly inserted afterwards */
+ __attribute__((nonnull(1,2,3,4,5,6,7)));
/*****************************************************************//**
Constructs the last committed version of a clustered index record,
-which should be seen by a semi-consistent read.
-@return DB_SUCCESS or DB_MISSING_HISTORY */
+which should be seen by a semi-consistent read. */
UNIV_INTERN
-ulint
+void
row_vers_build_for_semi_consistent_read(
/*====================================*/
const rec_t* rec, /*!< in: record in a clustered index; the
@@ -132,9 +133,10 @@ row_vers_build_for_semi_consistent_read(
*old_vers is allocated; memory for possible
intermediate versions is allocated and freed
locally within the function */
- const rec_t** old_vers);/*!< out: rec, old version, or NULL if the
+ const rec_t** old_vers)/*!< out: rec, old version, or NULL if the
record does not exist in the view, that is,
it was freshly inserted afterwards */
+ __attribute__((nonnull(1,2,3,4,5)));
#ifndef UNIV_NONINL
diff --git a/storage/innobase/include/srv0mon.h b/storage/innobase/include/srv0mon.h
index 5e47f82f416..48d4b94dcae 100644
--- a/storage/innobase/include/srv0mon.h
+++ b/storage/innobase/include/srv0mon.h
@@ -1,6 +1,7 @@
/***********************************************************************
-Copyright (c) 2010, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2010, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
@@ -55,7 +56,7 @@ fill in counter information as described in "monitor_info_t" and
create the internal counter ID in "monitor_id_t". */
/** Structure containing the actual values of a monitor counter. */
-struct monitor_value_struct {
+struct monitor_value_t {
ib_time_t mon_start_time; /*!< Start time of monitoring */
ib_time_t mon_stop_time; /*!< Stop time of monitoring */
ib_time_t mon_reset_time; /*!< Time counter resetted */
@@ -70,11 +71,9 @@ struct monitor_value_struct {
monitor_running_t mon_status; /* whether monitor still running */
};
-typedef struct monitor_value_struct monitor_value_t;
-
/** Follwoing defines are possible values for "monitor_type" field in
"struct monitor_info" */
-enum monitor_type_value {
+enum monitor_type_t {
MONITOR_NONE = 0, /*!< No monitoring */
MONITOR_MODULE = 1, /*!< This is a monitor module type,
not a counter */
@@ -97,8 +96,6 @@ enum monitor_type_value {
metrics table */
};
-typedef enum monitor_type_value monitor_type_t;
-
/** Counter minimum value is initialized to be max value of
mon_type_t (ib_int64_t) */
#define MIN_RESERVED ((mon_type_t) (IB_ULONGLONG_MAX >> 1))
@@ -117,7 +114,7 @@ name shall start with MONITOR_OVLD
Please refer to "innodb_counter_info" in srv/srv0mon.cc for detail
information for each monitor counter */
-enum monitor_id_value {
+enum monitor_id_t {
/* This is to identify the default value set by the metrics
control global variables */
MONITOR_DEFAULT_START = 0,
@@ -154,14 +151,15 @@ enum monitor_id_value {
MONITOR_OVLD_BUF_POOL_READS,
MONITOR_OVLD_BUF_POOL_READ_REQUESTS,
MONITOR_OVLD_BUF_POOL_WRITE_REQUEST,
- MONITOR_PAGE_INFLUSH,
MONITOR_OVLD_BUF_POOL_WAIT_FREE,
MONITOR_OVLD_BUF_POOL_READ_AHEAD,
MONITOR_OVLD_BUF_POOL_READ_AHEAD_EVICTED,
MONITOR_OVLD_BUF_POOL_PAGE_TOTAL,
MONITOR_OVLD_BUF_POOL_PAGE_MISC,
MONITOR_OVLD_BUF_POOL_PAGES_DATA,
+ MONITOR_OVLD_BUF_POOL_BYTES_DATA,
MONITOR_OVLD_BUF_POOL_PAGES_DIRTY,
+ MONITOR_OVLD_BUF_POOL_BYTES_DIRTY,
MONITOR_OVLD_BUF_POOL_PAGES_FREE,
MONITOR_OVLD_PAGE_CREATED,
MONITOR_OVLD_PAGES_WRITTEN,
@@ -177,15 +175,15 @@ enum monitor_id_value {
MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE,
MONITOR_FLUSH_NEIGHBOR_COUNT,
MONITOR_FLUSH_NEIGHBOR_PAGES,
- MONITOR_FLUSH_MAX_DIRTY_TOTAL_PAGE,
- MONITOR_FLUSH_MAX_DIRTY_COUNT,
- MONITOR_FLUSH_MAX_DIRTY_PAGES,
+ MONITOR_FLUSH_N_TO_FLUSH_REQUESTED,
+ MONITOR_FLUSH_AVG_PAGE_RATE,
+ MONITOR_FLUSH_LSN_AVG_RATE,
+ MONITOR_FLUSH_PCT_FOR_DIRTY,
+ MONITOR_FLUSH_PCT_FOR_LSN,
+ MONITOR_FLUSH_SYNC_WAITS,
MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE,
MONITOR_FLUSH_ADAPTIVE_COUNT,
MONITOR_FLUSH_ADAPTIVE_PAGES,
- MONITOR_FLUSH_ASYNC_TOTAL_PAGE,
- MONITOR_FLUSH_ASYNC_COUNT,
- MONITOR_FLUSH_ASYNC_PAGES,
MONITOR_FLUSH_SYNC_TOTAL_PAGE,
MONITOR_FLUSH_SYNC_COUNT,
MONITOR_FLUSH_SYNC_PAGES,
@@ -303,6 +301,8 @@ enum monitor_id_value {
MONITOR_MODULE_PAGE,
MONITOR_PAGE_COMPRESS,
MONITOR_PAGE_DECOMPRESS,
+ MONITOR_PAD_INCREMENTS,
+ MONITOR_PAD_DECREMENTS,
/* Index related counters */
MONITOR_MODULE_INDEX,
@@ -367,7 +367,10 @@ enum monitor_id_value {
/* Data DDL related counters */
MONITOR_MODULE_DDL_STATS,
+ MONITOR_BACKGROUND_DROP_INDEX,
MONITOR_BACKGROUND_DROP_TABLE,
+ MONITOR_ONLINE_CREATE_INDEX,
+ MONITOR_PENDING_ALTER_TABLE,
MONITOR_MODULE_ICP,
MONITOR_ICP_ATTEMPTS,
@@ -383,8 +386,6 @@ enum monitor_id_value {
NUM_MONITOR
};
-typedef enum monitor_id_value monitor_id_t;
-
/** This informs the monitor control system to turn
on/off and reset monitor counters through wild card match */
#define MONITOR_WILDCARD_MATCH (NUM_MONITOR + 1)
@@ -394,7 +395,7 @@ on/off and reset monitor counters through wild card match */
/** struct monitor_info describes the basic/static information
about each monitor counter. */
-struct monitor_info_struct {
+struct monitor_info_t {
const char* monitor_name; /*!< Monitor name */
const char* monitor_module; /*!< Sub Module the monitor
belongs to */
@@ -408,12 +409,10 @@ struct monitor_info_struct {
monitor_id_t */
};
-typedef struct monitor_info_struct monitor_info_t;
-
/** Following are the "set_option" values allowed for
srv_mon_process_existing_counter() and related monitor control
functions. To turn on/off/reset the monitor counters. */
-enum mon_set_option {
+enum mon_option_t {
MONITOR_TURN_ON = 1, /*!< Turn on the counter */
MONITOR_TURN_OFF, /*!< Turn off the counter */
MONITOR_RESET_VALUE, /*!< Reset current values */
@@ -423,8 +422,6 @@ enum mon_set_option {
function */
};
-typedef enum mon_set_option mon_option_t;
-
/** Number of bit in a ulint datatype */
#define NUM_BITS_ULINT (sizeof(ulint) * CHAR_BIT)
@@ -533,8 +530,37 @@ on the counters */
} \
}
-#ifdef HAVE_ATOMIC_BUILTINS
+/** Increment a monitor counter under mutex protection.
+Use MONITOR_INC if appropriate mutex protection already exists.
+@param monitor monitor to be incremented by 1
+@param mutex mutex to acquire and release */
+# define MONITOR_MUTEX_INC(mutex, monitor) \
+ ut_ad(!mutex_own(mutex)); \
+ if (MONITOR_IS_ON(monitor)) { \
+ mutex_enter(mutex); \
+ if (++MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \
+ MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor); \
+ } \
+ mutex_exit(mutex); \
+ }
+/** Decrement a monitor counter under mutex protection.
+Use MONITOR_DEC if appropriate mutex protection already exists.
+@param monitor monitor to be decremented by 1
+@param mutex mutex to acquire and release */
+# define MONITOR_MUTEX_DEC(mutex, monitor) \
+ ut_ad(!mutex_own(mutex)); \
+ if (MONITOR_IS_ON(monitor)) { \
+ mutex_enter(mutex); \
+ if (--MONITOR_VALUE(monitor) < MONITOR_MIN_VALUE(monitor)) { \
+ MONITOR_MIN_VALUE(monitor) = MONITOR_VALUE(monitor); \
+ } \
+ mutex_exit(mutex); \
+ }
+#if defined HAVE_ATOMIC_BUILTINS_64
+/** Atomically increment a monitor counter.
+Use MONITOR_INC if appropriate mutex protection exists.
+@param monitor monitor to be incremented by 1 */
# define MONITOR_ATOMIC_INC(monitor) \
if (MONITOR_IS_ON(monitor)) { \
ib_uint64_t value; \
@@ -547,10 +573,13 @@ on the counters */
} \
}
+/** Atomically decrement a monitor counter.
+Use MONITOR_DEC if appropriate mutex protection exists.
+@param monitor monitor to be decremented by 1 */
# define MONITOR_ATOMIC_DEC(monitor) \
if (MONITOR_IS_ON(monitor)) { \
ib_uint64_t value; \
- value = os_atomic_decrement_ulint( \
+ value = os_atomic_decrement_uint64( \
(ib_uint64_t*) &MONITOR_VALUE(monitor), 1); \
/* Note: This is not 100% accurate because of the \
inherent race, we ignore it due to performance. */ \
@@ -558,7 +587,34 @@ on the counters */
MONITOR_MIN_VALUE(monitor) = value; \
} \
}
-#endif /* HAVE_ATOMIC_BUILTINS */
+# define srv_mon_create() ((void) 0)
+# define srv_mon_free() ((void) 0)
+#else /* HAVE_ATOMIC_BUILTINS_64 */
+/** Mutex protecting atomic operations on platforms that lack
+built-in operations for atomic memory access */
+extern ib_mutex_t monitor_mutex;
+/****************************************************************//**
+Initialize the monitor subsystem. */
+UNIV_INTERN
+void
+srv_mon_create(void);
+/*================*/
+/****************************************************************//**
+Close the monitor subsystem. */
+UNIV_INTERN
+void
+srv_mon_free(void);
+/*==============*/
+
+/** Atomically increment a monitor counter.
+Use MONITOR_INC if appropriate mutex protection exists.
+@param monitor monitor to be incremented by 1 */
+# define MONITOR_ATOMIC_INC(monitor) MONITOR_MUTEX_INC(&monitor_mutex, monitor)
+/** Atomically decrement a monitor counter.
+Use MONITOR_DEC if appropriate mutex protection exists.
+@param monitor monitor to be decremented by 1 */
+# define MONITOR_ATOMIC_DEC(monitor) MONITOR_MUTEX_DEC(&monitor_mutex, monitor)
+#endif /* HAVE_ATOMIC_BUILTINS_64 */
#define MONITOR_DEC(monitor) \
if (MONITOR_IS_ON(monitor)) { \
@@ -568,7 +624,17 @@ on the counters */
} \
}
+#ifdef UNIV_DEBUG_VALGRIND
+# define MONITOR_CHECK_DEFINED(value) do { \
+ mon_type_t m = value; \
+ UNIV_MEM_ASSERT_RW(&m, sizeof m); \
+} while (0)
+#else /* UNIV_DEBUG_VALGRIND */
+# define MONITOR_CHECK_DEFINED(value) (void) 0
+#endif /* UNIV_DEBUG_VALGRIND */
+
#define MONITOR_INC_VALUE(monitor, value) \
+ MONITOR_CHECK_DEFINED(value); \
if (MONITOR_IS_ON(monitor)) { \
MONITOR_VALUE(monitor) += (mon_type_t) (value); \
if (MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \
@@ -577,6 +643,7 @@ on the counters */
}
#define MONITOR_DEC_VALUE(monitor, value) \
+ MONITOR_CHECK_DEFINED(value); \
if (MONITOR_IS_ON(monitor)) { \
	ut_ad(MONITOR_VALUE(monitor) >= (mon_type_t) (value)); \
MONITOR_VALUE(monitor) -= (mon_type_t) (value); \
@@ -605,6 +672,7 @@ could already be checked as a module group */
/** Directly set a monitor counter's value */
#define MONITOR_SET(monitor, value) \
+ MONITOR_CHECK_DEFINED(value); \
if (MONITOR_IS_ON(monitor)) { \
MONITOR_VALUE(monitor) = (mon_type_t) (value); \
if (MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \
@@ -617,9 +685,10 @@ could already be checked as a module group */
/** Add time difference between now and input "value" (in seconds) to the
monitor counter
-@monitor monitor to update for the time difference
-@value the start time value */
+@param monitor monitor to update for the time difference
+@param value the start time value */
#define MONITOR_INC_TIME_IN_MICRO_SECS(monitor, value) \
+ MONITOR_CHECK_DEFINED(value); \
if (MONITOR_IS_ON(monitor)) { \
ullint old_time = (value); \
value = ut_time_us(NULL); \
@@ -629,15 +698,16 @@ monitor counter
/** This macro updates 3 counters in one call. However, it only checks the
main/first monitor counter 'monitor', to see it is on or off to decide
whether to do the update.
-@monitor the main monitor counter to update. It accounts for
+@param monitor the main monitor counter to update. It accounts for
the accumulative value for the counter.
-@monitor_n_calls counter that counts number of times this macro is
+@param monitor_n_calls counter that counts number of times this macro is
called
-@monitor_per_call counter that records the current and max value of
+@param monitor_per_call counter that records the current and max value of
each incremental value
-@value incremental value to record this time */
+@param value incremental value to record this time */
#define MONITOR_INC_VALUE_CUMULATIVE( \
monitor, monitor_n_calls, monitor_per_call, value) \
+ MONITOR_CHECK_DEFINED(value); \
if (MONITOR_IS_ON(monitor)) { \
MONITOR_VALUE(monitor_n_calls)++; \
MONITOR_VALUE(monitor_per_call) = (mon_type_t) (value); \
@@ -655,6 +725,7 @@ whether to do the update.
/** Directly set a monitor counter's value, and if the value
is monotonically increasing, only max value needs to be updated */
#define MONITOR_SET_UPD_MAX_ONLY(monitor, value) \
+ MONITOR_CHECK_DEFINED(value); \
if (MONITOR_IS_ON(monitor)) { \
MONITOR_VALUE(monitor) = (mon_type_t) (value); \
if (MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \
@@ -665,6 +736,7 @@ is monotonically increasing, only max value needs to be updated */
/** Some values such as log sequence number are monotonically increasing
number, do not need to record max/min values */
#define MONITOR_SET_SIMPLE(monitor, value) \
+ MONITOR_CHECK_DEFINED(value); \
if (MONITOR_IS_ON(monitor)) { \
MONITOR_VALUE(monitor) = (mon_type_t) (value); \
}
@@ -693,9 +765,11 @@ consolidate information from existing system status variables. */
/** Save the passed-in value to mon_start_value field of monitor
counters */
-#define MONITOR_SAVE_START(monitor, value) \
+#define MONITOR_SAVE_START(monitor, value) do { \
+ MONITOR_CHECK_DEFINED(value); \
(MONITOR_START_VALUE(monitor) = \
- (mon_type_t) (value) - MONITOR_VALUE_RESET(monitor))
+ (mon_type_t) (value) - MONITOR_VALUE_RESET(monitor)); \
+ } while (0)
/** Save the passed-in value to mon_last_value field of monitor
counters */
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index 99cff251e3c..201f19c0cd8 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -50,22 +50,91 @@ Created 10/10/1995 Heikki Tuuri
#include "trx0types.h"
#include "srv0conc.h"
#include "buf0checksum.h"
+#include "ut0counter.h"
+
+/* Global counters used inside InnoDB. */
+struct srv_stats_t {
+ typedef ib_counter_t<lsn_t, 1, single_indexer_t> lsn_ctr_1_t;
+ typedef ib_counter_t<ulint, 1, single_indexer_t> ulint_ctr_1_t;
+ typedef ib_counter_t<lint, 1, single_indexer_t> lint_ctr_1_t;
+ typedef ib_counter_t<ulint, 64> ulint_ctr_64_t;
+ typedef ib_counter_t<ib_int64_t, 1, single_indexer_t> ib_int64_ctr_1_t;
+
+ /** Count the amount of data written in total (in bytes) */
+ ulint_ctr_1_t data_written;
+
+ /** Number of the log write requests done */
+ ulint_ctr_1_t log_write_requests;
+
+ /** Number of physical writes to the log performed */
+ ulint_ctr_1_t log_writes;
+
+ /** Amount of data written to the log files in bytes */
+ lsn_ctr_1_t os_log_written;
+
+ /** Number of writes being done to the log files */
+ lint_ctr_1_t os_log_pending_writes;
+
+ /** We increase this counter, when we don't have enough
+ space in the log buffer and have to flush it */
+ ulint_ctr_1_t log_waits;
+
+ /** Count the number of times the doublewrite buffer was flushed */
+ ulint_ctr_1_t dblwr_writes;
+
+ /** Store the number of pages that have been flushed to the
+ doublewrite buffer */
+ ulint_ctr_1_t dblwr_pages_written;
+
+ /** Store the number of write requests issued */
+ ulint_ctr_1_t buf_pool_write_requests;
+
+ /** Store the number of times when we had to wait for a free page
+ in the buffer pool. It happens when the buffer pool is full and we
+ need to make a flush, in order to be able to read or create a page. */
+ ulint_ctr_1_t buf_pool_wait_free;
+
+ /** Count the number of pages that were written from buffer
+ pool to the disk */
+ ulint_ctr_1_t buf_pool_flushed;
+
+ /** Number of buffer pool reads that led to the reading of
+ a disk page */
+ ulint_ctr_1_t buf_pool_reads;
+
+ /** Number of data read in total (in bytes) */
+ ulint_ctr_1_t data_read;
+
+ /** Wait time of database locks */
+ ib_int64_ctr_1_t n_lock_wait_time;
+
+ /** Number of database lock waits */
+ ulint_ctr_1_t n_lock_wait_count;
+
+ /** Number of threads currently waiting on database locks */
+ lint_ctr_1_t n_lock_wait_current_count;
+
+ /** Number of rows read. */
+ ulint_ctr_64_t n_rows_read;
+
+ /** Number of rows updated */
+ ulint_ctr_64_t n_rows_updated;
+
+ /** Number of rows deleted */
+ ulint_ctr_64_t n_rows_deleted;
+
+ /** Number of rows inserted */
+ ulint_ctr_64_t n_rows_inserted;
+};
extern const char* srv_main_thread_op_info;
/** Prefix used by MySQL to indicate pre-5.1 table name encoding */
extern const char srv_mysql50_table_name_prefix[10];
-/* When this event is set the lock timeout and InnoDB monitor
-thread starts running */
-extern os_event_t srv_lock_timeout_thread_event;
-
/* The monitor thread waits on this event. */
extern os_event_t srv_monitor_event;
-/* The lock timeout thread waits on this event. */
-extern os_event_t srv_timeout_event;
-
/* The error monitor thread waits on this event. */
extern os_event_t srv_error_event;
@@ -89,20 +158,20 @@ at a time */
#define SRV_AUTO_EXTEND_INCREMENT \
(srv_auto_extend_increment * ((1024 * 1024) / UNIV_PAGE_SIZE))
-/* Mutex for locking srv_monitor_file */
-extern mutex_t srv_monitor_file_mutex;
+/* Mutex for locking srv_monitor_file. Not created if srv_read_only_mode */
+extern ib_mutex_t srv_monitor_file_mutex;
/* Temporary file for innodb monitor output */
extern FILE* srv_monitor_file;
-/* Mutex for locking srv_dict_tmpfile.
+/* Mutex for locking srv_dict_tmpfile. Only created if !srv_read_only_mode.
This mutex has a very high rank; threads reserving it should not
be holding any InnoDB latches. */
-extern mutex_t srv_dict_tmpfile_mutex;
+extern ib_mutex_t srv_dict_tmpfile_mutex;
/* Temporary file for output from the data dictionary */
extern FILE* srv_dict_tmpfile;
-/* Mutex for locking srv_misc_tmpfile.
+/* Mutex for locking srv_misc_tmpfile. Only created if !srv_read_only_mode.
This mutex has a very low rank; threads reserving it should not
acquire any further latches or sleep before releasing this one. */
-extern mutex_t srv_misc_tmpfile_mutex;
+extern ib_mutex_t srv_misc_tmpfile_mutex;
/* Temporary file for miscellanous diagnostic output */
extern FILE* srv_misc_tmpfile;
@@ -114,6 +183,10 @@ extern char* srv_data_home;
extern char* srv_arch_dir;
#endif /* UNIV_LOG_ARCHIVE */
+/** Set if InnoDB must operate in read-only mode. We don't do any
+recovery and open all tables in RO mode instead of RW mode. We don't
+sync the max trx id to disk either. */
+extern my_bool srv_read_only_mode;
/** store to its own file each table created by an user; data
dictionary tables are in the system tablespace 0 */
extern my_bool srv_file_per_table;
@@ -134,8 +207,10 @@ extern ulint srv_max_file_format_at_startup;
on duplicate key checking and foreign key checking */
extern ibool srv_locks_unsafe_for_binlog;
-/* Variable specifying the FTS parallel sort buffer size */
+/** Sort buffer size in index creation */
extern ulong srv_sort_buf_size;
+/** Maximum modification log file size for online index creation */
+extern unsigned long long srv_online_max_size;
/* If this flag is TRUE, then we will use the native aio of the
OS (provided we compiled Innobase with it in), otherwise we will
@@ -153,6 +228,9 @@ extern char* srv_undo_dir;
/** Number of undo tablespaces to use. */
extern ulong srv_undo_tablespaces;
+/** The number of UNDO tablespaces that are open and ready to use. */
+extern ulint srv_undo_tablespaces_open;
+
/* The number of undo segments to use */
extern ulong srv_undo_logs;
@@ -163,17 +241,20 @@ extern ulint* srv_data_file_is_raw_partition;
extern ibool srv_auto_extend_last_data_file;
extern ulint srv_last_file_size_max;
-extern char** srv_log_group_home_dirs;
+extern char* srv_log_group_home_dir;
#ifndef UNIV_HOTBACKUP
extern ulong srv_auto_extend_increment;
extern ibool srv_created_new_raw;
-extern ulint srv_n_log_groups;
-extern ulint srv_n_log_files;
+/** Maximum number of srv_n_log_files, or innodb_log_files_in_group */
+#define SRV_N_LOG_FILES_MAX 100
+extern ulong srv_n_log_files;
extern ib_uint64_t srv_log_file_size;
+extern ib_uint64_t srv_log_file_size_requested;
extern ulint srv_log_buffer_size;
extern ulong srv_flush_log_at_trx_commit;
+extern uint srv_flush_log_at_timeout;
extern char srv_adaptive_flushing;
/* If this flag is TRUE, then we will load the indexes' (and tables') metadata
@@ -195,7 +276,7 @@ extern ulong srv_n_page_hash_locks; /*!< number of locks to
protect buf_pool->page_hash */
extern ulong srv_LRU_scan_depth; /*!< Scan depth for LRU
flush batch */
-extern my_bool srv_flush_neighbors; /*!< whether or not to flush
+extern ulong srv_flush_neighbors; /*!< whether or not to flush
neighbors of a block */
extern ulint srv_buf_pool_old_size; /*!< previously requested size */
extern ulint srv_buf_pool_curr_size; /*!< current size in bytes */
@@ -210,6 +291,12 @@ extern ulint srv_n_write_io_threads;
/* Number of IO operations per second the server can do */
extern ulong srv_io_capacity;
+
+/* We use this dummy default value at startup for max_io_capacity.
+The real value is set based on the value of io_capacity. */
+#define SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT (~0UL)
+#define SRV_MAX_IO_CAPACITY_LIMIT (~0UL)
+extern ulong srv_max_io_capacity;
/* Returns the number of IO operations that is X percent of the
capacity. PCT_IO(5) -> returns the number of IO operations that
is 5% of the max where max is srv_io_capacity. */
@@ -232,9 +319,16 @@ extern ulint srv_win_file_flush_method;
extern ulint srv_max_n_open_files;
-extern ulint srv_max_dirty_pages_pct;
+extern ulong srv_max_dirty_pages_pct;
+extern ulong srv_max_dirty_pages_pct_lwm;
+
+extern ulong srv_adaptive_flushing_lwm;
+extern ulong srv_flushing_avg_loops;
-extern ulint srv_force_recovery;
+extern ulong srv_force_recovery;
+#ifndef DBUG_OFF
+extern ulong srv_force_recovery_crash;
+#endif /* !DBUG_OFF */
extern ulint srv_fast_shutdown; /*!< If this is 1, do not do a
purge and index buffer merge.
@@ -246,7 +340,9 @@ extern ulint srv_fast_shutdown; /*!< If this is 1, do not do a
extern ibool srv_innodb_status;
extern unsigned long long srv_stats_transient_sample_pages;
+extern my_bool srv_stats_persistent;
extern unsigned long long srv_stats_persistent_sample_pages;
+extern my_bool srv_stats_auto_recalc;
extern ibool srv_use_doublewrite_buf;
extern ulong srv_doublewrite_batch_size;
@@ -259,11 +355,6 @@ extern ulong srv_max_purge_lag_delay;
extern ulong srv_replication_delay;
/*-------------------------------------------*/
-extern ulint srv_n_rows_inserted;
-extern ulint srv_n_rows_updated;
-extern ulint srv_n_rows_deleted;
-extern ulint srv_n_rows_read;
-
extern ibool srv_print_innodb_monitor;
extern ibool srv_print_innodb_lock_monitor;
extern ibool srv_print_innodb_tablespace_monitor;
@@ -274,21 +365,21 @@ extern ibool srv_print_verbose_log;
"tables instead, see " REFMAN "innodb-i_s-tables.html"
extern ibool srv_print_innodb_table_monitor;
-extern ibool srv_lock_timeout_active;
extern ibool srv_monitor_active;
extern ibool srv_error_monitor_active;
/* TRUE during the lifetime of the buffer pool dump/load thread */
extern ibool srv_buf_dump_thread_active;
+/* TRUE during the lifetime of the stats thread */
+extern ibool srv_dict_stats_thread_active;
+
extern ulong srv_n_spin_wait_rounds;
extern ulong srv_n_free_tickets_to_enter;
extern ulong srv_thread_sleep_delay;
extern ulong srv_spin_wait_delay;
extern ibool srv_priority_boost;
-extern ulint srv_n_lock_wait_count;
-
extern ulint srv_truncated_status_writes;
extern ulint srv_available_undo_logs;
@@ -309,12 +400,21 @@ extern ibool srv_print_latch_waits;
# define srv_print_latch_waits FALSE
#endif /* UNIV_DEBUG */
+#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+extern my_bool srv_ibuf_disable_background_merge;
+#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+
+#ifdef UNIV_DEBUG
+extern my_bool srv_purge_view_update_only_debug;
+#endif /* UNIV_DEBUG */
+
extern ulint srv_fatal_semaphore_wait_threshold;
+#define SRV_SEMAPHORE_WAIT_EXTENSION 7200
extern ulint srv_dml_needed_delay;
#ifndef HAVE_ATOMIC_BUILTINS
/** Mutex protecting some server global variables. */
-extern mutex_t server_mutex;
+extern ib_mutex_t server_mutex;
#endif /* !HAVE_ATOMIC_BUILTINS */
#define SRV_MAX_N_IO_THREADS 130
@@ -324,22 +424,6 @@ i/o handler thread */
extern const char* srv_io_thread_op_info[];
extern const char* srv_io_thread_function[];
-/* the number of the log write requests done */
-extern ulint srv_log_write_requests;
-
-/* the number of physical writes to the log performed */
-extern ulint srv_log_writes;
-
-/* amount of data written to the log files in bytes */
-extern lsn_t srv_os_log_written;
-
-/* amount of writes being done to the log files */
-extern ulint srv_os_log_pending_writes;
-
-/* we increase this counter, when there we don't have enough space in the
-log buffer and have to flush it */
-extern ulint srv_log_waits;
-
/* the number of purge threads to use from the worker pool (currently 0 or 1) */
extern ulong srv_n_purge_threads;
@@ -349,50 +433,16 @@ extern ulong srv_purge_batch_size;
/* the number of sync wait arrays */
extern ulong srv_sync_array_size;
-/* variable that counts amount of data read in total (in bytes) */
-extern ulint srv_data_read;
-
-/* here we count the amount of data written in total (in bytes) */
-extern ulint srv_data_written;
-
-/* this variable counts the amount of times, when the doublewrite buffer
-was flushed */
-extern ulint srv_dblwr_writes;
-
-/* here we store the number of pages that have been flushed to the
-doublewrite buffer */
-extern ulint srv_dblwr_pages_written;
-
-/* in this variable we store the number of write requests issued */
-extern ulint srv_buf_pool_write_requests;
-
-/* here we store the number of times when we had to wait for a free page
-in the buffer pool. It happens when the buffer pool is full and we need
-to make a flush, in order to be able to read or create a page. */
-extern ulint srv_buf_pool_wait_free;
-
-/* variable to count the number of pages that were written from the
-buffer pool to disk */
-extern ulint srv_buf_pool_flushed;
-
-/** Number of buffer pool reads that led to the
-reading of a disk page */
-extern ulint srv_buf_pool_reads;
-
/* print all user-level transactions deadlocks to mysqld stderr */
extern my_bool srv_print_all_deadlocks;
-/** Status variables to be passed to MySQL */
-typedef struct export_var_struct export_struc;
-
-/** Thread slot in the thread table */
-typedef struct srv_slot_struct srv_slot_t;
-
-/** Thread table is an array of slots */
-typedef srv_slot_t srv_table_t;
+extern my_bool srv_cmp_per_index_enabled;
/** Status variables to be passed to MySQL */
-extern export_struc export_vars;
+extern struct export_var_t export_vars;
+
+/** Global counters */
+extern srv_stats_t srv_stats;
# ifdef UNIV_PFS_THREAD
/* Keys to register InnoDB threads with performance schema */
@@ -404,19 +454,20 @@ extern mysql_pfs_key_t srv_error_monitor_thread_key;
extern mysql_pfs_key_t srv_monitor_thread_key;
extern mysql_pfs_key_t srv_master_thread_key;
extern mysql_pfs_key_t srv_purge_thread_key;
+extern mysql_pfs_key_t recv_writer_thread_key;
/* This macro register the current thread and its key with performance
schema */
# define pfs_register_thread(key) \
do { \
- struct PSI_thread* psi = PSI_CALL(new_thread)(key, NULL, 0);\
- PSI_CALL(set_thread)(psi); \
+ struct PSI_thread* psi = PSI_THREAD_CALL(new_thread)(key, NULL, 0);\
+ PSI_THREAD_CALL(set_thread)(psi); \
} while (0)
/* This macro delist the current thread from performance schema */
# define pfs_delete_thread() \
do { \
- PSI_CALL(delete_current_thread)(); \
+ PSI_THREAD_CALL(delete_current_thread)(); \
} while (0)
# endif /* UNIV_PFS_THREAD */
@@ -439,8 +490,19 @@ enum {
when writing data files, but do flush
after writing to log files */
SRV_UNIX_NOSYNC, /*!< do not flush after writing */
- SRV_UNIX_O_DIRECT /*!< invoke os_file_set_nocache() on
- data files */
+ SRV_UNIX_O_DIRECT, /*!< invoke os_file_set_nocache() on
+ data files. This implies using
+ non-buffered IO but still using fsync,
+ the reason for which is that some FS
+ do not flush meta-data when
+ unbuffered IO happens */
+ SRV_UNIX_O_DIRECT_NO_FSYNC
+ /*!< do not use fsync() when using
+ direct IO i.e.: it can be set to avoid
+ the fsync() call that we make when
+ using SRV_UNIX_O_DIRECT. However, in
+ this case user/DBA should be sure about
+ the integrity of the meta-data */
};
/** Alternatives for file i/o in Windows */
@@ -499,10 +561,9 @@ enum srv_thread_type {
};
/*********************************************************************//**
-Boots Innobase server.
-@return DB_SUCCESS or error code */
+Boots Innobase server. */
UNIV_INTERN
-ulint
+void
srv_boot(void);
/*==========*/
/*********************************************************************//**
@@ -533,6 +594,12 @@ srv_set_io_thread_op_info(
ulint i, /*!< in: the 'segment' of the i/o thread */
const char* str); /*!< in: constant char string describing the
state */
+/*********************************************************************//**
+Resets the info describing an i/o thread current state. */
+UNIV_INTERN
+void
+srv_reset_io_thread_op_info();
+/*=========================*/
/*******************************************************************//**
Tells the purge thread that there has been activity in the database
and wakes up the purge thread if it is suspended (not sleeping). Note
@@ -714,7 +781,7 @@ srv_purge_wakeup(void);
/*==================*/
/** Status variables to be passed to MySQL */
-struct export_var_struct{
+struct export_var_t{
ulint innodb_data_pending_reads; /*!< Pending reads */
ulint innodb_data_pending_writes; /*!< Pending writes */
ulint innodb_data_pending_fsyncs; /*!< Pending fsyncs */
@@ -727,7 +794,9 @@ struct export_var_struct{
char innodb_buffer_pool_load_status[512];/*!< Buf pool load status */
ulint innodb_buffer_pool_pages_total; /*!< Buffer pool size */
ulint innodb_buffer_pool_pages_data; /*!< Data pages */
+ ulint innodb_buffer_pool_bytes_data; /*!< File bytes used */
ulint innodb_buffer_pool_pages_dirty; /*!< Dirty data pages */
+ ulint innodb_buffer_pool_bytes_dirty; /*!< File bytes modified */
	ulint innodb_buffer_pool_pages_misc;	/*!< Miscellaneous pages */
ulint innodb_buffer_pool_pages_free; /*!< Free pages */
#ifdef UNIV_DEBUG
@@ -771,10 +840,15 @@ struct export_var_struct{
ulint innodb_num_open_files; /*!< fil_n_file_opened */
ulint innodb_truncated_status_writes; /*!< srv_truncated_status_writes */
ulint innodb_available_undo_logs; /*!< srv_available_undo_logs */
+#ifdef UNIV_DEBUG
+ ulint innodb_purge_trx_id_age; /*!< rw_max_trx_id - purged trx_id */
+ ulint innodb_purge_view_trx_id_age; /*!< rw_max_trx_id
+ - purged view's min trx_id */
+#endif /* UNIV_DEBUG */
};
/** Thread slot in the thread table. */
-struct srv_slot_struct{
+struct srv_slot_t{
srv_thread_type type; /*!< thread type: user,
utility etc. */
ibool in_use; /*!< TRUE if this slot
@@ -803,6 +877,7 @@ struct srv_slot_struct{
# define srv_use_native_aio FALSE
# define srv_force_recovery 0UL
# define srv_set_io_thread_op_info(t,info) ((void) 0)
+# define srv_reset_io_thread_op_info() ((void) 0)
# define srv_is_being_started 0
# define srv_win_file_flush_method SRV_WIN_IO_UNBUFFERED
# define srv_unix_file_flush_method SRV_UNIX_O_DSYNC
diff --git a/storage/innobase/include/srv0start.h b/storage/innobase/include/srv0start.h
index 9d948675011..e136f30f96a 100644
--- a/storage/innobase/include/srv0start.h
+++ b/storage/innobase/include/srv0start.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -83,24 +83,50 @@ Starts Innobase and creates a new database if database files
are not found and the user wants.
@return DB_SUCCESS or error code */
UNIV_INTERN
-int
+dberr_t
innobase_start_or_create_for_mysql(void);
/*====================================*/
/****************************************************************//**
Shuts down the Innobase database.
@return DB_SUCCESS or error code */
UNIV_INTERN
-int
+dberr_t
innobase_shutdown_for_mysql(void);
/********************************************************************
Signal all per-table background threads to shutdown, and wait for them to do
so. */
-
+UNIV_INTERN
void
srv_shutdown_table_bg_threads(void);
-
/*=============================*/
+
+/*************************************************************//**
+Copy the file path component of the physical file to parameter. It will
+copy up to and including the terminating path separator.
+@return number of bytes copied or ULINT_UNDEFINED if destination buffer
+ is smaller than the path to be copied. */
+UNIV_INTERN
+ulint
+srv_path_copy(
+/*==========*/
+ char* dest, /*!< out: destination buffer */
+ ulint dest_len, /*!< in: max bytes to copy */
+ const char* basedir, /*!< in: base directory */
+ const char* table_name) /*!< in: source table name */
+ __attribute__((nonnull, warn_unused_result));
+
+/*****************************************************************//**
+Get the meta-data filename from the table name. */
+UNIV_INTERN
+void
+srv_get_meta_data_filename(
+/*======================*/
+ dict_table_t* table, /*!< in: table */
+ char* filename, /*!< out: filename */
+ ulint max_len) /*!< in: filename max length */
+ __attribute__((nonnull));
+
/** Log sequence number at shutdown */
extern lsn_t srv_shutdown_lsn;
/** Log sequence number immediately after startup */
diff --git a/storage/innobase/include/sync0arr.h b/storage/innobase/include/sync0arr.h
index 56f9ff78c49..bb4d1037a62 100644
--- a/storage/innobase/include/sync0arr.h
+++ b/storage/innobase/include/sync0arr.h
@@ -32,9 +32,9 @@ Created 9/5/1995 Heikki Tuuri
#include "os0thread.h"
/** Synchronization wait array cell */
-typedef struct sync_cell_struct sync_cell_t;
+struct sync_cell_t;
/** Synchronization wait array */
-typedef struct sync_array_struct sync_array_t;
+struct sync_array_t;
/******************************************************************//**
Reserves a wait array cell for waiting for an object.
diff --git a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h
index b0c21d0c76b..c268098d1ea 100644
--- a/storage/innobase/include/sync0rw.h
+++ b/storage/innobase/include/sync0rw.h
@@ -36,6 +36,7 @@ Created 9/11/1995 Heikki Tuuri
#include "univ.i"
#ifndef UNIV_HOTBACKUP
#include "ut0lst.h"
+#include "ut0counter.h"
#include "sync0sync.h"
#include "os0sync.h"
@@ -44,6 +45,43 @@ in MySQL: */
#undef rw_lock_t
#endif /* !UNIV_HOTBACKUP */
+/** Counters for RW locks. */
+struct rw_lock_stats_t {
+ typedef ib_counter_t<ib_int64_t, IB_N_SLOTS> ib_int64_counter_t;
+
+ /** number of spin waits on rw-latches,
+ resulted during shared (read) locks */
+ ib_int64_counter_t rw_s_spin_wait_count;
+
+ /** number of spin loop rounds on rw-latches,
+ resulted during shared (read) locks */
+ ib_int64_counter_t rw_s_spin_round_count;
+
+ /** number of OS waits on rw-latches,
+ resulted during shared (read) locks */
+ ib_int64_counter_t rw_s_os_wait_count;
+
+ /** number of unlocks (that unlock shared locks),
+ set only when UNIV_SYNC_PERF_STAT is defined */
+ ib_int64_counter_t rw_s_exit_count;
+
+ /** number of spin waits on rw-latches,
+ resulted during exclusive (write) locks */
+ ib_int64_counter_t rw_x_spin_wait_count;
+
+ /** number of spin loop rounds on rw-latches,
+ resulted during exclusive (write) locks */
+ ib_int64_counter_t rw_x_spin_round_count;
+
+ /** number of OS waits on rw-latches,
+ resulted during exclusive (write) locks */
+ ib_int64_counter_t rw_x_os_wait_count;
+
+ /** number of unlocks (that unlock exclusive locks),
+ set only when UNIV_SYNC_PERF_STAT is defined */
+ ib_int64_counter_t rw_x_exit_count;
+};
+
/* Latch types; these are used also in btr0btr.h: keep the numerical values
smaller than 30 and the order of the numerical values like below! */
#define RW_S_LATCH 1
@@ -57,22 +95,22 @@ of concurrent read locks before the rw_lock breaks. The current value of
0x00100000 allows 1,048,575 concurrent readers and 2047 recursive writers.*/
#define X_LOCK_DECR 0x00100000
-typedef struct rw_lock_struct rw_lock_t;
+struct rw_lock_t;
#ifdef UNIV_SYNC_DEBUG
-typedef struct rw_lock_debug_struct rw_lock_debug_t;
+struct rw_lock_debug_t;
#endif /* UNIV_SYNC_DEBUG */
typedef UT_LIST_BASE_NODE_T(rw_lock_t) rw_lock_list_t;
extern rw_lock_list_t rw_lock_list;
-extern mutex_t rw_lock_list_mutex;
+extern ib_mutex_t rw_lock_list_mutex;
#ifdef UNIV_SYNC_DEBUG
/* The global mutex which protects debug info lists of all rw-locks.
To modify the debug info list of an rw-lock, this mutex has to be
acquired in addition to the mutex protecting the lock. */
-extern mutex_t rw_lock_debug_mutex;
+extern ib_mutex_t rw_lock_debug_mutex;
extern os_event_t rw_lock_debug_event; /*!< If deadlock detection does
not get immediately the mutex it
may wait for this event */
@@ -80,30 +118,8 @@ extern ibool rw_lock_debug_waiters; /*!< This is set to TRUE, if
there may be waiters for the event */
#endif /* UNIV_SYNC_DEBUG */
-/** number of spin waits on rw-latches,
-resulted during exclusive (write) locks */
-extern ib_int64_t rw_s_spin_wait_count;
-/** number of spin loop rounds on rw-latches,
-resulted during exclusive (write) locks */
-extern ib_int64_t rw_s_spin_round_count;
-/** number of unlocks (that unlock shared locks),
-set only when UNIV_SYNC_PERF_STAT is defined */
-extern ib_int64_t rw_s_exit_count;
-/** number of OS waits on rw-latches,
-resulted during shared (read) locks */
-extern ib_int64_t rw_s_os_wait_count;
-/** number of spin waits on rw-latches,
-resulted during shared (read) locks */
-extern ib_int64_t rw_x_spin_wait_count;
-/** number of spin loop rounds on rw-latches,
-resulted during shared (read) locks */
-extern ib_int64_t rw_x_spin_round_count;
-/** number of OS waits on rw-latches,
-resulted during exclusive (write) locks */
-extern ib_int64_t rw_x_os_wait_count;
-/** number of unlocks (that unlock exclusive locks),
-set only when UNIV_SYNC_PERF_STAT is defined */
-extern ib_int64_t rw_x_exit_count;
+/** Counters for RW locks. */
+extern rw_lock_stats_t rw_lock_stats;
#ifdef UNIV_PFS_RWLOCK
/* Following are rwlock keys used to register with MySQL
@@ -121,10 +137,10 @@ extern mysql_pfs_key_t checkpoint_lock_key;
extern mysql_pfs_key_t fil_space_latch_key;
extern mysql_pfs_key_t fts_cache_rw_lock_key;
extern mysql_pfs_key_t fts_cache_init_rw_lock_key;
-extern mysql_pfs_key_t index_tree_rw_lock_key;
extern mysql_pfs_key_t trx_i_s_cache_lock_key;
extern mysql_pfs_key_t trx_purge_latch_key;
extern mysql_pfs_key_t index_tree_rw_lock_key;
+extern mysql_pfs_key_t index_online_log_key;
extern mysql_pfs_key_t dict_table_stats_latch_key;
extern mysql_pfs_key_t trx_sys_rw_lock_key;
extern mysql_pfs_key_t hash_table_rw_lock_key;
@@ -159,6 +175,9 @@ unlocking, not the corresponding function. */
# define rw_lock_s_lock(M) \
rw_lock_s_lock_func((M), 0, __FILE__, __LINE__)
+# define rw_lock_s_lock_inline(M, P, F, L) \
+ rw_lock_s_lock_func((M), (P), (F), (L))
+
# define rw_lock_s_lock_gen(M, P) \
rw_lock_s_lock_func((M), (P), __FILE__, __LINE__)
@@ -175,12 +194,18 @@ unlocking, not the corresponding function. */
# define rw_lock_x_lock(M) \
rw_lock_x_lock_func((M), 0, __FILE__, __LINE__)
+# define rw_lock_x_lock_inline(M, P, F, L) \
+ rw_lock_x_lock_func((M), (P), (F), (L))
+
# define rw_lock_x_lock_gen(M, P) \
rw_lock_x_lock_func((M), (P), __FILE__, __LINE__)
# define rw_lock_x_lock_nowait(M) \
rw_lock_x_lock_func_nowait((M), __FILE__, __LINE__)
+# define rw_lock_x_lock_func_nowait_inline(M, F, L) \
+ rw_lock_x_lock_func_nowait((M), (F), (L))
+
# ifdef UNIV_SYNC_DEBUG
# define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(P, L)
# else
@@ -212,6 +237,9 @@ unlocking, not the corresponding function. */
# define rw_lock_s_lock(M) \
pfs_rw_lock_s_lock_func((M), 0, __FILE__, __LINE__)
+# define rw_lock_s_lock_inline(M, P, F, L) \
+ pfs_rw_lock_s_lock_func((M), (P), (F), (L))
+
# define rw_lock_s_lock_gen(M, P) \
pfs_rw_lock_s_lock_func((M), (P), __FILE__, __LINE__)
@@ -227,12 +255,18 @@ unlocking, not the corresponding function. */
# define rw_lock_x_lock(M) \
pfs_rw_lock_x_lock_func((M), 0, __FILE__, __LINE__)
+# define rw_lock_x_lock_inline(M, P, F, L) \
+ pfs_rw_lock_x_lock_func((M), (P), (F), (L))
+
# define rw_lock_x_lock_gen(M, P) \
pfs_rw_lock_x_lock_func((M), (P), __FILE__, __LINE__)
# define rw_lock_x_lock_nowait(M) \
pfs_rw_lock_x_lock_func_nowait((M), __FILE__, __LINE__)
+# define rw_lock_x_lock_func_nowait_inline(M, F, L) \
+ pfs_rw_lock_x_lock_func_nowait((M), (F), (L))
+
# ifdef UNIV_SYNC_DEBUG
# define rw_lock_x_unlock_gen(L, P) pfs_rw_lock_x_unlock_func(P, L)
# else
@@ -367,30 +401,6 @@ rw_lock_x_unlock_func(
been passed to another thread to unlock */
#endif
rw_lock_t* lock); /*!< in/out: rw-lock */
-
-
-/******************************************************************//**
-Low-level function which locks an rw-lock in s-mode when we know that it
-is possible and none else is currently accessing the rw-lock structure.
-Then we can do the locking without reserving the mutex. */
-UNIV_INLINE
-void
-rw_lock_s_lock_direct(
-/*==================*/
- rw_lock_t* lock, /*!< in/out: rw-lock */
- const char* file_name, /*!< in: file name where requested */
- ulint line); /*!< in: line where lock requested */
-/******************************************************************//**
-Low-level function which locks an rw-lock in x-mode when we know that it
-is not locked and none else is currently accessing the rw-lock structure.
-Then we can do the locking without reserving the mutex. */
-UNIV_INLINE
-void
-rw_lock_x_lock_direct(
-/*==================*/
- rw_lock_t* lock, /*!< in/out: rw-lock */
- const char* file_name, /*!< in: file name where requested */
- ulint line); /*!< in: line where lock requested */
/******************************************************************//**
This function is used in the insert buffer to move the ownership of an
x-latch on a buffer frame to the current thread. The x-latch was set by
@@ -558,7 +568,7 @@ shared locks are allowed. To prevent starving of a writer blocked by
readers, a writer may queue for x-lock by decrementing lock_word: no
new readers will be let in while the thread waits for readers to
exit. */
-struct rw_lock_struct {
+struct rw_lock_t {
volatile lint lock_word;
/*!< Holds the state of the lock. */
volatile ulint waiters;/*!< 1: there are waiters */
@@ -583,7 +593,7 @@ struct rw_lock_struct {
/*!< Event for next-writer to wait on. A thread
must decrement lock_word before waiting. */
#ifndef INNODB_RW_LOCKS_USE_ATOMICS
- mutex_t mutex; /*!< The mutex protecting rw_lock_struct */
+ ib_mutex_t mutex; /*!< The mutex protecting rw_lock_t */
#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
UT_LIST_NODE_T(rw_lock_t) list;
@@ -615,7 +625,7 @@ struct rw_lock_struct {
unsigned last_x_line:14; /*!< Line number where last time x-locked */
#ifdef UNIV_DEBUG
ulint magic_n; /*!< RW_LOCK_MAGIC_N */
-/** Value of rw_lock_struct::magic_n */
+/** Value of rw_lock_t::magic_n */
#define RW_LOCK_MAGIC_N 22643
#endif /* UNIV_DEBUG */
};
@@ -623,7 +633,7 @@ struct rw_lock_struct {
#ifdef UNIV_SYNC_DEBUG
/** The structure for storing debug info of an rw-lock. All access to this
structure must be protected by rw_lock_debug_mutex_enter(). */
-struct rw_lock_debug_struct {
+struct rw_lock_debug_t {
os_thread_id_t thread_id; /*!< The thread id of the thread which
locked the rw-lock */
diff --git a/storage/innobase/include/sync0rw.ic b/storage/innobase/include/sync0rw.ic
index eab89e2619e..8786ad84643 100644
--- a/storage/innobase/include/sync0rw.ic
+++ b/storage/innobase/include/sync0rw.ic
@@ -90,7 +90,7 @@ rw_lock_set_waiter_flag(
rw_lock_t* lock) /*!< in/out: rw-lock */
{
#ifdef INNODB_RW_LOCKS_USE_ATOMICS
- os_compare_and_swap_ulint(&lock->waiters, 0, 1);
+ (void) os_compare_and_swap_ulint(&lock->waiters, 0, 1);
#else /* INNODB_RW_LOCKS_USE_ATOMICS */
lock->waiters = 1;
#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
@@ -107,7 +107,7 @@ rw_lock_reset_waiter_flag(
rw_lock_t* lock) /*!< in/out: rw-lock */
{
#ifdef INNODB_RW_LOCKS_USE_ATOMICS
- os_compare_and_swap_ulint(&lock->waiters, 1, 0);
+ (void) os_compare_and_swap_ulint(&lock->waiters, 1, 0);
#else /* INNODB_RW_LOCKS_USE_ATOMICS */
lock->waiters = 0;
#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
@@ -128,7 +128,7 @@ rw_lock_get_writer(
/* return NOT_LOCKED in s-lock state, like the writer
member of the old lock implementation. */
return(RW_LOCK_NOT_LOCKED);
- } else if (((-lock_word) % X_LOCK_DECR) == 0) {
+ } else if ((lock_word == 0) || (lock_word <= -X_LOCK_DECR)) {
return(RW_LOCK_EX);
} else {
ut_ad(lock_word > -X_LOCK_DECR);
@@ -158,7 +158,7 @@ rw_lock_get_reader_count(
#ifndef INNODB_RW_LOCKS_USE_ATOMICS
UNIV_INLINE
-mutex_t*
+ib_mutex_t*
rw_lock_get_mutex(
/*==============*/
rw_lock_t* lock)
@@ -178,11 +178,10 @@ rw_lock_get_x_lock_count(
const rw_lock_t* lock) /*!< in: rw-lock */
{
lint lock_copy = lock->lock_word;
- /* If there is a reader, lock_word is not divisible by X_LOCK_DECR */
- if (lock_copy > 0 || (-lock_copy) % X_LOCK_DECR != 0) {
+ if ((lock_copy != 0) && (lock_copy > -X_LOCK_DECR)) {
return(0);
}
- return(((-lock_copy) / X_LOCK_DECR) + 1);
+ return((lock_copy == 0) ? 1 : (2 - (lock_copy + X_LOCK_DECR)));
}
/******************************************************************//**
@@ -325,58 +324,6 @@ rw_lock_s_lock_low(
}
/******************************************************************//**
-Low-level function which locks an rw-lock in s-mode when we know that it
-is possible and none else is currently accessing the rw-lock structure.
-Then we can do the locking without reserving the mutex. */
-UNIV_INLINE
-void
-rw_lock_s_lock_direct(
-/*==================*/
- rw_lock_t* lock, /*!< in/out: rw-lock */
- const char* file_name, /*!< in: file name where requested */
- ulint line) /*!< in: line where lock requested */
-{
- ut_ad(lock->lock_word == X_LOCK_DECR);
-
- /* Indicate there is a new reader by decrementing lock_word */
- lock->lock_word--;
-
- lock->last_s_file_name = file_name;
- lock->last_s_line = line;
-
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_add_debug_info(lock, 0, RW_LOCK_SHARED, file_name, line);
-#endif
-}
-
-/******************************************************************//**
-Low-level function which locks an rw-lock in x-mode when we know that it
-is not locked and none else is currently accessing the rw-lock structure.
-Then we can do the locking without reserving the mutex. */
-UNIV_INLINE
-void
-rw_lock_x_lock_direct(
-/*==================*/
- rw_lock_t* lock, /*!< in/out: rw-lock */
- const char* file_name, /*!< in: file name where requested */
- ulint line) /*!< in: line where lock requested */
-{
- ut_ad(rw_lock_validate(lock));
- ut_ad(lock->lock_word == X_LOCK_DECR);
-
- lock->lock_word -= X_LOCK_DECR;
- lock->writer_thread = os_thread_get_curr_id();
- lock->recursive = TRUE;
-
- lock->last_x_file_name = file_name;
- lock->last_x_line = line;
-
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line);
-#endif
-}
-
-/******************************************************************//**
NOTE! Use the corresponding macro, not directly this function! Lock an
rw-lock in shared mode for the current thread. If the rw-lock is locked
in exclusive mode, or there is an exclusive lock request waiting, the
@@ -458,10 +405,11 @@ rw_lock_x_lock_func_nowait(
/* Relock: this lock_word modification is safe since no other
threads can modify (lock, unlock, or reserve) lock_word while
there is an exclusive writer and this is the writer thread. */
- lock->lock_word -= X_LOCK_DECR;
-
- /* Recursive x-locks must be multiples of X_LOCK_DECR. */
- ut_ad(((-lock->lock_word) % X_LOCK_DECR) == 0);
+ if (lock->lock_word == 0) {
+ lock->lock_word = -X_LOCK_DECR;
+ } else {
+ lock->lock_word--;
+ }
/* Watch for too many recursive locks */
ut_ad(lock->lock_word < 0);
@@ -494,7 +442,9 @@ rw_lock_s_unlock_func(
#endif
rw_lock_t* lock) /*!< in/out: rw-lock */
{
- ut_ad((lock->lock_word % X_LOCK_DECR) != 0);
+ ut_ad(lock->lock_word > -X_LOCK_DECR);
+ ut_ad(lock->lock_word != 0);
+ ut_ad(lock->lock_word < X_LOCK_DECR);
#ifdef UNIV_SYNC_DEBUG
rw_lock_remove_debug_info(lock, pass, RW_LOCK_SHARED);
@@ -530,7 +480,7 @@ rw_lock_x_unlock_func(
#endif
rw_lock_t* lock) /*!< in/out: rw-lock */
{
- ut_ad((lock->lock_word % X_LOCK_DECR) == 0);
+ ut_ad(lock->lock_word == 0 || lock->lock_word <= -X_LOCK_DECR);
/* lock->recursive flag also indicates if lock->writer_thread is
valid or stale. If we are the last of the recursive callers
@@ -541,15 +491,23 @@ rw_lock_x_unlock_func(
if (lock->lock_word == 0) {
/* Last caller in a possible recursive chain. */
lock->recursive = FALSE;
- UNIV_MEM_INVALID(&lock->writer_thread,
- sizeof lock->writer_thread);
}
#ifdef UNIV_SYNC_DEBUG
rw_lock_remove_debug_info(lock, pass, RW_LOCK_EX);
#endif
- if (rw_lock_lock_word_incr(lock, X_LOCK_DECR) == X_LOCK_DECR) {
+ ulint x_lock_incr;
+ if (lock->lock_word == 0) {
+ x_lock_incr = X_LOCK_DECR;
+ } else if (lock->lock_word == -X_LOCK_DECR) {
+ x_lock_incr = X_LOCK_DECR;
+ } else {
+ ut_ad(lock->lock_word < -X_LOCK_DECR);
+ x_lock_incr = 1;
+ }
+
+ if (rw_lock_lock_word_incr(lock, x_lock_incr) == X_LOCK_DECR) {
/* Lock is now free. May have to signal read/write waiters.
We do not need to signal wait_ex waiters, since they cannot
exist when there is a writer. */
@@ -590,7 +548,7 @@ pfs_rw_lock_create_func(
ulint cline) /*!< in: file line where created */
{
/* Initialize the rwlock for performance schema */
- lock->pfs_psi = PSI_CALL(init_rwlock)(key, lock);
+ lock->pfs_psi = PSI_RWLOCK_CALL(init_rwlock)(key, lock);
/* The actual function to initialize an rwlock */
rw_lock_create_func(lock,
@@ -623,13 +581,13 @@ pfs_rw_lock_x_lock_func(
PSI_rwlock_locker_state state;
/* Record the entry of rw x lock request in performance schema */
- locker = PSI_CALL(start_rwlock_wrwait)(
+ locker = PSI_RWLOCK_CALL(start_rwlock_wrwait)(
&state, lock->pfs_psi, PSI_RWLOCK_WRITELOCK, file_name, line);
rw_lock_x_lock_func(lock, pass, file_name, line);
if (locker != NULL)
- PSI_CALL(end_rwlock_wrwait)(locker, 0);
+ PSI_RWLOCK_CALL(end_rwlock_wrwait)(locker, 0);
}
else
{
@@ -659,13 +617,13 @@ pfs_rw_lock_x_lock_func_nowait(
PSI_rwlock_locker_state state;
/* Record the entry of rw x lock request in performance schema */
- locker = PSI_CALL(start_rwlock_wrwait)(
+ locker = PSI_RWLOCK_CALL(start_rwlock_wrwait)(
&state, lock->pfs_psi, PSI_RWLOCK_WRITELOCK, file_name, line);
ret = rw_lock_x_lock_func_nowait(lock, file_name, line);
if (locker != NULL)
- PSI_CALL(end_rwlock_wrwait)(locker, ret);
+ PSI_RWLOCK_CALL(end_rwlock_wrwait)(locker, ret);
}
else
{
@@ -686,7 +644,7 @@ pfs_rw_lock_free_func(
{
if (lock->pfs_psi != NULL)
{
- PSI_CALL(destroy_rwlock)(lock->pfs_psi);
+ PSI_RWLOCK_CALL(destroy_rwlock)(lock->pfs_psi);
lock->pfs_psi = NULL;
}
@@ -714,13 +672,13 @@ pfs_rw_lock_s_lock_func(
PSI_rwlock_locker_state state;
/* Instrumented to inform we are aquiring a shared rwlock */
- locker = PSI_CALL(start_rwlock_rdwait)(
+ locker = PSI_RWLOCK_CALL(start_rwlock_rdwait)(
&state, lock->pfs_psi, PSI_RWLOCK_READLOCK, file_name, line);
rw_lock_s_lock_func(lock, pass, file_name, line);
if (locker != NULL)
- PSI_CALL(end_rwlock_rdwait)(locker, 0);
+ PSI_RWLOCK_CALL(end_rwlock_rdwait)(locker, 0);
}
else
{
@@ -753,13 +711,13 @@ pfs_rw_lock_s_lock_low(
PSI_rwlock_locker_state state;
/* Instrumented to inform we are aquiring a shared rwlock */
- locker = PSI_CALL(start_rwlock_rdwait)(
+ locker = PSI_RWLOCK_CALL(start_rwlock_rdwait)(
&state, lock->pfs_psi, PSI_RWLOCK_READLOCK, file_name, line);
ret = rw_lock_s_lock_low(lock, pass, file_name, line);
if (locker != NULL)
- PSI_CALL(end_rwlock_rdwait)(locker, ret);
+ PSI_RWLOCK_CALL(end_rwlock_rdwait)(locker, ret);
}
else
{
@@ -786,7 +744,7 @@ pfs_rw_lock_x_unlock_func(
{
/* Inform performance schema we are unlocking the lock */
if (lock->pfs_psi != NULL)
- PSI_CALL(unlock_rwlock)(lock->pfs_psi);
+ PSI_RWLOCK_CALL(unlock_rwlock)(lock->pfs_psi);
rw_lock_x_unlock_func(
#ifdef UNIV_SYNC_DEBUG
@@ -812,7 +770,7 @@ pfs_rw_lock_s_unlock_func(
{
/* Inform performance schema we are unlocking the lock */
if (lock->pfs_psi != NULL)
- PSI_CALL(unlock_rwlock)(lock->pfs_psi);
+ PSI_RWLOCK_CALL(unlock_rwlock)(lock->pfs_psi);
rw_lock_s_unlock_func(
#ifdef UNIV_SYNC_DEBUG
diff --git a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
index 1adcf938903..9950a6fbf6b 100644
--- a/storage/innobase/include/sync0sync.h
+++ b/storage/innobase/include/sync0sync.h
@@ -1,7 +1,8 @@
/*****************************************************************************
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
+Copyright (c) 2012, Facebook Inc.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -95,6 +96,7 @@ extern mysql_pfs_key_t mem_pool_mutex_key;
extern mysql_pfs_key_t mutex_list_mutex_key;
extern mysql_pfs_key_t purge_sys_bh_mutex_key;
extern mysql_pfs_key_t recv_sys_mutex_key;
+extern mysql_pfs_key_t recv_writer_mutex_key;
extern mysql_pfs_key_t rseg_mutex_key;
# ifdef UNIV_SYNC_DEBUG
extern mysql_pfs_key_t rw_lock_debug_mutex_key;
@@ -120,9 +122,13 @@ extern mysql_pfs_key_t srv_sys_tasks_mutex_key;
#ifndef HAVE_ATOMIC_BUILTINS
extern mysql_pfs_key_t srv_conc_mutex_key;
#endif /* !HAVE_ATOMIC_BUILTINS */
+#ifndef HAVE_ATOMIC_BUILTINS_64
+extern mysql_pfs_key_t monitor_mutex_key;
+#endif /* !HAVE_ATOMIC_BUILTINS_64 */
extern mysql_pfs_key_t event_os_mutex_key;
extern mysql_pfs_key_t ut_list_mutex_key;
extern mysql_pfs_key_t os_mutex_key;
+extern mysql_pfs_key_t zip_pad_mutex_key;
#endif /* UNIV_PFS_MUTEX */
/******************************************************************//**
@@ -223,7 +229,7 @@ UNIV_INTERN
void
mutex_create_func(
/*==============*/
- mutex_t* mutex, /*!< in: pointer to memory */
+ ib_mutex_t* mutex, /*!< in: pointer to memory */
#ifdef UNIV_DEBUG
const char* cmutex_name, /*!< in: mutex name */
# ifdef UNIV_SYNC_DEBUG
@@ -242,7 +248,7 @@ UNIV_INTERN
void
mutex_free_func(
/*============*/
- mutex_t* mutex); /*!< in: mutex */
+ ib_mutex_t* mutex); /*!< in: mutex */
/**************************************************************//**
NOTE! The following macro should be used in mutex locking, not the
corresponding function. */
@@ -259,7 +265,7 @@ UNIV_INLINE
void
mutex_enter_func(
/*=============*/
- mutex_t* mutex, /*!< in: pointer to mutex */
+ ib_mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name, /*!< in: file name where locked */
ulint line); /*!< in: line where locked */
/********************************************************************//**
@@ -271,7 +277,7 @@ UNIV_INTERN
ulint
mutex_enter_nowait_func(
/*====================*/
- mutex_t* mutex, /*!< in: pointer to mutex */
+ ib_mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name, /*!< in: file name where mutex
requested */
ulint line); /*!< in: line where requested */
@@ -282,7 +288,7 @@ UNIV_INLINE
void
mutex_exit_func(
/*============*/
- mutex_t* mutex); /*!< in: pointer to mutex */
+ ib_mutex_t* mutex); /*!< in: pointer to mutex */
#ifdef UNIV_PFS_MUTEX
@@ -297,7 +303,7 @@ void
pfs_mutex_create_func(
/*==================*/
PSI_mutex_key key, /*!< in: Performance Schema key */
- mutex_t* mutex, /*!< in: pointer to memory */
+ ib_mutex_t* mutex, /*!< in: pointer to memory */
# ifdef UNIV_DEBUG
const char* cmutex_name, /*!< in: mutex name */
# ifdef UNIV_SYNC_DEBUG
@@ -315,7 +321,7 @@ UNIV_INLINE
void
pfs_mutex_enter_func(
/*=================*/
- mutex_t* mutex, /*!< in: pointer to mutex */
+ ib_mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name, /*!< in: file name where locked */
ulint line); /*!< in: line where locked */
/********************************************************************//**
@@ -328,7 +334,7 @@ UNIV_INLINE
ulint
pfs_mutex_enter_nowait_func(
/*========================*/
- mutex_t* mutex, /*!< in: pointer to mutex */
+ ib_mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name, /*!< in: file name where mutex
requested */
ulint line); /*!< in: line where requested */
@@ -341,7 +347,7 @@ UNIV_INLINE
void
pfs_mutex_exit_func(
/*================*/
- mutex_t* mutex); /*!< in: pointer to mutex */
+ ib_mutex_t* mutex); /*!< in: pointer to mutex */
/******************************************************************//**
NOTE! Please use the corresponding macro mutex_free(), not directly
@@ -352,7 +358,7 @@ UNIV_INLINE
void
pfs_mutex_free_func(
/*================*/
- mutex_t* mutex); /*!< in: mutex */
+ ib_mutex_t* mutex); /*!< in: mutex */
#endif /* UNIV_PFS_MUTEX */
@@ -390,7 +396,7 @@ UNIV_INTERN
ibool
mutex_validate(
/*===========*/
- const mutex_t* mutex); /*!< in: mutex */
+ const ib_mutex_t* mutex); /*!< in: mutex */
/******************************************************************//**
Checks that the current thread owns the mutex. Works only
in the debug version.
@@ -399,7 +405,7 @@ UNIV_INTERN
ibool
mutex_own(
/*======*/
- const mutex_t* mutex) /*!< in: mutex */
+ const ib_mutex_t* mutex) /*!< in: mutex */
__attribute__((warn_unused_result));
#endif /* UNIV_DEBUG */
#ifdef UNIV_SYNC_DEBUG
@@ -470,7 +476,7 @@ UNIV_INTERN
void
mutex_get_debug_info(
/*=================*/
- mutex_t* mutex, /*!< in: mutex */
+ ib_mutex_t* mutex, /*!< in: mutex */
const char** file_name, /*!< out: file where requested */
ulint* line, /*!< out: line where requested */
os_thread_id_t* thread_id); /*!< out: id of the thread which owns
@@ -490,7 +496,7 @@ UNIV_INLINE
lock_word_t
mutex_get_lock_word(
/*================*/
- const mutex_t* mutex); /*!< in: mutex */
+ const ib_mutex_t* mutex); /*!< in: mutex */
#ifdef UNIV_SYNC_DEBUG
/******************************************************************//**
NOT to be used outside this module except in debugging! Gets the waiters
@@ -500,7 +506,7 @@ UNIV_INLINE
ulint
mutex_get_waiters(
/*==============*/
- const mutex_t* mutex); /*!< in: mutex */
+ const ib_mutex_t* mutex); /*!< in: mutex */
#endif /* UNIV_SYNC_DEBUG */
/*
@@ -662,6 +668,7 @@ or row lock! */
#define SYNC_FTS_CACHE 1005 /* FTS cache rwlock */
#define SYNC_DICT 1000
#define SYNC_DICT_AUTOINC_MUTEX 999
+#define SYNC_STATS_AUTO_RECALC 997
#define SYNC_DICT_HEADER 995
#define SYNC_IBUF_HEADER 914
#define SYNC_IBUF_PESS_INSERT_MUTEX 912
@@ -679,14 +686,16 @@ or row lock! */
#define SYNC_EXTERN_STORAGE 500
#define SYNC_FSP 400
#define SYNC_FSP_PAGE 395
-/*------------------------------------- Insert buffer headers */
+/*------------------------------------- Change buffer headers */
#define SYNC_IBUF_MUTEX 370 /* ibuf_mutex */
-/*------------------------------------- Insert buffer tree */
+/*------------------------------------- Change buffer tree */
#define SYNC_IBUF_INDEX_TREE 360
#define SYNC_IBUF_TREE_NODE_NEW 359
#define SYNC_IBUF_TREE_NODE 358
#define SYNC_IBUF_BITMAP_MUTEX 351
#define SYNC_IBUF_BITMAP 350
+/*------------------------------------- Change log for online create index */
+#define SYNC_INDEX_ONLINE_LOG 340
/*------------------------------------- MySQL query cache mutex */
/*------------------------------------- MySQL binlog mutex */
/*-------------------------------*/
@@ -733,7 +742,7 @@ Do not use its fields directly! The structure used in the spin lock
implementation of a mutual exclusion semaphore. */
/** InnoDB mutex */
-struct mutex_struct {
+struct ib_mutex_t {
os_event_t event; /*!< Used by sync0arr.cc for the wait queue */
volatile lock_word_t lock_word; /*!< lock_word is the target
of the atomic test-and-set instruction when
@@ -748,7 +757,7 @@ struct mutex_struct {
may be) threads waiting in the global wait
array for this mutex to be released.
Otherwise, this is 0. */
- UT_LIST_NODE_T(mutex_t) list; /*!< All allocated mutexes are put into
+ UT_LIST_NODE_T(ib_mutex_t) list; /*!< All allocated mutexes are put into
a list. Pointers to the next and prev. */
#ifdef UNIV_SYNC_DEBUG
const char* file_name; /*!< File where the mutex was locked */
@@ -757,23 +766,17 @@ struct mutex_struct {
#endif /* UNIV_SYNC_DEBUG */
const char* cfile_name;/*!< File name where mutex created */
ulint cline; /*!< Line where created */
+ ulong count_os_wait; /*!< count of os_wait */
#ifdef UNIV_DEBUG
+
+/** Value of mutex_t::magic_n */
+# define MUTEX_MAGIC_N 979585UL
+
os_thread_id_t thread_id; /*!< The thread id of the thread
which locked the mutex. */
ulint magic_n; /*!< MUTEX_MAGIC_N */
-/** Value of mutex_struct::magic_n */
-# define MUTEX_MAGIC_N (ulint)979585
-#endif /* UNIV_DEBUG */
- ulong count_os_wait; /*!< count of os_wait */
-#ifdef UNIV_DEBUG
- ulong count_using; /*!< count of times mutex used */
- ulong count_spin_loop; /*!< count of spin loops */
- ulong count_spin_rounds;/*!< count of spin rounds */
- ulong count_os_yield; /*!< count of os_wait */
- ulonglong lspent_time; /*!< mutex os_wait timer msec */
- ulonglong lmax_spent_time;/*!< mutex os_wait timer msec */
const char* cmutex_name; /*!< mutex name */
- ulint mutex_type; /*!< 0=usual mutex, 1=rw_lock mutex */
+ ulint ib_mutex_type; /*!< 0=usual mutex, 1=rw_lock mutex */
#endif /* UNIV_DEBUG */
#ifdef UNIV_PFS_MUTEX
struct PSI_mutex* pfs_psi; /*!< The performance schema
@@ -799,12 +802,12 @@ extern ibool sync_order_checks_on;
extern ibool sync_initialized;
/** Global list of database mutexes (not OS mutexes) created. */
-typedef UT_LIST_BASE_NODE_T(mutex_t) ut_list_base_node_t;
+typedef UT_LIST_BASE_NODE_T(ib_mutex_t) ut_list_base_node_t;
/** Global list of database mutexes (not OS mutexes) created. */
extern ut_list_base_node_t mutex_list;
/** Mutex protecting the mutex_list variable */
-extern mutex_t mutex_list_mutex;
+extern ib_mutex_t mutex_list_mutex;
#ifndef HAVE_ATOMIC_BUILTINS
/**********************************************************//**
@@ -813,7 +816,7 @@ UNIV_INLINE
void
os_atomic_dec_ulint_func(
/*=====================*/
- mutex_t* mutex, /*!< in: mutex guarding the
+ ib_mutex_t* mutex, /*!< in: mutex guarding the
decrement */
volatile ulint* var, /*!< in/out: variable to
decrement */
@@ -824,7 +827,7 @@ UNIV_INLINE
void
os_atomic_inc_ulint_func(
/*=====================*/
- mutex_t* mutex, /*!< in: mutex guarding the
+ ib_mutex_t* mutex, /*!< in: mutex guarding the
increment */
volatile ulint* var, /*!< in/out: variable to
increment */
diff --git a/storage/innobase/include/sync0sync.ic b/storage/innobase/include/sync0sync.ic
index 746e73ebee7..ad77ad6d5a4 100644
--- a/storage/innobase/include/sync0sync.ic
+++ b/storage/innobase/include/sync0sync.ic
@@ -36,7 +36,7 @@ UNIV_INTERN
void
mutex_set_waiters(
/*==============*/
- mutex_t* mutex, /*!< in: mutex */
+ ib_mutex_t* mutex, /*!< in: mutex */
ulint n); /*!< in: value to set */
/******************************************************************//**
Reserves a mutex for the current thread. If the mutex is reserved, the
@@ -46,7 +46,7 @@ UNIV_INTERN
void
mutex_spin_wait(
/*============*/
- mutex_t* mutex, /*!< in: pointer to mutex */
+ ib_mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name, /*!< in: file name where mutex
requested */
ulint line); /*!< in: line where requested */
@@ -57,7 +57,7 @@ UNIV_INTERN
void
mutex_set_debug_info(
/*=================*/
- mutex_t* mutex, /*!< in: mutex */
+ ib_mutex_t* mutex, /*!< in: mutex */
const char* file_name, /*!< in: file where requested */
ulint line); /*!< in: line where requested */
#endif /* UNIV_SYNC_DEBUG */
@@ -67,7 +67,7 @@ UNIV_INTERN
void
mutex_signal_object(
/*================*/
- mutex_t* mutex); /*!< in: mutex */
+ ib_mutex_t* mutex); /*!< in: mutex */
/******************************************************************//**
Performs an atomic test-and-set instruction to the lock_word field of a
@@ -75,9 +75,9 @@ mutex.
@return the previous value of lock_word: 0 or 1 */
UNIV_INLINE
byte
-mutex_test_and_set(
+ib_mutex_test_and_set(
/*===============*/
- mutex_t* mutex) /*!< in: mutex */
+ ib_mutex_t* mutex) /*!< in: mutex */
{
#if defined(HAVE_ATOMIC_BUILTINS)
return(os_atomic_test_and_set_byte(&mutex->lock_word, 1));
@@ -105,7 +105,7 @@ UNIV_INLINE
void
mutex_reset_lock_word(
/*==================*/
- mutex_t* mutex) /*!< in: mutex */
+ ib_mutex_t* mutex) /*!< in: mutex */
{
#if defined(HAVE_ATOMIC_BUILTINS)
/* In theory __sync_lock_release should be used to release the lock.
@@ -125,7 +125,7 @@ UNIV_INLINE
lock_word_t
mutex_get_lock_word(
/*================*/
- const mutex_t* mutex) /*!< in: mutex */
+ const ib_mutex_t* mutex) /*!< in: mutex */
{
ut_ad(mutex);
@@ -139,7 +139,7 @@ UNIV_INLINE
ulint
mutex_get_waiters(
/*==============*/
- const mutex_t* mutex) /*!< in: mutex */
+ const ib_mutex_t* mutex) /*!< in: mutex */
{
const volatile ulint* ptr; /*!< declared volatile to ensure that
the value is read from memory */
@@ -158,7 +158,7 @@ UNIV_INLINE
void
mutex_exit_func(
/*============*/
- mutex_t* mutex) /*!< in: pointer to mutex */
+ ib_mutex_t* mutex) /*!< in: pointer to mutex */
{
ut_ad(mutex_own(mutex));
@@ -199,7 +199,7 @@ UNIV_INLINE
void
mutex_enter_func(
/*=============*/
- mutex_t* mutex, /*!< in: pointer to mutex */
+ ib_mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name, /*!< in: file name where locked */
ulint line) /*!< in: line where locked */
{
@@ -209,9 +209,7 @@ mutex_enter_func(
/* Note that we do not peek at the value of lock_word before trying
the atomic test_and_set; we could peek, and possibly save time. */
- ut_d(mutex->count_using++);
-
- if (!mutex_test_and_set(mutex)) {
+ if (!ib_mutex_test_and_set(mutex)) {
ut_d(mutex->thread_id = os_thread_get_curr_id());
#ifdef UNIV_SYNC_DEBUG
mutex_set_debug_info(mutex, file_name, line);
@@ -232,28 +230,28 @@ UNIV_INLINE
void
pfs_mutex_enter_func(
/*=================*/
- mutex_t* mutex, /*!< in: pointer to mutex */
+ ib_mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name, /*!< in: file name where locked */
ulint line) /*!< in: line where locked */
{
- if (mutex->pfs_psi != NULL)
- {
+ if (mutex->pfs_psi != NULL) {
PSI_mutex_locker* locker;
PSI_mutex_locker_state state;
- locker = PSI_CALL(start_mutex_wait)(&state, mutex->pfs_psi,
+ locker = PSI_MUTEX_CALL(start_mutex_wait)(
+ &state, mutex->pfs_psi,
PSI_MUTEX_LOCK, file_name, line);
mutex_enter_func(mutex, file_name, line);
- if (locker != NULL)
- PSI_CALL(end_mutex_wait)(locker, 0);
- }
- else
- {
+ if (locker != NULL) {
+ PSI_MUTEX_CALL(end_mutex_wait)(locker, 0);
+ }
+ } else {
mutex_enter_func(mutex, file_name, line);
}
}
+
/********************************************************************//**
NOTE! Please use the corresponding macro mutex_enter_nowait(), not directly
this function!
@@ -264,33 +262,33 @@ UNIV_INLINE
ulint
pfs_mutex_enter_nowait_func(
/*========================*/
- mutex_t* mutex, /*!< in: pointer to mutex */
+ ib_mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name, /*!< in: file name where mutex
requested */
ulint line) /*!< in: line where requested */
{
- ulint ret;
+ ulint ret;
- if (mutex->pfs_psi != NULL)
- {
+ if (mutex->pfs_psi != NULL) {
PSI_mutex_locker* locker;
PSI_mutex_locker_state state;
- locker = PSI_CALL(start_mutex_wait)(&state, mutex->pfs_psi,
+ locker = PSI_MUTEX_CALL(start_mutex_wait)(
+ &state, mutex->pfs_psi,
PSI_MUTEX_TRYLOCK, file_name, line);
ret = mutex_enter_nowait_func(mutex, file_name, line);
- if (locker != NULL)
- PSI_CALL(end_mutex_wait)(locker, (int) ret);
- }
- else
- {
+ if (locker != NULL) {
+ PSI_MUTEX_CALL(end_mutex_wait)(locker, (int) ret);
+ }
+ } else {
ret = mutex_enter_nowait_func(mutex, file_name, line);
}
return(ret);
}
+
/******************************************************************//**
NOTE! Please use the corresponding macro mutex_exit(), not directly
this function!
@@ -300,10 +298,11 @@ UNIV_INLINE
void
pfs_mutex_exit_func(
/*================*/
- mutex_t* mutex) /*!< in: pointer to mutex */
+ ib_mutex_t* mutex) /*!< in: pointer to mutex */
{
- if (mutex->pfs_psi != NULL)
- PSI_CALL(unlock_mutex)(mutex->pfs_psi);
+ if (mutex->pfs_psi != NULL) {
+ PSI_MUTEX_CALL(unlock_mutex)(mutex->pfs_psi);
+ }
mutex_exit_func(mutex);
}
@@ -319,7 +318,7 @@ void
pfs_mutex_create_func(
/*==================*/
mysql_pfs_key_t key, /*!< in: Performance Schema key */
- mutex_t* mutex, /*!< in: pointer to memory */
+ ib_mutex_t* mutex, /*!< in: pointer to memory */
# ifdef UNIV_DEBUG
const char* cmutex_name, /*!< in: mutex name */
# ifdef UNIV_SYNC_DEBUG
@@ -329,7 +328,7 @@ pfs_mutex_create_func(
const char* cfile_name, /*!< in: file name where created */
ulint cline) /*!< in: file line where created */
{
- mutex->pfs_psi = PSI_CALL(init_mutex)(key, mutex);
+ mutex->pfs_psi = PSI_MUTEX_CALL(init_mutex)(key, mutex);
mutex_create_func(mutex,
# ifdef UNIV_DEBUG
@@ -341,6 +340,7 @@ pfs_mutex_create_func(
cfile_name,
cline);
}
+
/******************************************************************//**
NOTE! Please use the corresponding macro mutex_free(), not directly
this function!
@@ -350,11 +350,10 @@ UNIV_INLINE
void
pfs_mutex_free_func(
/*================*/
- mutex_t* mutex) /*!< in: mutex */
+ ib_mutex_t* mutex) /*!< in: mutex */
{
- if (mutex->pfs_psi != NULL)
- {
- PSI_CALL(destroy_mutex)(mutex->pfs_psi);
+ if (mutex->pfs_psi != NULL) {
+ PSI_MUTEX_CALL(destroy_mutex)(mutex->pfs_psi);
mutex->pfs_psi = NULL;
}
@@ -370,7 +369,7 @@ UNIV_INLINE
void
os_atomic_dec_ulint_func(
/*=====================*/
- mutex_t* mutex, /*!< in: mutex guarding the dec */
+ ib_mutex_t* mutex, /*!< in: mutex guarding the dec */
volatile ulint* var, /*!< in/out: variable to decrement */
ulint delta) /*!< in: delta to decrement */
{
@@ -391,7 +390,7 @@ UNIV_INLINE
void
os_atomic_inc_ulint_func(
/*=====================*/
- mutex_t* mutex, /*!< in: mutex guarding the increment */
+ ib_mutex_t* mutex, /*!< in: mutex guarding the increment */
volatile ulint* var, /*!< in/out: variable to increment */
ulint delta) /*!< in: delta to increment */
{
diff --git a/storage/innobase/include/sync0types.h b/storage/innobase/include/sync0types.h
index 679cf6a9074..0d143004a7a 100644
--- a/storage/innobase/include/sync0types.h
+++ b/storage/innobase/include/sync0types.h
@@ -26,9 +26,6 @@ Created 9/5/1995 Heikki Tuuri
#ifndef sync0types_h
#define sync0types_h
-/** Rename mutex_t to avoid name space collision on some systems */
-#define mutex_t ib_mutex_t
-/** InnoDB mutex */
-typedef struct mutex_struct mutex_t;
+struct ib_mutex_t;
#endif
diff --git a/storage/innobase/include/trx0i_s.h b/storage/innobase/include/trx0i_s.h
index c286fc4d9ae..662971a7841 100644
--- a/storage/innobase/include/trx0i_s.h
+++ b/storage/innobase/include/trx0i_s.h
@@ -79,25 +79,21 @@ do { \
} while (0)
/** A row of INFORMATION_SCHEMA.innodb_locks */
-typedef struct i_s_locks_row_struct i_s_locks_row_t;
-/** A row of INFORMATION_SCHEMA.innodb_trx */
-typedef struct i_s_trx_row_struct i_s_trx_row_t;
-/** A row of INFORMATION_SCHEMA.innodb_lock_waits */
-typedef struct i_s_lock_waits_row_struct i_s_lock_waits_row_t;
+struct i_s_locks_row_t;
/** Objects of trx_i_s_cache_t::locks_hash */
-typedef struct i_s_hash_chain_struct i_s_hash_chain_t;
+struct i_s_hash_chain_t;
/** Objects of this type are added to the hash table
trx_i_s_cache_t::locks_hash */
-struct i_s_hash_chain_struct {
+struct i_s_hash_chain_t {
i_s_locks_row_t* value; /*!< row of
INFORMATION_SCHEMA.innodb_locks*/
i_s_hash_chain_t* next; /*!< next item in the hash chain */
};
/** This structure represents INFORMATION_SCHEMA.innodb_locks row */
-struct i_s_locks_row_struct {
+struct i_s_locks_row_t {
trx_id_t lock_trx_id; /*!< transaction identifier */
const char* lock_mode; /*!< lock mode from
lock_get_mode_str() */
@@ -128,16 +124,16 @@ struct i_s_locks_row_struct {
};
/** This structure represents INFORMATION_SCHEMA.innodb_trx row */
-struct i_s_trx_row_struct {
+struct i_s_trx_row_t {
trx_id_t trx_id; /*!< transaction identifier */
const char* trx_state; /*!< transaction state from
trx_get_que_state_str() */
- ib_time_t trx_started; /*!< trx_struct::start_time */
+ ib_time_t trx_started; /*!< trx_t::start_time */
const i_s_locks_row_t* requested_lock_row;
/*!< pointer to a row
in innodb_locks if trx
is waiting, or NULL */
- ib_time_t trx_wait_started; /*!< trx_struct::wait_started */
+ ib_time_t trx_wait_started; /*!< trx_t::wait_started */
ullint trx_weight; /*!< TRX_WEIGHT() */
ulint trx_mysql_thread_id; /*!< thd_get_thread_id() */
const char* trx_query; /*!< MySQL statement being
@@ -145,36 +141,34 @@ struct i_s_trx_row_struct {
struct charset_info_st* trx_query_cs;
/*!< charset encode the MySQL
statement */
- const char* trx_operation_state; /*!< trx_struct::op_info */
+ const char* trx_operation_state; /*!< trx_t::op_info */
ulint trx_tables_in_use;/*!< n_mysql_tables_in_use in
- trx_struct */
+ trx_t */
ulint trx_tables_locked;
/*!< mysql_n_tables_locked in
- trx_struct */
+ trx_t */
ulint trx_lock_structs;/*!< list len of trx_locks in
- trx_struct */
+ trx_t */
ulint trx_lock_memory_bytes;
/*!< mem_heap_get_size(
trx->lock_heap) */
ulint trx_rows_locked;/*!< lock_number_of_rows_locked() */
- ullint trx_rows_modified;/*!< trx_struct::undo_no */
+ ullint trx_rows_modified;/*!< trx_t::undo_no */
ulint trx_concurrency_tickets;
/*!< n_tickets_to_enter_innodb in
- trx_struct */
+ trx_t */
const char* trx_isolation_level;
- /*!< isolation_level in trx_struct*/
+ /*!< isolation_level in trx_t */
ibool trx_unique_checks;
- /*!< check_unique_secondary in
- trx_struct*/
+					/*!< check_unique_secondary in trx_t */
ibool trx_foreign_key_checks;
- /*!< check_foreigns in trx_struct */
+ /*!< check_foreigns in trx_t */
const char* trx_foreign_key_error;
- /*!< detailed_error in trx_struct */
+ /*!< detailed_error in trx_t */
ibool trx_has_search_latch;
- /*!< has_search_latch in trx_struct */
+ /*!< has_search_latch in trx_t */
ulint trx_search_latch_timeout;
- /*!< search_latch_timeout in
- trx_struct */
+ /*!< search_latch_timeout in trx_t */
ulint trx_is_read_only;
/*!< trx_t::read_only */
ulint trx_is_autocommit_non_locking;
@@ -183,13 +177,13 @@ struct i_s_trx_row_struct {
};
/** This structure represents INFORMATION_SCHEMA.innodb_lock_waits row */
-struct i_s_lock_waits_row_struct {
+struct i_s_lock_waits_row_t {
const i_s_locks_row_t* requested_lock_row; /*!< requested lock */
const i_s_locks_row_t* blocking_lock_row; /*!< blocking lock */
};
/** Cache of INFORMATION_SCHEMA table data */
-typedef struct trx_i_s_cache_struct trx_i_s_cache_t;
+struct trx_i_s_cache_t;
/** Auxiliary enum used by functions that need to select one of the
INFORMATION_SCHEMA tables */
diff --git a/storage/innobase/include/trx0purge.h b/storage/innobase/include/trx0purge.h
index 0199083467c..1e13c883800 100644
--- a/storage/innobase/include/trx0purge.h
+++ b/storage/innobase/include/trx0purge.h
@@ -108,7 +108,8 @@ enum purge_state_t {
PURGE_STATE_INIT, /*!< Purge instance created */
PURGE_STATE_RUN, /*!< Purge should be running */
PURGE_STATE_STOP, /*!< Purge should be stopped */
- PURGE_STATE_EXIT /*!< Purge has been shutdown */
+ PURGE_STATE_EXIT, /*!< Purge has been shutdown */
+ PURGE_STATE_DISABLED /*!< Purge was never started */
};
/*******************************************************************//**
@@ -121,16 +122,16 @@ trx_purge_state(void);
/** This is the purge pointer/iterator. We need both the undo no and the
transaction no up to which purge has parsed and applied the records. */
-typedef struct purge_iter_struct {
+struct purge_iter_t {
trx_id_t trx_no; /*!< Purge has advanced past all
transactions whose number is less
than this */
undo_no_t undo_no; /*!< Purge has advanced past all records
whose undo number is less than this */
-} purge_iter_t;
+};
/** The control structure used in the purge operation */
-struct trx_purge_struct{
+struct trx_purge_t{
sess_t* sess; /*!< System session running the purge
query */
trx_t* trx; /*!< System transaction running the
@@ -146,7 +147,8 @@ struct trx_purge_struct{
protects state and running */
os_event_t event; /*!< State signal event */
ulint n_stop; /*!< Counter to track number stops */
- bool running; /*!< true, if purge is active */
+ volatile bool running; /*!< true, if purge is active,
+ we check this without the latch too */
volatile purge_state_t state; /*!< Purge coordinator thread states,
we check this in several places
without holding the latch. */
@@ -171,6 +173,10 @@ struct trx_purge_struct{
purge_iter_t limit; /* The 'purge pointer' which advances
during a purge, and which is used in
history list truncation */
+#ifdef UNIV_DEBUG
+	purge_iter_t	done;	/* The 'purge pointer' up to which
+					purge is known to have completed. */
+#endif /* UNIV_DEBUG */
/*-----------------------------*/
ibool next_stored; /*!< TRUE if the info of the next record
to purge is stored below: if yes, then
@@ -196,17 +202,15 @@ struct trx_purge_struct{
ib_bh_t* ib_bh; /*!< Binary min-heap, ordered on
rseg_queue_t::trx_no. It is protected
by the bh_mutex */
- mutex_t bh_mutex; /*!< Mutex protecting ib_bh */
+ ib_mutex_t bh_mutex; /*!< Mutex protecting ib_bh */
};
/** Info required to purge a record */
-struct trx_purge_rec_struct {
+struct trx_purge_rec_t {
trx_undo_rec_t* undo_rec; /*!< Record to purge */
roll_ptr_t roll_ptr; /*!< File pointr to UNDO record */
};
-typedef struct trx_purge_rec_struct trx_purge_rec_t;
-
#ifndef UNIV_NONINL
#include "trx0purge.ic"
#endif
diff --git a/storage/innobase/include/trx0rec.h b/storage/innobase/include/trx0rec.h
index c9fae45dad4..cd1ecc096fd 100644
--- a/storage/innobase/include/trx0rec.h
+++ b/storage/innobase/include/trx0rec.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -105,10 +105,11 @@ trx_undo_rec_get_pars(
TRX_UNDO_INSERT_REC, ... */
ulint* cmpl_info, /*!< out: compiler info, relevant only
for update type records */
- ibool* updated_extern, /*!< out: TRUE if we updated an
+ bool* updated_extern, /*!< out: true if we updated an
externally stored fild */
undo_no_t* undo_no, /*!< out: undo log record number */
- table_id_t* table_id); /*!< out: table id */
+ table_id_t* table_id) /*!< out: table id */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
Builds a row reference from an undo log record.
@return pointer to remaining part of undo record */
@@ -178,8 +179,9 @@ trx_undo_update_rec_get_update(
needed is allocated */
upd_t** upd); /*!< out, own: update vector */
/*******************************************************************//**
-Builds a partial row from an update undo log record. It contains the
-columns which occur as ordering in any index of the table.
+Builds a partial row from an update undo log record, for purge.
+It contains the columns which occur as ordering in any index of the table.
+Any missing columns are indicated by col->mtype == DATA_MISSING.
@return pointer to remaining part of undo record */
UNIV_INTERN
byte*
@@ -197,8 +199,9 @@ trx_undo_rec_get_partial_row(
ibool ignore_prefix, /*!< in: flag to indicate if we
expect blob prefixes in undo. Used
only in the assertion. */
- mem_heap_t* heap); /*!< in: memory heap from which the memory
+ mem_heap_t* heap) /*!< in: memory heap from which the memory
needed is allocated */
+ __attribute__((nonnull, warn_unused_result));
/***********************************************************************//**
Writes information to an undo log about an insert, update, or a delete marking
of a clustered index record. This information is used in a rollback of the
@@ -206,7 +209,7 @@ transaction and in consistent reads that must look to the history of this
transaction.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
trx_undo_report_row_operation(
/*==========================*/
ulint flags, /*!< in: if BTR_NO_UNDO_LOG_FLAG bit is
@@ -225,10 +228,12 @@ trx_undo_report_row_operation(
const rec_t* rec, /*!< in: case of an update or delete
marking, the record in the clustered
index, otherwise NULL */
- roll_ptr_t* roll_ptr); /*!< out: rollback pointer to the
+ const ulint* offsets, /*!< in: rec_get_offsets(rec) */
+ roll_ptr_t* roll_ptr) /*!< out: rollback pointer to the
inserted undo log record,
0 if BTR_NO_UNDO_LOG
flag was specified */
+ __attribute__((nonnull(3,4,10), warn_unused_result));
/******************************************************************//**
Copies an undo record to heap. This function can be called if we know that
the undo log record exists.
@@ -238,16 +243,17 @@ trx_undo_rec_t*
trx_undo_get_undo_rec_low(
/*======================*/
roll_ptr_t roll_ptr, /*!< in: roll pointer to record */
- mem_heap_t* heap); /*!< in: memory heap where copied */
+ mem_heap_t* heap) /*!< in: memory heap where copied */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
Build a previous version of a clustered index record. The caller must
-hold a latch on the index page of the clustered index record, to
-guarantee that the stack of versions is locked all the way down to the
-purge_sys->view.
-@return DB_SUCCESS, or DB_MISSING_HISTORY if the previous version is
-earlier than purge_view, which means that it may have been removed */
+hold a latch on the index page of the clustered index record.
+@retval true if previous version was built, or if it was an insert
+or the table has been rebuilt
+@retval false if the previous version is earlier than purge_view,
+which means that it may have been removed */
UNIV_INTERN
-ulint
+bool
trx_undo_prev_version_build(
/*========================*/
const rec_t* index_rec,/*!< in: clustered index record in the
@@ -256,12 +262,13 @@ trx_undo_prev_version_build(
index_rec page and purge_view */
const rec_t* rec, /*!< in: version of a clustered index record */
dict_index_t* index, /*!< in: clustered index */
- ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
mem_heap_t* heap, /*!< in: memory heap from which the memory
needed is allocated */
- rec_t** old_vers);/*!< out, own: previous version, or NULL if
+ rec_t** old_vers)/*!< out, own: previous version, or NULL if
rec is the first inserted version, or if
history data has been deleted */
+ __attribute__((nonnull));
#endif /* !UNIV_HOTBACKUP */
/***********************************************************//**
Parses a redo log record of adding an undo log record.
diff --git a/storage/innobase/include/trx0rec.ic b/storage/innobase/include/trx0rec.ic
index 847c26f03a8..08704f6b821 100644
--- a/storage/innobase/include/trx0rec.ic
+++ b/storage/innobase/include/trx0rec.ic
@@ -90,7 +90,7 @@ trx_undo_rec_get_offset(
/*====================*/
undo_no_t undo_no) /*!< in: undo no read from node */
{
- return (3 + mach_ull_get_much_compressed_size(undo_no));
+ return(3 + mach_ull_get_much_compressed_size(undo_no));
}
/***********************************************************************//**
diff --git a/storage/innobase/include/trx0roll.h b/storage/innobase/include/trx0roll.h
index 3b724e03830..9d020a10725 100644
--- a/storage/innobase/include/trx0roll.h
+++ b/storage/innobase/include/trx0roll.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -146,29 +146,32 @@ trx_rollback_step(
Rollback a transaction used in MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
trx_rollback_for_mysql(
/*===================*/
- trx_t* trx); /*!< in/out: transaction */
+ trx_t* trx) /*!< in/out: transaction */
+ __attribute__((nonnull));
/*******************************************************************//**
Rollback the latest SQL statement for MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
trx_rollback_last_sql_stat_for_mysql(
/*=================================*/
- trx_t* trx); /*!< in/out: transaction */
+ trx_t* trx) /*!< in/out: transaction */
+ __attribute__((nonnull));
/*******************************************************************//**
Rollback a transaction to a given savepoint or do a complete rollback.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
trx_rollback_to_savepoint(
/*======================*/
trx_t* trx, /*!< in: transaction handle */
- trx_savept_t* savept);/*!< in: pointer to savepoint undo number, if
+ trx_savept_t* savept) /*!< in: pointer to savepoint undo number, if
partial rollback requested, or NULL for
complete rollback */
+ __attribute__((nonnull(1)));
/*******************************************************************//**
Rolls back a transaction back to a named savepoint. Modifications after the
savepoint are undone but InnoDB does NOT release the corresponding locks
@@ -179,17 +182,18 @@ were set after this savepoint are deleted.
@return if no savepoint of the name found then DB_NO_SAVEPOINT,
otherwise DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
trx_rollback_to_savepoint_for_mysql(
/*================================*/
trx_t* trx, /*!< in: transaction handle */
const char* savepoint_name, /*!< in: savepoint name */
- ib_int64_t* mysql_binlog_cache_pos);/*!< out: the MySQL binlog cache
+ ib_int64_t* mysql_binlog_cache_pos) /*!< out: the MySQL binlog cache
position corresponding to this
savepoint; MySQL needs this
information to remove the
binlog entries of the queries
executed after the savepoint */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
Creates a named savepoint. If the transaction is not yet started, starts it.
If there is already a savepoint of the same name, this call erases that old
@@ -197,28 +201,28 @@ savepoint and replaces it with a new. Savepoints are deleted in a transaction
commit or rollback.
@return always DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
trx_savepoint_for_mysql(
/*====================*/
trx_t* trx, /*!< in: transaction handle */
const char* savepoint_name, /*!< in: savepoint name */
- ib_int64_t binlog_cache_pos); /*!< in: MySQL binlog cache
+ ib_int64_t binlog_cache_pos) /*!< in: MySQL binlog cache
position corresponding to this
connection at the time of the
savepoint */
-
+ __attribute__((nonnull));
/*******************************************************************//**
Releases a named savepoint. Savepoints which
were set after this savepoint are deleted.
@return if no savepoint of the name found then DB_NO_SAVEPOINT,
otherwise DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
trx_release_savepoint_for_mysql(
/*============================*/
trx_t* trx, /*!< in: transaction handle */
- const char* savepoint_name); /*!< in: savepoint name */
-
+ const char* savepoint_name) /*!< in: savepoint name */
+ __attribute__((nonnull, warn_unused_result));
/*******************************************************************//**
Frees savepoint structs starting from savep. */
UNIV_INTERN
@@ -230,8 +234,8 @@ trx_roll_savepoints_free(
if this is NULL, free all savepoints
of trx */
-/** A cell of trx_undo_arr_struct; used during a rollback and a purge */
-struct trx_undo_inf_struct{
+/** A cell of trx_undo_arr_t; used during a rollback and a purge */
+struct trx_undo_inf_t{
ibool in_use; /*!< true if cell is being used */
trx_id_t trx_no; /*!< transaction number: not defined during
a rollback */
@@ -241,7 +245,7 @@ struct trx_undo_inf_struct{
/** During a rollback and a purge, undo numbers of undo records currently being
processed are stored in this array */
-struct trx_undo_arr_struct{
+struct trx_undo_arr_t{
ulint n_cells; /*!< number of cells in the array */
ulint n_used; /*!< number of cells in use */
trx_undo_inf_t* infos; /*!< the array of undo infos */
@@ -258,7 +262,7 @@ enum roll_node_state {
};
/** Rollback command node in a query graph */
-struct roll_node_struct{
+struct roll_node_t{
que_common_t common; /*!< node type: QUE_NODE_ROLLBACK */
enum roll_node_state state; /*!< node execution state */
ibool partial;/*!< TRUE if we want a partial
@@ -270,7 +274,7 @@ struct roll_node_struct{
};
/** A savepoint set with SQL's "SAVEPOINT savepoint_id" command */
-struct trx_named_savept_struct{
+struct trx_named_savept_t{
char* name; /*!< savepoint name */
trx_savept_t savept; /*!< the undo number corresponding to
the savepoint */
diff --git a/storage/innobase/include/trx0rseg.h b/storage/innobase/include/trx0rseg.h
index 66e5449cf57..185b05876b4 100644
--- a/storage/innobase/include/trx0rseg.h
+++ b/storage/innobase/include/trx0rseg.h
@@ -151,11 +151,11 @@ trx_rseg_get_n_undo_tablespaces(
#define TRX_RSEG_MAX_N_TRXS (TRX_RSEG_N_SLOTS / 2)
/* The rollback segment memory object */
-struct trx_rseg_struct{
+struct trx_rseg_t{
/*--------------------------------------------------------*/
ulint id; /*!< rollback segment id == the index of
its slot in the trx system file copy */
- mutex_t mutex; /*!< mutex protecting the fields in this
+ ib_mutex_t mutex; /*!< mutex protecting the fields in this
struct except id, which is constant */
ulint space; /*!< space where the rollback segment is
header is placed */
@@ -192,13 +192,11 @@ struct trx_rseg_struct{
};
/** For prioritising the rollback segments for purge. */
-struct rseg_queue_struct {
+struct rseg_queue_t {
trx_id_t trx_no; /*!< trx_rseg_t::last_trx_no */
trx_rseg_t* rseg; /*!< Rollback segment */
};
-typedef struct rseg_queue_struct rseg_queue_t;
-
/* Undo log segment slot in a rollback segment header */
/*-------------------------------------------------------------*/
#define TRX_RSEG_SLOT_PAGE_NO 0 /* Page number of the header page of
diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h
index b1aa3d2224c..70f214d1ac7 100644
--- a/storage/innobase/include/trx0sys.h
+++ b/storage/innobase/include/trx0sys.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -426,7 +426,7 @@ trx_sys_file_format_max_get(void);
Check for the max file format tag stored on disk.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
trx_sys_file_format_max_check(
/*==========================*/
ulint max_format_id); /*!< in: the max format id to check */
@@ -600,18 +600,28 @@ identifier is added to this 64-bit constant. */
#ifndef UNIV_HOTBACKUP
/** The transaction system central memory data structure. */
-struct trx_sys_struct{
+struct trx_sys_t{
- mutex_t mutex; /*!< mutex protecting most fields in
+ ib_mutex_t mutex; /*!< mutex protecting most fields in
this structure except when noted
otherwise */
- ulint n_mysql_trx; /*!< Number of transactions currently
- allocated for MySQL */
ulint n_prepared_trx; /*!< Number of transactions currently
in the XA PREPARED state */
+ ulint n_prepared_recovered_trx; /*!< Number of transactions
+ currently in XA PREPARED state that are
+ also recovered. Such transactions cannot
+ be added during runtime. They can only
+ occur after recovery if mysqld crashed
+ while there were XA PREPARED
+ transactions. We disable query cache
+ if such transactions exist. */
trx_id_t max_trx_id; /*!< The smallest number not yet
assigned as a transaction id or
transaction number */
+#ifdef UNIV_DEBUG
+ trx_id_t rw_max_trx_id; /*!< Max trx id of read-write transactions
+ which exist or existed */
+#endif
trx_list_t rw_trx_list; /*!< List of active and committed in
memory read-write transactions, sorted
on trx id, biggest first. Recovered
diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h
index 3e6cfc7d0da..bb84c1806f2 100644
--- a/storage/innobase/include/trx0trx.h
+++ b/storage/innobase/include/trx0trx.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -121,20 +121,69 @@ UNIV_INTERN
void
trx_lists_init_at_db_start(void);
/*============================*/
+
+#ifdef UNIV_DEBUG
+#define trx_start_if_not_started_xa(t) \
+ { \
+ (t)->start_line = __LINE__; \
+ (t)->start_file = __FILE__; \
+ trx_start_if_not_started_xa_low((t)); \
+ }
+#else
+#define trx_start_if_not_started_xa(t) \
+ trx_start_if_not_started_xa_low((t))
+#endif /* UNIV_DEBUG */
+
/*************************************************************//**
Starts the transaction if it is not yet started. */
UNIV_INTERN
void
-trx_start_if_not_started_xa(
-/*========================*/
+trx_start_if_not_started_xa_low(
+/*============================*/
trx_t* trx); /*!< in: transaction */
/*************************************************************//**
Starts the transaction if it is not yet started. */
UNIV_INTERN
void
-trx_start_if_not_started(
-/*=====================*/
+trx_start_if_not_started_low(
+/*=========================*/
trx_t* trx); /*!< in: transaction */
+
+#ifdef UNIV_DEBUG
+#define trx_start_if_not_started(t) \
+ { \
+ (t)->start_line = __LINE__; \
+ (t)->start_file = __FILE__; \
+ trx_start_if_not_started_low((t)); \
+ }
+#else
+#define trx_start_if_not_started(t) \
+ trx_start_if_not_started_low((t))
+#endif /* UNIV_DEBUG */
+
+/*************************************************************//**
+Starts the transaction for a DDL operation. */
+UNIV_INTERN
+void
+trx_start_for_ddl_low(
+/*==================*/
+ trx_t* trx, /*!< in/out: transaction */
+ trx_dict_op_t op) /*!< in: dictionary operation type */
+ __attribute__((nonnull));
+
+#ifdef UNIV_DEBUG
+#define trx_start_for_ddl(t, o) \
+ { \
+ ut_ad((t)->start_file == 0); \
+ (t)->start_line = __LINE__; \
+ (t)->start_file = __FILE__; \
+ trx_start_for_ddl_low((t), (o)); \
+ }
+#else
+#define trx_start_for_ddl(t, o) \
+ trx_start_for_ddl_low((t), (o))
+#endif /* UNIV_DEBUG */
+
/****************************************************************//**
Commits a transaction. */
UNIV_INTERN
@@ -155,7 +204,7 @@ trx_cleanup_at_db_startup(
Does the transaction commit for MySQL.
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
trx_commit_for_mysql(
/*=================*/
trx_t* trx); /*!< in/out: transaction */
@@ -189,13 +238,13 @@ trx_get_trx_by_xid(
const XID* xid); /*!< in: X/Open XA transaction identifier */
/**********************************************************************//**
If required, flushes the log to disk if we called trx_commit_for_mysql()
-with trx->flush_log_later == TRUE.
-@return 0 or error number */
+with trx->flush_log_later == TRUE. */
UNIV_INTERN
-ulint
+void
trx_commit_complete_for_mysql(
/*==========================*/
- trx_t* trx); /*!< in: trx handle */
+ trx_t* trx) /*!< in/out: transaction */
+ __attribute__((nonnull));
/**********************************************************************//**
Marks the latest SQL statement ended. */
UNIV_INTERN
@@ -251,9 +300,9 @@ trx_print_low(
ulint max_query_len,
/*!< in: max query length to print,
or 0 to use the default max length */
- ulint n_lock_rec,
+ ulint n_rec_locks,
/*!< in: lock_number_of_rows_locked(&trx->lock) */
- ulint n_lock_struct,
+ ulint n_trx_locks,
/*!< in: length of trx->lock.trx_locks */
ulint heap_size)
/*!< in: mem_heap_get_size(trx->lock.lock_heap) */
@@ -286,26 +335,11 @@ trx_print(
or 0 to use the default max length */
__attribute__((nonnull));
-/** Type of data dictionary operation */
-typedef enum trx_dict_op {
- /** The transaction is not modifying the data dictionary. */
- TRX_DICT_OP_NONE = 0,
- /** The transaction is creating a table or an index, or
- dropping a table. The table must be dropped in crash
- recovery. This and TRX_DICT_OP_NONE are the only possible
- operation modes in crash recovery. */
- TRX_DICT_OP_TABLE = 1,
- /** The transaction is creating or dropping an index in an
- existing table. In crash recovery, the data dictionary
- must be locked, but the table must not be dropped. */
- TRX_DICT_OP_INDEX = 2
-} trx_dict_op_t;
-
/**********************************************************************//**
Determine if a transaction is a dictionary operation.
@return dictionary operation mode */
UNIV_INLINE
-enum trx_dict_op
+enum trx_dict_op_t
trx_get_dict_operation(
/*===================*/
const trx_t* trx) /*!< in: transaction */
@@ -317,7 +351,7 @@ void
trx_set_dict_operation(
/*===================*/
trx_t* trx, /*!< in/out: transaction */
- enum trx_dict_op op); /*!< in: operation, not
+ enum trx_dict_op_t op); /*!< in: operation, not
TRX_DICT_OP_NONE */
#ifndef UNIV_HOTBACKUP
@@ -359,7 +393,7 @@ UNIV_INTERN
ibool
trx_is_interrupted(
/*===============*/
- trx_t* trx); /*!< in: transaction */
+ const trx_t* trx); /*!< in: transaction */
/**********************************************************************//**
Determines if the currently running transaction is in strict mode.
@return TRUE if strict */
@@ -405,6 +439,15 @@ trx_get_que_state_str(
/*==================*/
const trx_t* trx); /*!< in: transaction */
+/****************************************************************//**
+Assign a read-only transaction a rollback-segment, if it is attempting
+to write to a TEMPORARY table. */
+UNIV_INTERN
+void
+trx_assign_rseg(
+/*============*/
+ trx_t* trx); /*!< A read-only transaction that
+					needs to be assigned an RBS. */
/*******************************************************************//**
Transactions that aren't started by the MySQL server don't set
the trx_t::mysql_thd field. For such transactions we set the lock
@@ -450,7 +493,6 @@ non-locking select */
ut_ad(!trx_is_autocommit_non_locking((t))); \
switch ((t)->state) { \
case TRX_STATE_PREPARED: \
- ut_a(!(t)->read_only); \
/* fall through */ \
case TRX_STATE_ACTIVE: \
case TRX_STATE_COMMITTED_IN_MEMORY: \
@@ -463,7 +505,7 @@ non-locking select */
#ifdef UNIV_DEBUG
/*******************************************************************//**
-Assert that an autocommit non-locking slect cannot be in the
+Assert that an autocommit non-locking select cannot be in the
ro_trx_list nor the rw_trx_list and that it is a read-only transaction.
The tranasction must be in the mysql_trx_list. */
# define assert_trx_nonlocking_or_in_list(t) \
@@ -511,7 +553,7 @@ code and no mutex is required when the query thread is no longer waiting. */
/** The locks and state of an active transaction. Protected by
lock_sys->mutex, trx->mutex or both. */
-struct trx_lock_struct {
+struct trx_lock_t {
ulint n_active_thrs; /*!< number of active query threads */
trx_que_t que_state; /*!< valid when trx->state
@@ -620,10 +662,10 @@ lock_rec_convert_impl_to_expl()) will access transactions associated
to other connections. The locks of transactions are protected by
lock_sys->mutex and sometimes by trx->mutex. */
-struct trx_struct{
+struct trx_t{
ulint magic_n;
- mutex_t mutex; /*!< Mutex protecting the fields
+ ib_mutex_t mutex; /*!< Mutex protecting the fields
state and lock
(except some fields of lock, which
are protected by lock_sys->mutex) */
@@ -657,8 +699,7 @@ struct trx_struct{
Latching and various transaction lists membership rules:
- XA (2PC) transactions are always treated as read-write and
- non-autocommit.
+ XA (2PC) transactions are always treated as non-autocommit.
Transitions to ACTIVE or NOT_STARTED occur when
!in_rw_trx_list and !in_ro_trx_list (no trx_sys->mutex needed).
@@ -793,9 +834,9 @@ struct trx_struct{
transaction branch */
lsn_t commit_lsn; /*!< lsn at the time of the commit */
table_id_t table_id; /*!< Table to drop iff dict_operation
- is TRUE, or 0. */
+ == TRX_DICT_OP_TABLE, or 0. */
/*------------------------------*/
- void* mysql_thd; /*!< MySQL thread handle corresponding
+ THD* mysql_thd; /*!< MySQL thread handle corresponding
to this trx, or NULL */
const char* mysql_log_file_name;
/*!< if MySQL binlog is used, this field
@@ -838,7 +879,7 @@ struct trx_struct{
trx_sys->mysql_trx_list */
#endif /* UNIV_DEBUG */
/*------------------------------*/
- enum db_err error_state; /*!< 0 if no error, otherwise error
+ dberr_t error_state; /*!< 0 if no error, otherwise error
number; NOTE That ONLY the thread
doing the transaction is allowed to
set this field: this is NOT protected
@@ -873,7 +914,7 @@ struct trx_struct{
trx_savepoints; /*!< savepoints set with SAVEPOINT ...,
oldest first */
/*------------------------------*/
- mutex_t undo_mutex; /*!< mutex protecting the fields in this
+ ib_mutex_t undo_mutex; /*!< mutex protecting the fields in this
section (down to undo_no_arr), EXCEPT
last_sql_stat_start, which can be
accessed only when we know that there
@@ -929,12 +970,24 @@ struct trx_struct{
ulint will_lock; /*!< Will acquire some locks. Increment
each time we determine that a lock will
be acquired by the MySQL layer. */
+ bool ddl; /*!< true if it is a transaction that
+ is being started for a DDL operation */
/*------------------------------*/
- fts_trx_t* fts_trx; /* FTS information, or NULL if
+ fts_trx_t* fts_trx; /*!< FTS information, or NULL if
transaction hasn't modified tables
with FTS indexes (yet). */
doc_id_t fts_next_doc_id;/* The document id used for updates */
/*------------------------------*/
+ ulint flush_tables; /*!< if "covering" the FLUSH TABLES",
+ count of tables being flushed. */
+
+ /*------------------------------*/
+#ifdef UNIV_DEBUG
+ ulint start_line; /*!< Track where it was started from */
+ const char* start_file; /*!< Filename where it was started */
+#endif /* UNIV_DEBUG */
+
+ /*------------------------------*/
char detailed_error[256]; /*!< detailed error message for last
error, or empty. */
};
@@ -1003,7 +1056,7 @@ enum commit_node_state {
};
/** Commit command node in a query graph */
-struct commit_node_struct{
+struct commit_node_t{
que_common_t common; /*!< node type: QUE_NODE_COMMIT */
enum commit_node_state
state; /*!< node execution state */
diff --git a/storage/innobase/include/trx0trx.ic b/storage/innobase/include/trx0trx.ic
index ceeb121ab70..69ee17ea98b 100644
--- a/storage/innobase/include/trx0trx.ic
+++ b/storage/innobase/include/trx0trx.ic
@@ -44,7 +44,7 @@ trx_state_eq(
#ifdef UNIV_DEBUG
switch (trx->state) {
case TRX_STATE_PREPARED:
- assert_trx_in_rw_list(trx);
+ ut_ad(!trx_is_autocommit_non_locking(trx));
return(trx->state == state);
case TRX_STATE_ACTIVE:
@@ -108,12 +108,12 @@ trx_get_que_state_str(
Determine if a transaction is a dictionary operation.
@return dictionary operation mode */
UNIV_INLINE
-enum trx_dict_op
+enum trx_dict_op_t
trx_get_dict_operation(
/*===================*/
const trx_t* trx) /*!< in: transaction */
{
- enum trx_dict_op op = (enum trx_dict_op) trx->dict_operation;
+ trx_dict_op_t op = static_cast<trx_dict_op_t>(trx->dict_operation);
#ifdef UNIV_DEBUG
switch (op) {
@@ -124,7 +124,7 @@ trx_get_dict_operation(
}
ut_error;
#endif /* UNIV_DEBUG */
- return((enum trx_dict_op) op);
+ return(op);
}
/**********************************************************************//**
Flag a transaction a dictionary operation. */
@@ -133,11 +133,11 @@ void
trx_set_dict_operation(
/*===================*/
trx_t* trx, /*!< in/out: transaction */
- enum trx_dict_op op) /*!< in: operation, not
+ enum trx_dict_op_t op) /*!< in: operation, not
TRX_DICT_OP_NONE */
{
#ifdef UNIV_DEBUG
- enum trx_dict_op old_op = trx_get_dict_operation(trx);
+ enum trx_dict_op_t old_op = trx_get_dict_operation(trx);
switch (op) {
case TRX_DICT_OP_NONE:
@@ -159,6 +159,7 @@ trx_set_dict_operation(
ok:
#endif /* UNIV_DEBUG */
+ trx->ddl = true;
trx->dict_operation = op;
}
diff --git a/storage/innobase/include/trx0types.h b/storage/innobase/include/trx0types.h
index 650d5878e64..4f515cb5248 100644
--- a/storage/innobase/include/trx0types.h
+++ b/storage/innobase/include/trx0types.h
@@ -36,7 +36,7 @@ the terminating NUL character. */
#define TRX_ID_MAX_LEN 17
/** Transaction execution states when trx->state == TRX_STATE_ACTIVE */
-enum trx_que_enum {
+enum trx_que_t {
TRX_QUE_RUNNING, /*!< transaction is running */
TRX_QUE_LOCK_WAIT, /*!< transaction is waiting for
a lock */
@@ -45,43 +45,54 @@ enum trx_que_enum {
};
/** Transaction states (trx_t::state) */
-enum trx_state_enum {
+enum trx_state_t {
TRX_STATE_NOT_STARTED,
TRX_STATE_ACTIVE,
TRX_STATE_PREPARED, /* Support for 2PC/XA */
TRX_STATE_COMMITTED_IN_MEMORY
};
+/** Type of data dictionary operation */
+enum trx_dict_op_t {
+ /** The transaction is not modifying the data dictionary. */
+ TRX_DICT_OP_NONE = 0,
+ /** The transaction is creating a table or an index, or
+ dropping a table. The table must be dropped in crash
+ recovery. This and TRX_DICT_OP_NONE are the only possible
+ operation modes in crash recovery. */
+ TRX_DICT_OP_TABLE = 1,
+ /** The transaction is creating or dropping an index in an
+ existing table. In crash recovery, the data dictionary
+ must be locked, but the table must not be dropped. */
+ TRX_DICT_OP_INDEX = 2
+};
+
/** Memory objects */
/* @{ */
/** Transaction */
-typedef struct trx_struct trx_t;
+struct trx_t;
/** The locks and state of an active transaction */
-typedef struct trx_lock_struct trx_lock_t;
+struct trx_lock_t;
/** Transaction system */
-typedef struct trx_sys_struct trx_sys_t;
+struct trx_sys_t;
/** Signal */
-typedef struct trx_sig_struct trx_sig_t;
+struct trx_sig_t;
/** Rollback segment */
-typedef struct trx_rseg_struct trx_rseg_t;
+struct trx_rseg_t;
/** Transaction undo log */
-typedef struct trx_undo_struct trx_undo_t;
+struct trx_undo_t;
/** Array of undo numbers of undo records being rolled back or purged */
-typedef struct trx_undo_arr_struct trx_undo_arr_t;
+struct trx_undo_arr_t;
/** A cell of trx_undo_arr_t */
-typedef struct trx_undo_inf_struct trx_undo_inf_t;
+struct trx_undo_inf_t;
/** The control structure used in the purge operation */
-typedef struct trx_purge_struct trx_purge_t;
+struct trx_purge_t;
/** Rollback command node in a query graph */
-typedef struct roll_node_struct roll_node_t;
+struct roll_node_t;
/** Commit command node in a query graph */
-typedef struct commit_node_struct commit_node_t;
+struct commit_node_t;
/** SAVEPOINT command node in a query graph */
-typedef struct trx_named_savept_struct trx_named_savept_t;
-/** Transaction concurrency state */
-typedef enum trx_state_enum trx_state_t;
-/** Transaction query thread state */
-typedef enum trx_que_enum trx_que_t;
+struct trx_named_savept_t;
/* @} */
/** Rollback contexts */
@@ -109,9 +120,7 @@ typedef ib_id_t roll_ptr_t;
typedef ib_id_t undo_no_t;
/** Transaction savepoint */
-typedef struct trx_savept_struct trx_savept_t;
-/** Transaction savepoint */
-struct trx_savept_struct{
+struct trx_savept_t{
undo_no_t least_undo_no; /*!< least undo number to undo */
};
diff --git a/storage/innobase/include/trx0undo.h b/storage/innobase/include/trx0undo.h
index ed2ce66bbb6..4021d71c68a 100644
--- a/storage/innobase/include/trx0undo.h
+++ b/storage/innobase/include/trx0undo.h
@@ -65,6 +65,15 @@ ibool
trx_undo_roll_ptr_is_insert(
/*========================*/
roll_ptr_t roll_ptr); /*!< in: roll pointer */
+/***********************************************************************//**
+Returns true if the record is of the insert type.
+@return true if the record was freshly inserted (not updated). */
+UNIV_INLINE
+bool
+trx_undo_trx_id_is_insert(
+/*======================*/
+ const byte* trx_id) /*!< in: DB_TRX_ID, followed by DB_ROLL_PTR */
+ __attribute__((nonnull, pure, warn_unused_result));
#endif /* !UNIV_HOTBACKUP */
/*****************************************************************//**
Writes a roll ptr to an index page. In case that the size changes in
@@ -285,11 +294,12 @@ undo log reused.
are: DB_TOO_MANY_CONCURRENT_TRXS DB_OUT_OF_FILE_SPACE DB_READ_ONLY
DB_OUT_OF_MEMORY */
UNIV_INTERN
-ulint
+dberr_t
trx_undo_assign_undo(
/*=================*/
trx_t* trx, /*!< in: transaction */
- ulint type); /*!< in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
+ ulint type) /*!< in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
+ __attribute__((nonnull, warn_unused_result));
/******************************************************************//**
Sets the state of the undo log segment at a transaction finish.
@return undo log segment header page, x-latched */
@@ -404,7 +414,7 @@ trx_undo_mem_free(
/** Transaction undo log memory object; this is protected by the undo_mutex
in the corresponding transaction object */
-struct trx_undo_struct{
+struct trx_undo_t{
/*-----------------------------*/
ulint id; /*!< undo log slot number within the
rollback segment */
diff --git a/storage/innobase/include/trx0undo.ic b/storage/innobase/include/trx0undo.ic
index 4b38e63297c..577759d6c3d 100644
--- a/storage/innobase/include/trx0undo.ic
+++ b/storage/innobase/include/trx0undo.ic
@@ -101,6 +101,21 @@ trx_undo_roll_ptr_is_insert(
ut_ad(roll_ptr < (1ULL << 56));
return((ibool) (roll_ptr >> 55));
}
+
+/***********************************************************************//**
+Returns true if the record is of the insert type.
+@return true if the record was freshly inserted (not updated). */
+UNIV_INLINE
+bool
+trx_undo_trx_id_is_insert(
+/*======================*/
+ const byte* trx_id) /*!< in: DB_TRX_ID, followed by DB_ROLL_PTR */
+{
+#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
+# error
+#endif
+ return(static_cast<bool>(trx_id[DATA_TRX_ID_LEN] >> 7));
+}
#endif /* !UNIV_HOTBACKUP */
/*****************************************************************//**
diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i
index 60eb1fede91..fbb62e8de01 100644
--- a/storage/innobase/include/univ.i
+++ b/storage/innobase/include/univ.i
@@ -380,11 +380,16 @@ This number varies depending on UNIV_PAGE_SIZE. */
/** Maximum number of parallel threads in a parallelized operation */
#define UNIV_MAX_PARALLELISM 32
-/** The maximum length of a table name. This is the MySQL limit and is
-defined in mysql_com.h like NAME_CHAR_LEN*SYSTEM_CHARSET_MBMAXLEN, the
-number does not include a terminating '\0'. InnoDB probably can handle
-longer names internally */
-#define MAX_TABLE_NAME_LEN 192
+/** This is the "mbmaxlen" for my_charset_filename (defined in
+strings/ctype-utf8.c), which is used to encode File and Database names. */
+#define FILENAME_CHARSET_MAXNAMLEN 5
+
+/** The maximum length of an encoded table name in bytes. The max
+table and database names are NAME_CHAR_LEN (64) characters. After the
+encoding, the max length would be NAME_CHAR_LEN (64) *
+FILENAME_CHARSET_MAXNAMLEN (5) = 320 bytes. The number does not include a
+terminating '\0'. InnoDB can handle longer names internally */
+#define MAX_TABLE_NAME_LEN 320
/** The maximum length of a database name. Like MAX_TABLE_NAME_LEN this is
the MySQL's NAME_LEN, see check_and_convert_db_name(). */
@@ -398,6 +403,16 @@ database name and table name. In addition, 14 bytes is added for:
#define MAX_FULL_NAME_LEN \
(MAX_TABLE_NAME_LEN + MAX_DATABASE_NAME_LEN + 14)
+/** The maximum length in bytes that a database name can occupy when stored in
+UTF8, including the terminating '\0', see dict_fs2utf8(). You must include
+mysql_com.h if you are to use this macro. */
+#define MAX_DB_UTF8_LEN (NAME_LEN + 1)
+
+/** The maximum length in bytes that a table name can occupy when stored in
+UTF8, including the terminating '\0', see dict_fs2utf8(). You must include
+mysql_com.h if you are to use this macro. */
+#define MAX_TABLE_UTF8_LEN (NAME_LEN + sizeof(srv_mysql50_table_name_prefix))
+
/*
UNIVERSAL TYPE DEFINITIONS
==========================
@@ -417,6 +432,7 @@ macro ULINTPF. */
# define UINT32PF "%I32u"
# define INT64PF "%I64d"
# define UINT64PF "%I64u"
+# define UINT64PFx "%016I64u"
typedef __int64 ib_int64_t;
typedef unsigned __int64 ib_uint64_t;
typedef unsigned __int32 ib_uint32_t;
@@ -425,6 +441,7 @@ typedef unsigned __int32 ib_uint32_t;
# define UINT32PF "%"PRIu32
# define INT64PF "%"PRId64
# define UINT64PF "%"PRIu64
+# define UINT64PFx "%016"PRIx64
typedef int64_t ib_int64_t;
typedef uint64_t ib_uint64_t;
typedef uint32_t ib_uint32_t;
@@ -489,6 +506,8 @@ headers may define 'bool' differently. Do not assume that 'bool' is a ulint! */
#endif
+#define UNIV_NOTHROW
+
/** The following number as the length of a logical field means that the field
has the SQL NULL as its value. NOTE that because we assume that the length
of a field is a 32-bit integer when we store it, for example, to an undo log
@@ -588,15 +607,23 @@ typedef void* os_thread_ret_t;
# define UNIV_MEM_ALLOC(addr, size) VALGRIND_MAKE_MEM_UNDEFINED(addr, size)
# define UNIV_MEM_DESC(addr, size) VALGRIND_CREATE_BLOCK(addr, size, #addr)
# define UNIV_MEM_UNDESC(b) VALGRIND_DISCARD(b)
-# define UNIV_MEM_ASSERT_RW(addr, size) do { \
+# define UNIV_MEM_ASSERT_RW_LOW(addr, size, should_abort) do { \
const void* _p = (const void*) (ulint) \
VALGRIND_CHECK_MEM_IS_DEFINED(addr, size); \
- if (UNIV_LIKELY_NULL(_p)) \
+ if (UNIV_LIKELY_NULL(_p)) { \
fprintf(stderr, "%s:%d: %p[%u] undefined at %ld\n", \
__FILE__, __LINE__, \
(const void*) (addr), (unsigned) (size), (long) \
(((const char*) _p) - ((const char*) (addr)))); \
- } while (0)
+ if (should_abort) { \
+ ut_error; \
+ } \
+ } \
+} while (0)
+# define UNIV_MEM_ASSERT_RW(addr, size) \
+ UNIV_MEM_ASSERT_RW_LOW(addr, size, false)
+# define UNIV_MEM_ASSERT_RW_ABORT(addr, size) \
+ UNIV_MEM_ASSERT_RW_LOW(addr, size, true)
# define UNIV_MEM_ASSERT_W(addr, size) do { \
const void* _p = (const void*) (ulint) \
VALGRIND_CHECK_MEM_IS_ADDRESSABLE(addr, size); \
@@ -613,7 +640,9 @@ typedef void* os_thread_ret_t;
# define UNIV_MEM_ALLOC(addr, size) do {} while(0)
# define UNIV_MEM_DESC(addr, size) do {} while(0)
# define UNIV_MEM_UNDESC(b) do {} while(0)
+# define UNIV_MEM_ASSERT_RW_LOW(addr, size, should_abort) do {} while(0)
# define UNIV_MEM_ASSERT_RW(addr, size) do {} while(0)
+# define UNIV_MEM_ASSERT_RW_ABORT(addr, size) do {} while(0)
# define UNIV_MEM_ASSERT_W(addr, size) do {} while(0)
#endif
#define UNIV_MEM_ASSERT_AND_FREE(addr, size) do { \
diff --git a/storage/innobase/include/usr0sess.h b/storage/innobase/include/usr0sess.h
index 4a0710c5060..b5c80b97b43 100644
--- a/storage/innobase/include/usr0sess.h
+++ b/storage/innobase/include/usr0sess.h
@@ -53,7 +53,7 @@ sess_close(
/* The session handle. This data structure is only used by purge and is
not really necessary. We should get rid of it. */
-struct sess_struct{
+struct sess_t{
ulint state; /*!< state of the session */
trx_t* trx; /*!< transaction object permanently
assigned for the session: the
diff --git a/storage/innobase/include/usr0types.h b/storage/innobase/include/usr0types.h
index 403ad0223a8..6ba937cacc8 100644
--- a/storage/innobase/include/usr0types.h
+++ b/storage/innobase/include/usr0types.h
@@ -26,6 +26,6 @@ Created 6/25/1996 Heikki Tuuri
#ifndef usr0types_h
#define usr0types_h
-typedef struct sess_struct sess_t;
+struct sess_t;
#endif
diff --git a/storage/innobase/include/ut0bh.h b/storage/innobase/include/ut0bh.h
index 4c029e256a9..84ea6dd915a 100644
--- a/storage/innobase/include/ut0bh.h
+++ b/storage/innobase/include/ut0bh.h
@@ -31,7 +31,7 @@ Created 2010-05-28 by Sunny Bains
/** Comparison function for objects in the binary heap. */
typedef int (*ib_bh_cmp_t)(const void* p1, const void* p2);
-typedef struct ib_bh_struct ib_bh_t;
+struct ib_bh_t;
/**********************************************************************//**
Get the number of elements in the binary heap.
@@ -138,7 +138,7 @@ ib_bh_pop(
ib_bh_t* ib_bh); /*!< in/out: instance */
/** Binary heap data structure */
-struct ib_bh_struct {
+struct ib_bh_t {
ulint max_elems; /*!< max elements allowed */
ulint n_elems; /*!< current size */
ulint sizeof_elem; /*!< sizeof element */
diff --git a/storage/innobase/include/ut0counter.h b/storage/innobase/include/ut0counter.h
new file mode 100644
index 00000000000..fe0f36dfff2
--- /dev/null
+++ b/storage/innobase/include/ut0counter.h
@@ -0,0 +1,203 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/ut0counter.h
+
+Counter utility class
+
+Created 2012/04/12 by Sunny Bains
+*******************************************************/
+
+#ifndef UT0COUNTER_H
+#define UT0COUNTER_H
+
+#include "univ.i"
+#include <string.h>
+#include "os0thread.h"
+
+/** CPU cache line size */
+#define CACHE_LINE_SIZE 64
+
+/** Default number of slots to use in ib_counter_t */
+#define IB_N_SLOTS 64
+
+/** Get the offset into the counter array. */
+template <typename Type, int N>
+struct generic_indexer_t {
+ /** Default constructor/destructor should be OK. */
+
+ /** @return offset within m_counter */
+ size_t offset(size_t index) const UNIV_NOTHROW {
+ return(((index % N) + 1) * (CACHE_LINE_SIZE / sizeof(Type)));
+ }
+};
+
+#ifdef HAVE_SCHED_GETCPU
+#include <utmpx.h>
+/** Use the cpu id to index into the counter array. If it fails then
+use the thread id. */
+template <typename Type, int N>
+struct get_sched_indexer_t : public generic_indexer_t<Type, N> {
+ /** Default constructor/destructor should be OK. */
+
+ /* @return result from sched_getcpu(), the thread id if it fails. */
+ size_t get_rnd_index() const UNIV_NOTHROW {
+
+ size_t cpu = sched_getcpu();
+ if (cpu == -1) {
+ cpu = (lint) os_thread_get_curr_id();
+ }
+
+ return(cpu);
+ }
+};
+#endif /* HAVE_SCHED_GETCPU */
+
+/** Use the thread id to index into the counter array. */
+template <typename Type, int N>
+struct thread_id_indexer_t : public generic_indexer_t<Type, N> {
+	/** Default constructor/destructor should be OK. */
+
+ /* @return a random number, currently we use the thread id. Where
+ thread id is represented as a pointer, it may not work as
+ effectively. */
+ size_t get_rnd_index() const UNIV_NOTHROW {
+ return((lint) os_thread_get_curr_id());
+ }
+};
+
+/** For counters where N=1 */
+template <typename Type, int N=1>
+struct single_indexer_t {
+	/** Default constructor/destructor should be OK. */
+
+ /** @return offset within m_counter */
+ size_t offset(size_t index) const UNIV_NOTHROW {
+ ut_ad(N == 1);
+ return((CACHE_LINE_SIZE / sizeof(Type)));
+ }
+
+ /* @return 1 */
+ size_t get_rnd_index() const UNIV_NOTHROW {
+ ut_ad(N == 1);
+ return(1);
+ }
+};
+
+/** Class for using fuzzy counters. The counter is not protected by any
+mutex and the results are not guaranteed to be 100% accurate but close
+enough. Creates an array of counters and separates each element by the
+CACHE_LINE_SIZE bytes */
+template <
+ typename Type,
+ int N = IB_N_SLOTS,
+ template<typename, int> class Indexer = thread_id_indexer_t>
+class ib_counter_t {
+public:
+ ib_counter_t() { memset(m_counter, 0x0, sizeof(m_counter)); }
+
+ ~ib_counter_t()
+ {
+ ut_ad(validate());
+ }
+
+ bool validate() UNIV_NOTHROW {
+#ifdef UNIV_DEBUG
+ size_t n = (CACHE_LINE_SIZE / sizeof(Type));
+
+ /* Check that we aren't writing outside our defined bounds. */
+ for (size_t i = 0; i < UT_ARR_SIZE(m_counter); i += n) {
+ for (size_t j = 1; j < n - 1; ++j) {
+ ut_ad(m_counter[i + j] == 0);
+ }
+ }
+#endif /* UNIV_DEBUG */
+ return(true);
+ }
+
+ /** If you can't use a good index id. Increment by 1. */
+ void inc() UNIV_NOTHROW { add(1); }
+
+ /** If you can't use a good index id.
+ * @param n - is the amount to increment */
+ void add(Type n) UNIV_NOTHROW {
+ size_t i = m_policy.offset(m_policy.get_rnd_index());
+
+ ut_ad(i < UT_ARR_SIZE(m_counter));
+
+ m_counter[i] += n;
+ }
+
+	/** Use this if you can use a unique identifier, saves a
+ call to get_rnd_index().
+ @param i - index into a slot
+ @param n - amount to increment */
+ void add(size_t index, Type n) UNIV_NOTHROW {
+ size_t i = m_policy.offset(index);
+
+ ut_ad(i < UT_ARR_SIZE(m_counter));
+
+ m_counter[i] += n;
+ }
+
+ /** If you can't use a good index id. Decrement by 1. */
+ void dec() UNIV_NOTHROW { sub(1); }
+
+ /** If you can't use a good index id.
+ * @param - n is the amount to decrement */
+ void sub(Type n) UNIV_NOTHROW {
+ size_t i = m_policy.offset(m_policy.get_rnd_index());
+
+ ut_ad(i < UT_ARR_SIZE(m_counter));
+
+ m_counter[i] -= n;
+ }
+
+	/** Use this if you can use a unique identifier, saves a
+ call to get_rnd_index().
+ @param i - index into a slot
+ @param n - amount to decrement */
+ void sub(size_t index, Type n) UNIV_NOTHROW {
+ size_t i = m_policy.offset(index);
+
+ ut_ad(i < UT_ARR_SIZE(m_counter));
+
+ m_counter[i] -= n;
+ }
+
+ /* @return total value - not 100% accurate, since it is not atomic. */
+ operator Type() const UNIV_NOTHROW {
+ Type total = 0;
+
+ for (size_t i = 0; i < N; ++i) {
+ total += m_counter[m_policy.offset(i)];
+ }
+
+ return(total);
+ }
+
+private:
+ /** Indexer into the array */
+ Indexer<Type, N>m_policy;
+
+ /** Slot 0 is unused. */
+ Type m_counter[(N + 1) * (CACHE_LINE_SIZE / sizeof(Type))];
+};
+
+#endif /* UT0COUNTER_H */
diff --git a/storage/innobase/include/ut0crc32.h b/storage/innobase/include/ut0crc32.h
index 456648001aa..86217692764 100644
--- a/storage/innobase/include/ut0crc32.h
+++ b/storage/innobase/include/ut0crc32.h
@@ -45,4 +45,7 @@ or 0x1EDC6F41 without the high-order bit) */
typedef ib_uint32_t (*ib_ut_crc32_t)(const byte* ptr, ulint len);
extern ib_ut_crc32_t ut_crc32;
+
+extern bool ut_crc32_sse2_enabled;
+
#endif /* ut0crc32_h */
diff --git a/storage/innobase/include/ut0dbg.h b/storage/innobase/include/ut0dbg.h
index e9ad62fb81b..0f2da165da7 100644
--- a/storage/innobase/include/ut0dbg.h
+++ b/storage/innobase/include/ut0dbg.h
@@ -145,10 +145,10 @@ ut_dbg_stop_thread(
#include <sys/resource.h>
/** structure used for recording usage statistics */
-typedef struct speedo_struct {
+struct speedo_t {
struct rusage ru; /*!< getrusage() result */
struct timeval tv; /*!< gettimeofday() result */
-} speedo_t;
+};
/*******************************************************************//**
Resets a speedo (records the current time in it). */
diff --git a/storage/innobase/include/ut0list.h b/storage/innobase/include/ut0list.h
index 57d6bdc33a6..29fc8669ce4 100644
--- a/storage/innobase/include/ut0list.h
+++ b/storage/innobase/include/ut0list.h
@@ -48,9 +48,8 @@ automatically freeing the list node when the item's heap is freed.
#include "mem0mem.h"
-typedef struct ib_list_struct ib_list_t;
-typedef struct ib_list_node_struct ib_list_node_t;
-typedef struct ib_list_helper_struct ib_list_helper_t;
+struct ib_list_t;
+struct ib_list_node_t;
/****************************************************************//**
Create a new list using mem_alloc. Lists created with this function must be
@@ -152,7 +151,7 @@ ib_list_is_empty(
const ib_list_t* list); /* in: list */
/* List. */
-struct ib_list_struct {
+struct ib_list_t {
ib_list_node_t* first; /*!< first node */
ib_list_node_t* last; /*!< last node */
ibool is_heap_list; /*!< TRUE if this list was
@@ -160,7 +159,7 @@ struct ib_list_struct {
};
/* A list node. */
-struct ib_list_node_struct {
+struct ib_list_node_t {
ib_list_node_t* prev; /*!< previous node */
ib_list_node_t* next; /*!< next node */
void* data; /*!< user data */
@@ -169,7 +168,7 @@ struct ib_list_node_struct {
/* Quite often, the only additional piece of data you need is the per-item
memory heap, so we have this generic struct available to use in those
cases. */
-struct ib_list_helper_struct {
+struct ib_list_helper_t {
mem_heap_t* heap; /*!< memory heap */
void* data; /*!< user data */
};
diff --git a/storage/innobase/include/ut0lst.h b/storage/innobase/include/ut0lst.h
index 51c89f15a77..b53e7ade4c1 100644
--- a/storage/innobase/include/ut0lst.h
+++ b/storage/innobase/include/ut0lst.h
@@ -65,8 +65,7 @@ The name of the field in the node struct should be the name given
to the list.
@param TYPE the list node type name */
/* Example:
-typedef struct LRU_node_struct LRU_node_t;
-struct LRU_node_struct {
+struct LRU_node_t {
UT_LIST_NODE_T(LRU_node_t) LRU_list;
...
}
diff --git a/storage/innobase/include/ut0rbt.h b/storage/innobase/include/ut0rbt.h
index e8a4430e76b..e0593e99bde 100644
--- a/storage/innobase/include/ut0rbt.h
+++ b/storage/innobase/include/ut0rbt.h
@@ -44,25 +44,19 @@ Created 2007-03-20 Sunny Bains
#define FALSE 0
#endif
-/* Red black tree typedefs */
-typedef struct ib_rbt_struct ib_rbt_t;
-typedef struct ib_rbt_node_struct ib_rbt_node_t;
-/* FIXME: Iterator is a better name than _bound_ */
-typedef struct ib_rbt_bound_struct ib_rbt_bound_t;
+struct ib_rbt_node_t;
typedef void (*ib_rbt_print_node)(const ib_rbt_node_t* node);
typedef int (*ib_rbt_compare)(const void* p1, const void* p2);
typedef int (*ib_rbt_arg_compare)(const void*, const void* p1, const void* p2);
/** Red black tree color types */
-enum ib_rbt_color_enum {
+enum ib_rbt_color_t {
IB_RBT_RED,
IB_RBT_BLACK
};
-typedef enum ib_rbt_color_enum ib_rbt_color_t;
-
/** Red black tree node */
-struct ib_rbt_node_struct {
+struct ib_rbt_node_t {
ib_rbt_color_t color; /* color of this node */
ib_rbt_node_t* left; /* points left child */
@@ -73,7 +67,7 @@ struct ib_rbt_node_struct {
};
/** Red black tree instance.*/
-struct ib_rbt_struct {
+struct ib_rbt_t {
ib_rbt_node_t* nil; /* Black colored node that is
used as a sentinel. This is
pre-allocated too.*/
@@ -89,12 +83,12 @@ struct ib_rbt_struct {
compare_with_arg; /* Fn. to use for comparison
with argument */
ulint sizeof_value; /* Sizeof the item in bytes */
- const void* cmp_arg; /* Compare func argument */
+ void* cmp_arg; /* Compare func argument */
};
/** The result of searching for a key in the tree, this is useful for
a speedy lookup and insert if key doesn't exist.*/
-struct ib_rbt_bound_struct {
+struct ib_rbt_bound_t {
const ib_rbt_node_t*
last; /* Last node visited */
@@ -142,7 +136,7 @@ rbt_create_arg_cmp(
size_t sizeof_value, /*!< in: size in bytes */
ib_rbt_arg_compare
compare, /*!< in: comparator */
- const void* cmp_arg); /*!< in: compare fn arg */
+ void* cmp_arg); /*!< in: compare fn arg */
/**********************************************************************//**
Delete a node from the red black tree, identified by key */
UNIV_INTERN
diff --git a/storage/innobase/include/ut0ut.h b/storage/innobase/include/ut0ut.h
index 35b8a580e68..1260e0381bf 100644
--- a/storage/innobase/include/ut0ut.h
+++ b/storage/innobase/include/ut0ut.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -65,16 +65,16 @@ typedef time_t ib_time_t;
# elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
# define UT_RELAX_CPU() __asm__ __volatile__ ("rep; nop")
-# elif defined(HAVE_ATOMIC_BUILTINS)
-# define UT_RELAX_CPU() do { \
- volatile lint volatile_var; \
- os_compare_and_swap_lint(&volatile_var, 0, 1); \
- } while (0)
# elif defined(HAVE_WINDOWS_ATOMICS)
/* In the Win32 API, the x86 PAUSE instruction is executed by calling
the YieldProcessor macro defined in WinNT.h. It is a CPU architecture-
independent way by using YieldProcessor. */
# define UT_RELAX_CPU() YieldProcessor()
+# elif defined(HAVE_ATOMIC_BUILTINS)
+# define UT_RELAX_CPU() do { \
+ volatile lint volatile_var; \
+ os_compare_and_swap_lint(&volatile_var, 0, 1); \
+ } while (0)
# else
# define UT_RELAX_CPU() ((void)0) /* avoid warning for an empty statement */
# endif
@@ -345,7 +345,7 @@ ut_print_filename(
#ifndef UNIV_HOTBACKUP
/* Forward declaration of transaction handle */
-struct trx_struct;
+struct trx_t;
/**********************************************************************//**
Outputs a fixed-length string, quoted as an SQL identifier.
@@ -357,7 +357,7 @@ void
ut_print_name(
/*==========*/
FILE* f, /*!< in: output stream */
- struct trx_struct*trx, /*!< in: transaction */
+ const trx_t* trx, /*!< in: transaction */
ibool table_id,/*!< in: TRUE=print a table name,
FALSE=print other identifier */
const char* name); /*!< in: name to print */
@@ -372,13 +372,31 @@ void
ut_print_namel(
/*===========*/
FILE* f, /*!< in: output stream */
- struct trx_struct*trx, /*!< in: transaction (NULL=no quotes) */
+ const trx_t* trx, /*!< in: transaction (NULL=no quotes) */
ibool table_id,/*!< in: TRUE=print a table name,
FALSE=print other identifier */
const char* name, /*!< in: name to print */
ulint namelen);/*!< in: length of name */
/**********************************************************************//**
+Formats a table or index name, quoted as an SQL identifier. If the name
+contains a slash '/', the result will contain two identifiers separated by
+a period (.), as in SQL database_name.identifier.
+@return pointer to 'formatted' */
+UNIV_INTERN
+char*
+ut_format_name(
+/*===========*/
+ const char* name, /*!< in: table or index name, must be
+ '\0'-terminated */
+ ibool is_table, /*!< in: if TRUE then 'name' is a table
+ name */
+ char* formatted, /*!< out: formatted result, will be
+ '\0'-terminated */
+ ulint formatted_size);/*!< out: no more than this number of
+ bytes will be written to 'formatted' */
+
+/**********************************************************************//**
Catenate files. */
UNIV_INTERN
void
@@ -442,7 +460,7 @@ UNIV_INTERN
const char*
ut_strerr(
/*======*/
- enum db_err num); /*!< in: error number */
+ dberr_t num); /*!< in: error number */
/****************************************************************
Sort function for ulint arrays. */
diff --git a/storage/innobase/include/ut0vec.h b/storage/innobase/include/ut0vec.h
index f2a5aba8116..432fb348a09 100644
--- a/storage/innobase/include/ut0vec.h
+++ b/storage/innobase/include/ut0vec.h
@@ -29,8 +29,8 @@ Created 4/6/2006 Osku Salerma
#include "univ.i"
#include "mem0mem.h"
-typedef struct ib_alloc_struct ib_alloc_t;
-typedef struct ib_vector_struct ib_vector_t;
+struct ib_alloc_t;
+struct ib_vector_t;
typedef void* (*ib_mem_alloc_t)(
/* out: Pointer to allocated memory */
@@ -64,7 +64,7 @@ freeing it when done with the vector.
/********************************************************************
Create a new vector with the given initial size. */
-
+UNIV_INTERN
ib_vector_t*
ib_vector_create(
/*=============*/
@@ -124,7 +124,7 @@ ib_vector_size(
/********************************************************************
Increase the size of the vector. */
-
+UNIV_INTERN
void
ib_vector_resize(
/*=============*/
@@ -311,7 +311,7 @@ ib_ut_allocator_free(
ib_alloc_t* ib_ut_alloc); /* in: alloc instace to free */
/* Allocator used by ib_vector_t. */
-struct ib_alloc_struct {
+struct ib_alloc_t {
ib_mem_alloc_t mem_malloc; /* For allocating memory */
ib_mem_free_t mem_release; /* For freeing memory */
ib_mem_resize_t mem_resize; /* For resizing memory */
@@ -320,7 +320,7 @@ struct ib_alloc_struct {
};
/* See comment at beginning of file. */
-struct ib_vector_struct {
+struct ib_vector_t {
ib_alloc_t* allocator; /* Allocator, because one size
doesn't fit all */
void* data; /* data elements */
diff --git a/storage/innobase/include/ut0vec.ic b/storage/innobase/include/ut0vec.ic
index 1255caee2d9..f41a85e1d1d 100644
--- a/storage/innobase/include/ut0vec.ic
+++ b/storage/innobase/include/ut0vec.ic
@@ -346,9 +346,10 @@ ib_vector_remove(
ib_vector_t* vec, /*!< in: vector */
const void* elem) /*!< in: value to remove */
{
- void* current;
+ void* current = NULL;
void* next;
ulint i;
+ ulint old_used_count = vec->used;
for (i = 0; i < vec->used; i++) {
current = ib_vector_get(vec, i);
@@ -359,14 +360,14 @@ ib_vector_remove(
}
next = ib_vector_get(vec, i + 1);
- memcpy(current, next, vec->sizeof_value
- * (vec->used - i - 1));
+ memmove(current, next, vec->sizeof_value
+ * (vec->used - i - 1));
+ --vec->used;
+ break;
}
}
- --vec->used;
-
- return(current);
+ return((old_used_count != vec->used) ? current : NULL);
}
/********************************************************************
diff --git a/storage/innobase/include/ut0wqueue.h b/storage/innobase/include/ut0wqueue.h
index ed4e65e4dc6..33385ddf2d4 100644
--- a/storage/innobase/include/ut0wqueue.h
+++ b/storage/innobase/include/ut0wqueue.h
@@ -37,7 +37,7 @@ processing.
#include "os0sync.h"
#include "sync0types.h"
-typedef struct ib_wqueue_struct ib_wqueue_t;
+struct ib_wqueue_t;
/****************************************************************//**
Create a new work queue.
@@ -96,8 +96,8 @@ ib_wqueue_timedwait(
ib_time_t wait_in_usecs); /* in: wait time in micro seconds */
/* Work queue. */
-struct ib_wqueue_struct {
- mutex_t mutex; /*!< mutex protecting everything */
+struct ib_wqueue_t {
+ ib_mutex_t mutex; /*!< mutex protecting everything */
ib_list_t* items; /*!< work item list */
os_event_t event; /*!< event we use to signal additions to list */
};
diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc
index 476b305ca70..1152152cc77 100644
--- a/storage/innobase/lock/lock0lock.cc
+++ b/storage/innobase/lock/lock0lock.cc
@@ -37,15 +37,17 @@ Created 5/7/1996 Heikki Tuuri
#include "usr0sess.h"
#include "trx0purge.h"
#include "dict0mem.h"
+#include "dict0boot.h"
#include "trx0sys.h"
#include "pars0pars.h" /* pars_complete_graph_for_exec() */
#include "que0que.h" /* que_node_get_parent() */
#include "row0mysql.h" /* row_mysql_handle_errors() */
-#include "row0sel.h" /* sel_node_create(), sel_node_struct */
+#include "row0sel.h" /* sel_node_create(), sel_node_t */
#include "row0types.h" /* sel_node_t */
#include "srv0mon.h"
#include "ut0vec.h"
#include "btr0btr.h"
+#include "dict0boot.h"
/* Restricts the length of search we will do in the waits-for
graph of transactions */
@@ -345,10 +347,7 @@ static const byte lock_strength_matrix[5][5] = {
};
/** Deadlock check context. */
-typedef struct lock_deadlock_ctx_struct lock_deadlock_ctx_t;
-
-/** Deadlock check context. */
-struct lock_deadlock_ctx_struct {
+struct lock_deadlock_ctx_t {
const trx_t* start; /*!< Joining transaction that is
requesting a lock in an incompatible
mode */
@@ -366,10 +365,8 @@ struct lock_deadlock_ctx_struct {
was aborted */
};
-typedef struct lock_stack_struct lock_stack_t;
-
/** DFS visited node information used during deadlock checking. */
-struct lock_stack_struct {
+struct lock_stack_t {
const lock_t* lock; /*!< Current lock */
const lock_t* wait_lock; /*!< Waiting for lock */
unsigned heap_no:16; /*!< heap number if rec lock */
@@ -415,9 +412,10 @@ lock_rec_validate_page(
/* The lock system */
UNIV_INTERN lock_sys_t* lock_sys = NULL;
-/* We store info on the latest deadlock error to this buffer. InnoDB
+/** We store info on the latest deadlock error to this buffer. InnoDB
Monitor will then fetch it and print */
UNIV_INTERN ibool lock_deadlock_found = FALSE;
+/** Only created if !srv_read_only_mode */
static FILE* lock_latest_err_file;
/********************************************************************//**
@@ -502,7 +500,7 @@ lock_check_trx_id_sanity(
dict_index_t* index, /*!< in: index */
const ulint* offsets) /*!< in: rec_get_offsets(rec, index) */
{
- ibool is_ok;
+ bool is_ok;
trx_id_t max_trx_id;
ut_ad(rec_offs_validate(rec, index, offsets));
@@ -520,10 +518,10 @@ lock_check_trx_id_sanity(
/*********************************************************************//**
Checks that a record is seen in a consistent read.
-@return TRUE if sees, or FALSE if an earlier version of the record
+@return true if sees, or false if an earlier version of the record
should be retrieved */
UNIV_INTERN
-ibool
+bool
lock_clust_rec_cons_read_sees(
/*==========================*/
const rec_t* rec, /*!< in: user record which should be read or
@@ -550,14 +548,14 @@ lock_clust_rec_cons_read_sees(
Checks that a non-clustered index record is seen in a consistent read.
NOTE that a non-clustered index page contains so little information on
-its modifications that also in the case FALSE, the present version of
+its modifications that also in the case false, the present version of
rec may be the right, but we must check this from the clustered index
record.
-@return TRUE if certainly sees, or FALSE if an earlier version of the
+@return true if certainly sees, or false if an earlier version of the
clustered index record might be needed */
UNIV_INTERN
-ulint
+bool
lock_sec_rec_cons_read_sees(
/*========================*/
const rec_t* rec, /*!< in: user record which
@@ -574,7 +572,7 @@ lock_sec_rec_cons_read_sees(
if (recv_recovery_is_on()) {
- return(FALSE);
+ return(false);
}
max_trx_id = page_get_max_trx_id(page_align(rec));
@@ -593,12 +591,6 @@ lock_sys_create(
{
ulint lock_sys_sz;
- srv_n_lock_wait_count = 0;
- srv_n_lock_wait_time = 0;
- srv_n_lock_max_wait_time = 0;
- srv_lock_timeout_active = FALSE;
- srv_n_lock_wait_current_count = 0;
-
lock_sys_sz = sizeof(*lock_sys)
+ OS_THREAD_MAX_N * sizeof(srv_slot_t);
@@ -618,12 +610,14 @@ lock_sys_create(
mutex_create(lock_sys_wait_mutex_key,
&lock_sys->wait_mutex, SYNC_LOCK_WAIT_SYS);
- lock_sys->rec_hash = hash_create(n_cells);
+ lock_sys->timeout_event = os_event_create();
- lock_latest_err_file = os_file_create_tmpfile();
- ut_a(lock_latest_err_file);
+ lock_sys->rec_hash = hash_create(n_cells);
- srv_timeout_event = os_event_create(NULL);
+ if (!srv_read_only_mode) {
+ lock_latest_err_file = os_file_create_tmpfile();
+ ut_a(lock_latest_err_file);
+ }
}
/*********************************************************************//**
@@ -858,13 +852,16 @@ lock_reset_lock_and_trx_wait(
/*=========================*/
lock_t* lock) /*!< in/out: record lock */
{
- ut_ad(lock->trx->lock.wait_lock == lock);
ut_ad(lock_get_wait(lock));
ut_ad(lock_mutex_own());
/* Reset the back pointer in trx to this waiting lock request */
-
- lock->trx->lock.wait_lock = NULL;
+ if (!(lock->type_mode & LOCK_CONV_BY_OTHER)) {
+ ut_ad(lock->trx->lock.wait_lock == lock);
+ lock->trx->lock.wait_lock = NULL;
+ } else {
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
+ }
lock->type_mode &= ~LOCK_WAIT;
}
@@ -1476,7 +1473,7 @@ Checks if a transaction has a GRANTED explicit lock on rec stronger or equal
to precise_mode.
@return lock or NULL */
UNIV_INLINE
-const lock_t*
+lock_t*
lock_rec_has_expl(
/*==============*/
ulint precise_mode,/*!< in: LOCK_S or LOCK_X
@@ -1489,7 +1486,7 @@ lock_rec_has_expl(
ulint heap_no,/*!< in: heap number of the record */
const trx_t* trx) /*!< in: transaction */
{
- const lock_t* lock;
+ lock_t* lock;
ut_ad(lock_mutex_own());
ut_ad((precise_mode & LOCK_MODE_MASK) == LOCK_S
@@ -1498,14 +1495,14 @@ lock_rec_has_expl(
for (lock = lock_rec_get_first(block, heap_no);
lock != NULL;
- lock = lock_rec_get_next_const(heap_no, lock)) {
+ lock = lock_rec_get_next(heap_no, lock)) {
if (lock->trx == trx
+ && !lock_is_wait_not_by_other(lock->type_mode)
&& lock_mode_stronger_or_eq(
lock_get_mode(lock),
static_cast<enum lock_mode>(
precise_mode & LOCK_MODE_MASK))
- && !lock_get_wait(lock)
&& (!lock_rec_get_rec_not_gap(lock)
|| (precise_mode & LOCK_REC_NOT_GAP)
|| heap_no == PAGE_HEAP_NO_SUPREMUM)
@@ -1756,6 +1753,7 @@ lock_rec_create(
ut_ad(lock_mutex_own());
ut_ad(caller_owns_trx_mutex == trx_mutex_own(trx));
+ ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
/* Non-locking autocommit read-only transactions should not set
any locks. */
@@ -1813,7 +1811,7 @@ lock_rec_create(
}
ut_ad(trx_mutex_own(trx));
- if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
+ if (lock_is_wait_not_by_other(type_mode)) {
lock_set_lock_and_trx_wait(lock, trx);
}
@@ -1838,7 +1836,7 @@ DB_SUCCESS_LOCKED_REC; DB_SUCCESS_LOCKED_REC means that
there was a deadlock, but another transaction was chosen as a victim,
and we got the lock immediately: no need to wait then */
static
-enum db_err
+dberr_t
lock_rec_enqueue_waiting(
/*=====================*/
ulint type_mode,/*!< in: lock mode this
@@ -1853,14 +1851,16 @@ lock_rec_enqueue_waiting(
const buf_block_t* block, /*!< in: buffer block containing
the record */
ulint heap_no,/*!< in: heap number of the record */
+ lock_t* lock, /*!< in: lock object; NULL if a new
+ one should be created. */
dict_index_t* index, /*!< in: index of record */
que_thr_t* thr) /*!< in: query thread */
{
trx_t* trx;
- lock_t* lock;
trx_id_t victim_trx_id;
ut_ad(lock_mutex_own());
+ ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
trx = thr_get_trx(thr);
@@ -1893,10 +1893,20 @@ lock_rec_enqueue_waiting(
ut_ad(0);
}
- /* Enqueue the lock request that will wait to be granted, note that
- we already own the trx mutex. */
- lock = lock_rec_create(
- type_mode | LOCK_WAIT, block, heap_no, index, trx, TRUE);
+ if (lock == NULL) {
+ /* Enqueue the lock request that will wait
+ to be granted, note that we already own
+ the trx mutex. */
+ lock = lock_rec_create(
+ type_mode | LOCK_WAIT, block, heap_no,
+ index, trx, TRUE);
+ } else {
+ ut_ad(lock->type_mode & LOCK_WAIT);
+ ut_ad(lock->type_mode & LOCK_CONV_BY_OTHER);
+
+ lock->type_mode &= ~LOCK_CONV_BY_OTHER;
+ lock_set_lock_and_trx_wait(lock, trx);
+ }
/* Release the mutex to obey the latching order.
This is safe, because lock_deadlock_check_and_resolve()
@@ -1979,6 +1989,7 @@ lock_rec_add_to_queue(
ut_ad(lock_mutex_own());
ut_ad(caller_owns_trx_mutex == trx_mutex_own(trx));
+ ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
#ifdef UNIV_DEBUG
switch (type_mode & LOCK_MODE_MASK) {
case LOCK_X:
@@ -2100,6 +2111,7 @@ lock_rec_lock_fast(
ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
|| mode - (LOCK_MODE_MASK & mode) == 0
|| mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
+ ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
lock = lock_rec_get_first_on_page(block);
@@ -2146,7 +2158,7 @@ lock, or in the case of a page supremum record, a gap type lock.
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
or DB_QUE_THR_SUSPENDED */
static
-enum db_err
+dberr_t
lock_rec_lock_slow(
/*===============*/
ibool impl, /*!< in: if TRUE, no lock is set
@@ -2163,7 +2175,8 @@ lock_rec_lock_slow(
que_thr_t* thr) /*!< in: query thread */
{
trx_t* trx;
- enum db_err err = DB_SUCCESS;
+ lock_t* lock;
+ dberr_t err = DB_SUCCESS;
ut_ad(lock_mutex_own());
ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
@@ -2175,12 +2188,33 @@ lock_rec_lock_slow(
ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
|| mode - (LOCK_MODE_MASK & mode) == 0
|| mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
+ ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
trx = thr_get_trx(thr);
trx_mutex_enter(trx);
- if (lock_rec_has_expl(mode, block, heap_no, trx)) {
+ lock = lock_rec_has_expl(mode, block, heap_no, trx);
+ if (lock) {
+ if (lock->type_mode & LOCK_CONV_BY_OTHER) {
+ /* This lock or lock waiting was created by the other
+ transaction, not by the transaction (trx) itself.
+ So, the transaction (trx) should treat it collectly
+ according as whether granted or not. */
+
+ if (lock->type_mode & LOCK_WAIT) {
+ /* This lock request was not granted yet.
+ Should wait for granted. */
+
+ goto enqueue_waiting;
+ } else {
+ /* This lock request was already granted.
+ Just clearing the flag. */
+
+ lock->type_mode &= ~LOCK_CONV_BY_OTHER;
+ }
+ }
+
/* The trx already has a strong enough lock on rec: do
nothing */
@@ -2193,8 +2227,10 @@ lock_rec_lock_slow(
have a lock strong enough already granted on the
record, we have to wait. */
+ ut_ad(lock == NULL);
+enqueue_waiting:
err = lock_rec_enqueue_waiting(
- mode, block, heap_no, index, thr);
+ mode, block, heap_no, lock, index, thr);
} else if (!impl) {
/* Set the requested lock on the record, note that
@@ -2220,7 +2256,7 @@ of a page supremum record, a gap type lock.
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
or DB_QUE_THR_SUSPENDED */
static
-enum db_err
+dberr_t
lock_rec_lock(
/*==========*/
ibool impl, /*!< in: if TRUE, no lock is set
@@ -2246,6 +2282,7 @@ lock_rec_lock(
ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
|| mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP
|| mode - (LOCK_MODE_MASK & mode) == 0);
+ ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
/* We try a simplified and faster subroutine for the most
common cases */
@@ -2348,7 +2385,8 @@ lock_grant(
TRX_QUE_LOCK_WAIT state, and there is no need to end the lock wait
for it */
- if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
+ if (!(lock->type_mode & LOCK_CONV_BY_OTHER)
+ && lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
que_thr_t* thr;
thr = que_thr_end_lock_wait(lock->trx);
@@ -2375,6 +2413,7 @@ lock_rec_cancel(
ut_ad(lock_mutex_own());
ut_ad(lock_get_type_low(lock) == LOCK_REC);
+ ut_ad(!(lock->type_mode & LOCK_CONV_BY_OTHER));
/* Reset the bit (there can be only one set bit) in the lock bitmap */
lock_rec_reset_nth_bit(lock, lock_rec_find_set_bit(lock));
@@ -2541,8 +2580,12 @@ lock_rec_reset_and_release_wait(
lock != NULL;
lock = lock_rec_get_next(heap_no, lock)) {
- if (lock_get_wait(lock)) {
+ if (lock_is_wait_not_by_other(lock->type_mode)) {
lock_rec_cancel(lock);
+ } else if (lock_get_wait(lock)) {
+ /* just reset LOCK_WAIT */
+ lock_rec_reset_nth_bit(lock, heap_no);
+ lock_reset_lock_and_trx_wait(lock);
} else {
lock_rec_reset_nth_bit(lock, heap_no);
}
@@ -3439,11 +3482,13 @@ lock_deadlock_start_print()
/*=======================*/
{
ut_ad(lock_mutex_own());
+ ut_ad(!srv_read_only_mode);
rewind(lock_latest_err_file);
ut_print_timestamp(lock_latest_err_file);
if (srv_print_all_deadlocks) {
+ ut_print_timestamp(stderr);
fprintf(stderr, "InnoDB: transactions deadlock detected, "
"dumping detailed information.\n");
ut_print_timestamp(stderr);
@@ -3458,10 +3503,12 @@ lock_deadlock_fputs(
/*================*/
const char* msg) /*!< in: message to print */
{
- fputs(msg, lock_latest_err_file);
+ if (!srv_read_only_mode) {
+ fputs(msg, lock_latest_err_file);
- if (srv_print_all_deadlocks) {
- fputs(msg, stderr);
+ if (srv_print_all_deadlocks) {
+ fputs(msg, stderr);
+ }
}
}
@@ -3475,24 +3522,21 @@ lock_deadlock_trx_print(
ulint max_query_len) /*!< in: max query length to print,
or 0 to use the default max length */
{
- ulint n_lock_rec;
- ulint n_lock_struct;
- ulint heap_size;
-
ut_ad(lock_mutex_own());
+ ut_ad(!srv_read_only_mode);
- n_lock_rec = lock_number_of_rows_locked(&trx->lock);
- n_lock_struct = UT_LIST_GET_LEN(trx->lock.trx_locks);
- heap_size = mem_heap_get_size(trx->lock.lock_heap);
+ ulint n_rec_locks = lock_number_of_rows_locked(&trx->lock);
+ ulint n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks);
+ ulint heap_size = mem_heap_get_size(trx->lock.lock_heap);
mutex_enter(&trx_sys->mutex);
trx_print_low(lock_latest_err_file, trx, max_query_len,
- n_lock_rec, n_lock_struct, heap_size);
+ n_rec_locks, n_trx_locks, heap_size);
if (srv_print_all_deadlocks) {
trx_print_low(stderr, trx, max_query_len,
- n_lock_rec, n_lock_struct, heap_size);
+ n_rec_locks, n_trx_locks, heap_size);
}
mutex_exit(&trx_sys->mutex);
@@ -3507,6 +3551,7 @@ lock_deadlock_lock_print(
const lock_t* lock) /*!< in: record or table type lock */
{
ut_ad(lock_mutex_own());
+ ut_ad(!srv_read_only_mode);
if (lock_get_type_low(lock) == LOCK_REC) {
lock_rec_print(lock_latest_err_file, lock);
@@ -3629,6 +3674,7 @@ lock_deadlock_notify(
deadlock */
{
ut_ad(lock_mutex_own());
+ ut_ad(!srv_read_only_mode);
lock_deadlock_start_print();
@@ -3648,9 +3694,15 @@ lock_deadlock_notify(
lock_deadlock_lock_print(lock);
- lock_deadlock_fputs("*** (2) WAITING FOR THIS LOCK TO BE GRANTED:\n");
+ /* It is possible that the joining transaction was granted its
+ lock when we rolled back some other waiting transaction. */
+
+ if (ctx->start->lock.wait_lock != 0) {
+ lock_deadlock_fputs(
+ "*** (2) WAITING FOR THIS LOCK TO BE GRANTED:\n");
- lock_deadlock_lock_print(ctx->start->lock.wait_lock);
+ lock_deadlock_lock_print(ctx->start->lock.wait_lock);
+ }
#ifdef UNIV_DEBUG
if (lock_print_waits) {
@@ -3669,6 +3721,7 @@ lock_deadlock_select_victim(
const lock_deadlock_ctx_t* ctx) /*!< in: deadlock context */
{
ut_ad(lock_mutex_own());
+ ut_ad(ctx->start->lock.wait_lock != 0);
ut_ad(ctx->wait_lock->trx != ctx->start);
if (trx_weight_ge(ctx->wait_lock->trx, ctx->start)) {
@@ -3694,8 +3747,10 @@ lock_deadlock_check(
{
ut_ad(lock_mutex_own());
- /* If it is the joining transaction wait lock. */
- if (lock == ctx->start->lock.wait_lock) {
+ /* If it is the joining transaction wait lock or the joining
+ transaction was granted its lock due to deadlock detection. */
+ if (lock == ctx->start->lock.wait_lock
+ || ctx->start->lock.wait_lock == NULL) {
; /* Skip */
} else if (lock == ctx->wait_lock) {
@@ -3776,7 +3831,8 @@ lock_deadlock_push(
}
/********************************************************************//**
-Looks iteratively for a deadlock.
+Looks iteratively for a deadlock. Note: the joining transaction may
+have been granted its lock by the deadlock checks.
@return 0 if no deadlock else the victim transaction id.*/
static
trx_id_t
@@ -3811,7 +3867,9 @@ lock_deadlock_search(
/* Found a cycle. */
- lock_deadlock_notify(ctx, lock);
+ if (!srv_read_only_mode) {
+ lock_deadlock_notify(ctx, lock);
+ }
return(lock_deadlock_select_victim(ctx)->id);
@@ -3882,6 +3940,7 @@ lock_deadlock_joining_trx_print(
const lock_t* lock) /*!< in: lock trx wants */
{
ut_ad(lock_mutex_own());
+ ut_ad(!srv_read_only_mode);
/* If the lock search exceeds the max step
or the max depth, the current trx will be
@@ -3968,7 +4027,9 @@ lock_deadlock_check_and_resolve(
ut_a(trx == ctx.start);
ut_a(victim_trx_id == trx->id);
- lock_deadlock_joining_trx_print(trx, lock);
+ if (!srv_read_only_mode) {
+ lock_deadlock_joining_trx_print(trx, lock);
+ }
MONITOR_INC(MONITOR_DEADLOCK);
@@ -4017,6 +4078,7 @@ lock_table_create(
ut_ad(table && trx);
ut_ad(lock_mutex_own());
ut_ad(trx_mutex_own(trx));
+ ut_ad(!(type_mode & LOCK_CONV_BY_OTHER));
/* Non-locking autocommit read-only transactions should not set
any locks. */
@@ -4203,7 +4265,7 @@ DB_SUCCESS; DB_SUCCESS means that there was a deadlock, but another
transaction was chosen as a victim, and we got the lock immediately:
no need to wait then */
static
-ulint
+dberr_t
lock_table_enqueue_waiting(
/*=======================*/
ulint mode, /*!< in: lock mode this transaction is
@@ -4333,7 +4395,7 @@ Locks the specified database table in the mode given. If the lock cannot
be granted immediately, the query thread is put to wait.
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-ulint
+dberr_t
lock_table(
/*=======*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is set,
@@ -4344,7 +4406,7 @@ lock_table(
que_thr_t* thr) /*!< in: query thread */
{
trx_t* trx;
- ulint err;
+ dberr_t err;
const lock_t* wait_for;
ut_ad(table && thr);
@@ -4570,11 +4632,38 @@ lock_release(
lock = UT_LIST_GET_LAST(trx->lock.trx_locks)) {
if (lock_get_type_low(lock) == LOCK_REC) {
- lock_rec_dequeue_from_page(lock);
+#ifdef UNIV_DEBUG
+ /* Check if the transcation locked a record
+ in a system table in X mode. It should have set
+ the dict_op code correctly if it did. */
+ if (lock->index->table->id < DICT_HDR_FIRST_ID
+ && lock_get_mode(lock) == LOCK_X) {
+
+ ut_ad(lock_get_mode(lock) != LOCK_IX);
+ ut_ad(trx->dict_operation != TRX_DICT_OP_NONE);
+ }
+#endif /* UNIV_DEBUG */
+
+ lock_rec_dequeue_from_page(lock);
} else {
+ dict_table_t* table;
+
+ table = lock->un_member.tab_lock.table;
+#ifdef UNIV_DEBUG
ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
+ /* Check if the transcation locked a system table
+ in IX mode. It should have set the dict_op code
+ correctly if it did. */
+ if (table->id < DICT_HDR_FIRST_ID
+ && (lock_get_mode(lock) == LOCK_X
+ || lock_get_mode(lock) == LOCK_IX)) {
+
+ ut_ad(trx->dict_operation != TRX_DICT_OP_NONE);
+ }
+#endif /* UNIV_DEBUG */
+
if (lock_get_mode(lock) != LOCK_IS
&& trx->undo_no != 0) {
@@ -4582,8 +4671,7 @@ lock_release(
block the use of the MySQL query cache for
all currently active transactions. */
- lock->un_member.tab_lock.table
- ->query_cache_inv_trx_id = max_trx_id;
+ table->query_cache_inv_trx_id = max_trx_id;
}
lock_table_dequeue(lock);
@@ -5059,7 +5147,9 @@ lock_print_info_summary(
"LATEST DETECTED DEADLOCK\n"
"------------------------\n", file);
- ut_copy_file(file, lock_latest_err_file);
+ if (!srv_read_only_mode) {
+ ut_copy_file(file, lock_latest_err_file);
+ }
}
fputs("------------\n"
@@ -5085,6 +5175,10 @@ lock_print_info_summary(
/* Should never be in this state while the system is running. */
ut_error;
+ case PURGE_STATE_DISABLED:
+ fprintf(file, "disabled");
+ break;
+
case PURGE_STATE_RUN:
fprintf(file, "running");
/* Check if it is waiting for more data to arrive. */
@@ -5418,6 +5512,8 @@ lock_rec_queue_validate(
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
ut_ad(lock_mutex_own() == locked_lock_trx_sys);
+ ut_ad(!index || dict_index_is_clust(index)
+ || !dict_index_is_online_ddl(index));
heap_no = page_rec_get_heap_no(rec);
@@ -5694,20 +5790,26 @@ lock_rec_block_validate(
If the lock exists in lock_rec_validate_page() we assert
!block->page.file_page_was_freed. */
+ buf_block_t* block;
mtr_t mtr;
- mtr_start(&mtr);
+ /* Make sure that the tablespace is not deleted while we are
+ trying to access the page. */
+ if (!fil_inc_pending_ops(space)) {
+ mtr_start(&mtr);
+ block = buf_page_get_gen(
+ space, fil_space_get_zip_size(space),
+ page_no, RW_X_LATCH, NULL,
+ BUF_GET_POSSIBLY_FREED,
+ __FILE__, __LINE__, &mtr);
- buf_block_t* block = buf_page_get_gen(
- space, fil_space_get_zip_size(space),
- page_no, RW_X_LATCH, NULL,
- BUF_GET_POSSIBLY_FREED,
- __FILE__, __LINE__, &mtr);
+ buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
- buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
+ ut_ad(lock_rec_validate_page(block));
+ mtr_commit(&mtr);
- ut_ad(lock_rec_validate_page(block));
- mtr_commit(&mtr);
+ fil_decr_pending_ops(space);
+ }
}
/*********************************************************************//**
@@ -5765,7 +5867,7 @@ the query thread to the lock wait state and inserts a waiting request
for a gap x-lock to the lock queue.
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-ulint
+dberr_t
lock_rec_insert_check_and_lock(
/*===========================*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is
@@ -5783,10 +5885,13 @@ lock_rec_insert_check_and_lock(
const rec_t* next_rec;
trx_t* trx;
lock_t* lock;
- ulint err;
+ dberr_t err;
ulint next_rec_heap_no;
ut_ad(block->frame == page_align(rec));
+ ut_ad(!dict_index_is_online_ddl(index)
+ || dict_index_is_clust(index)
+ || (flags & BTR_CREATE_FLAG));
if (flags & BTR_NO_LOCKING_FLAG) {
@@ -5803,11 +5908,9 @@ lock_rec_insert_check_and_lock(
to hold trx->mutex here. */
/* When inserting a record into an index, the table must be at
- least IX-locked or we must be building an index, in which case
- the table must be at least S-locked. */
- ut_ad(lock_table_has(trx, index->table, LOCK_IX)
- || (*index->name == TEMP_INDEX_PREFIX
- && lock_table_has(trx, index->table, LOCK_S)));
+ least IX-locked. When we are building an index, we would pass
+ BTR_NO_LOCKING_FLAG and skip the locking altogether. */
+ ut_ad(lock_table_has(trx, index->table, LOCK_IX));
lock = lock_rec_get_first(block, next_rec_heap_no);
@@ -5850,7 +5953,7 @@ lock_rec_insert_check_and_lock(
err = lock_rec_enqueue_waiting(
LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION,
- block, next_rec_heap_no, index, thr);
+ block, next_rec_heap_no, NULL, index, thr);
trx_mutex_exit(trx);
} else {
@@ -5871,6 +5974,9 @@ lock_rec_insert_check_and_lock(
page_update_max_trx_id(block,
buf_block_get_page_zip(block),
trx->id, mtr);
+ default:
+ /* We only care about the two return values. */
+ break;
}
#ifdef UNIV_DEBUG
@@ -5920,6 +6026,7 @@ lock_rec_convert_impl_to_expl(
this transaction. The transaction may have been
committed a long time ago. */
} else {
+ ut_ad(!dict_index_is_online_ddl(index));
trx_id = lock_sec_rec_some_has_impl(rec, index, offsets);
/* The transaction can be committed before the
trx_is_active(trx_id, NULL) check below, because we are not
@@ -5943,10 +6050,26 @@ lock_rec_convert_impl_to_expl(
if (impl_trx != NULL
&& !lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, block,
heap_no, impl_trx)) {
+ ulint type_mode = (LOCK_REC | LOCK_X
+ | LOCK_REC_NOT_GAP);
+
+ /* If the delete-marked record was locked already,
+ we should reserve lock waiting for impl_trx as
+ implicit lock. Because cannot lock at this moment.*/
+
+ if (rec_get_deleted_flag(rec, rec_offs_comp(offsets))
+ && lock_rec_other_has_conflicting(
+ static_cast<enum lock_mode>
+ (LOCK_X | LOCK_REC_NOT_GAP), block,
+ heap_no, impl_trx)) {
+
+ type_mode |= (LOCK_WAIT
+ | LOCK_CONV_BY_OTHER);
+ }
lock_rec_add_to_queue(
- LOCK_REC | LOCK_X | LOCK_REC_NOT_GAP,
- block, heap_no, index, impl_trx, FALSE);
+ type_mode, block, heap_no, index,
+ impl_trx, FALSE);
}
lock_mutex_exit();
@@ -5962,7 +6085,7 @@ lock wait state and inserts a waiting request for a record x-lock to the
lock queue.
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-ulint
+dberr_t
lock_clust_rec_modify_check_and_lock(
/*=================================*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
@@ -5974,7 +6097,7 @@ lock_clust_rec_modify_check_and_lock(
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
que_thr_t* thr) /*!< in: query thread */
{
- ulint err;
+ dberr_t err;
ulint heap_no;
ut_ad(rec_offs_validate(rec, index, offsets));
@@ -6020,7 +6143,7 @@ Checks if locks of other transactions prevent an immediate modify (delete
mark or delete unmark) of a secondary index record.
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-ulint
+dberr_t
lock_sec_rec_modify_check_and_lock(
/*===============================*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
@@ -6032,13 +6155,15 @@ lock_sec_rec_modify_check_and_lock(
clustered index record first: see the
comment below */
dict_index_t* index, /*!< in: secondary index */
- que_thr_t* thr, /*!< in: query thread */
+ que_thr_t* thr, /*!< in: query thread
+ (can be NULL if BTR_NO_LOCKING_FLAG) */
mtr_t* mtr) /*!< in/out: mini-transaction */
{
- ulint err;
+ dberr_t err;
ulint heap_no;
ut_ad(!dict_index_is_clust(index));
+ ut_ad(!dict_index_is_online_ddl(index) || (flags & BTR_CREATE_FLAG));
ut_ad(block->frame == page_align(rec));
if (flags & BTR_NO_LOCKING_FLAG) {
@@ -6103,7 +6228,7 @@ secondary index record.
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-enum db_err
+dberr_t
lock_sec_rec_read_check_and_lock(
/*=============================*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
@@ -6124,10 +6249,11 @@ lock_sec_rec_read_check_and_lock(
LOCK_REC_NOT_GAP */
que_thr_t* thr) /*!< in: query thread */
{
- enum db_err err;
- ulint heap_no;
+ dberr_t err;
+ ulint heap_no;
ut_ad(!dict_index_is_clust(index));
+ ut_ad(!dict_index_is_online_ddl(index));
ut_ad(block->frame == page_align(rec));
ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
ut_ad(rec_offs_validate(rec, index, offsets));
@@ -6180,7 +6306,7 @@ lock on the record.
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-enum db_err
+dberr_t
lock_clust_rec_read_check_and_lock(
/*===============================*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
@@ -6201,8 +6327,8 @@ lock_clust_rec_read_check_and_lock(
LOCK_REC_NOT_GAP */
que_thr_t* thr) /*!< in: query thread */
{
- enum db_err err;
- ulint heap_no;
+ dberr_t err;
+ ulint heap_no;
ut_ad(dict_index_is_clust(index));
ut_ad(block->frame == page_align(rec));
@@ -6230,7 +6356,8 @@ lock_clust_rec_read_check_and_lock(
ut_ad(mode != LOCK_S
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
- err = lock_rec_lock(FALSE, mode | gap_mode, block, heap_no, index, thr);
+ err = lock_rec_lock(FALSE, mode | gap_mode,
+ block, heap_no, index, thr);
MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
@@ -6251,7 +6378,7 @@ lock_clust_rec_read_check_and_lock() that does not require the parameter
"offsets".
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
UNIV_INTERN
-ulint
+dberr_t
lock_clust_rec_read_check_and_lock_alt(
/*===================================*/
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
@@ -6274,7 +6401,7 @@ lock_clust_rec_read_check_and_lock_alt(
mem_heap_t* tmp_heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
- ulint err;
+ dberr_t err;
rec_offs_init(offsets_);
offsets = rec_get_offsets(rec, index, offsets,
@@ -6469,6 +6596,8 @@ lock_get_table(
{
switch (lock_get_type_low(lock)) {
case LOCK_REC:
+ ut_ad(dict_index_is_clust(lock->index)
+ || !dict_index_is_online_ddl(lock->index));
return(lock->index->table);
case LOCK_TABLE:
return(lock->un_member.tab_lock.table);
@@ -6521,6 +6650,8 @@ lock_rec_get_index(
const lock_t* lock) /*!< in: lock */
{
ut_a(lock_get_type_low(lock) == LOCK_REC);
+ ut_ad(dict_index_is_clust(lock->index)
+ || !dict_index_is_online_ddl(lock->index));
return(lock->index);
}
@@ -6536,6 +6667,8 @@ lock_rec_get_index_name(
const lock_t* lock) /*!< in: lock */
{
ut_a(lock_get_type_low(lock) == LOCK_REC);
+ ut_ad(dict_index_is_clust(lock->index)
+ || !dict_index_is_online_ddl(lock->index));
return(lock->index->name);
}
@@ -6581,6 +6714,7 @@ lock_cancel_waiting_and_release(
ut_ad(lock_mutex_own());
ut_ad(trx_mutex_own(lock->trx));
+ ut_ad(!(lock->type_mode & LOCK_CONV_BY_OTHER));
lock->trx->lock.cancel = TRUE;
@@ -6656,10 +6790,14 @@ lock_trx_release_locks(
{
assert_trx_in_list(trx);
- if (UNIV_UNLIKELY(trx_state_eq(trx, TRX_STATE_PREPARED))) {
+ if (trx_state_eq(trx, TRX_STATE_PREPARED)) {
mutex_enter(&trx_sys->mutex);
ut_a(trx_sys->n_prepared_trx > 0);
trx_sys->n_prepared_trx--;
+ if (trx->is_recovered) {
+ ut_a(trx_sys->n_prepared_recovered_trx > 0);
+ trx_sys->n_prepared_recovered_trx--;
+ }
mutex_exit(&trx_sys->mutex);
} else {
ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
@@ -6714,12 +6852,12 @@ was selected as a deadlock victim, or if it has to wait then cancel
the wait lock.
@return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */
UNIV_INTERN
-enum db_err
+dberr_t
lock_trx_handle_wait(
/*=================*/
trx_t* trx) /*!< in/out: trx lock state */
{
- enum db_err err;
+ dberr_t err;
lock_mutex_enter();
@@ -6800,6 +6938,8 @@ lock_table_locks_lookup(
ut_a(lock->trx == trx);
if (lock_get_type_low(lock) == LOCK_REC) {
+ ut_ad(!dict_index_is_online_ddl(lock->index)
+ || dict_index_is_clust(lock->index));
if (lock->index->table == table) {
return(lock);
}
@@ -6828,18 +6968,89 @@ lock_table_has_locks(
lock_mutex_enter();
+ has_locks = UT_LIST_GET_LEN(table->locks) > 0 || table->n_rec_locks > 0;
+
#ifdef UNIV_DEBUG
- mutex_enter(&trx_sys->mutex);
+ if (!has_locks) {
+ mutex_enter(&trx_sys->mutex);
- ut_ad(lock_table_locks_lookup(table, &trx_sys->rw_trx_list) == NULL);
- ut_ad(lock_table_locks_lookup(table, &trx_sys->ro_trx_list) == NULL);
+ ut_ad(!lock_table_locks_lookup(table, &trx_sys->rw_trx_list));
+ ut_ad(!lock_table_locks_lookup(table, &trx_sys->ro_trx_list));
- mutex_exit(&trx_sys->mutex);
+ mutex_exit(&trx_sys->mutex);
+ }
#endif /* UNIV_DEBUG */
- has_locks = UT_LIST_GET_LEN(table->locks) > 0 || table->n_rec_locks > 0;
-
lock_mutex_exit();
return(has_locks);
}
+
+#ifdef UNIV_DEBUG
+/*******************************************************************//**
+Check if the transaction holds any locks on the sys tables
+or its records.
+@return the strongest lock found on any sys table or 0 for none */
+UNIV_INTERN
+const lock_t*
+lock_trx_has_sys_table_locks(
+/*=========================*/
+ const trx_t* trx) /*!< in: transaction to check */
+{
+ lint i;
+ const lock_t* strongest_lock = 0;
+ lock_mode strongest = LOCK_NONE;
+
+ lock_mutex_enter();
+
+ /* Find a valid mode. Note: ib_vector_size() can be 0. */
+ for (i = ib_vector_size(trx->lock.table_locks) - 1; i >= 0; --i) {
+ const lock_t* lock;
+
+ lock = *static_cast<const lock_t**>(
+ ib_vector_get(trx->lock.table_locks, i));
+
+ if (lock != NULL
+ && dict_is_sys_table(lock->un_member.tab_lock.table->id)) {
+
+ strongest = lock_get_mode(lock);
+ ut_ad(strongest != LOCK_NONE);
+ strongest_lock = lock;
+ break;
+ }
+ }
+
+ if (strongest == LOCK_NONE) {
+ lock_mutex_exit();
+ return(NULL);
+ }
+
+ for (/* No op */; i >= 0; --i) {
+ const lock_t* lock;
+
+ lock = *static_cast<const lock_t**>(
+ ib_vector_get(trx->lock.table_locks, i));
+
+ if (lock == NULL) {
+ continue;
+ }
+
+ ut_ad(trx == lock->trx);
+ ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
+ ut_ad(lock->un_member.tab_lock.table != NULL);
+
+ lock_mode mode = lock_get_mode(lock);
+
+ if (dict_is_sys_table(lock->un_member.tab_lock.table->id)
+ && lock_mode_stronger_or_eq(mode, strongest)) {
+
+ strongest = mode;
+ strongest_lock = lock;
+ }
+ }
+
+ lock_mutex_exit();
+
+ return(strongest_lock);
+}
+#endif /* UNIV_DEBUG */
diff --git a/storage/innobase/lock/lock0wait.cc b/storage/innobase/lock/lock0wait.cc
index 99059f19813..fc355d8bb6d 100644
--- a/storage/innobase/lock/lock0wait.cc
+++ b/storage/innobase/lock/lock0wait.cc
@@ -33,14 +33,6 @@ Created 25/5/2010 Sunny Bains
#include "ha_prototypes.h"
#include "lock0priv.h"
-UNIV_INTERN ibool srv_lock_timeout_active = FALSE;
-UNIV_INTERN ulint srv_n_lock_wait_count = 0;
-UNIV_INTERN ulint srv_n_lock_wait_current_count = 0;
-UNIV_INTERN ib_int64_t srv_n_lock_wait_time = 0;
-UNIV_INTERN ulint srv_n_lock_max_wait_time = 0;
-
-UNIV_INTERN os_event_t srv_timeout_event;
-
/*********************************************************************//**
Print the contents of the lock_sys_t::waiting_threads array. */
static
@@ -156,7 +148,7 @@ lock_wait_table_reserve_slot(
slot->thr->slot = slot;
if (slot->event == NULL) {
- slot->event = os_event_create(NULL);
+ slot->event = os_event_create();
ut_a(slot->event);
}
@@ -257,8 +249,8 @@ lock_wait_suspend_thread(
slot = lock_wait_table_reserve_slot(thr, lock_wait_timeout);
if (thr->lock_state == QUE_THR_LOCK_ROW) {
- srv_n_lock_wait_count++;
- srv_n_lock_wait_current_count++;
+ srv_stats.n_lock_wait_count.inc();
+ srv_stats.n_lock_wait_current_count.inc();
if (ut_usectime(&sec, &ms) == -1) {
start_time = -1;
@@ -269,7 +261,7 @@ lock_wait_suspend_thread(
/* Wake the lock timeout monitor thread, if it is suspended */
- os_event_set(srv_timeout_event);
+ os_event_set(lock_sys->timeout_event);
lock_wait_mutex_exit();
trx_mutex_exit(trx);
@@ -282,6 +274,8 @@ lock_wait_suspend_thread(
case RW_S_LATCH:
/* Release foreign key check latch */
row_mysql_unfreeze_data_dictionary(trx);
+
+ DEBUG_SYNC_C("lock_wait_release_s_latch_before_sleep");
break;
default:
/* There should never be a lock wait when the
@@ -341,14 +335,16 @@ lock_wait_suspend_thread(
diff_time = (ulint) (finish_time - start_time);
- srv_n_lock_wait_current_count--;
- srv_n_lock_wait_time = srv_n_lock_wait_time + diff_time;
+ srv_stats.n_lock_wait_current_count.dec();
+ srv_stats.n_lock_wait_time.add(diff_time);
- if (diff_time > srv_n_lock_max_wait_time &&
- /* only update the variable if we successfully
- retrieved the start and finish times. See Bug#36819. */
- start_time != -1 && finish_time != -1) {
- srv_n_lock_max_wait_time = diff_time;
+ /* Only update the variable if we successfully
+ retrieved the start and finish times. See Bug#36819. */
+ if (diff_time > lock_sys->n_lock_max_wait_time
+ && start_time != -1
+ && finish_time != -1) {
+
+ lock_sys->n_lock_max_wait_time = diff_time;
}
}
@@ -463,11 +459,15 @@ DECLARE_THREAD(lock_wait_timeout_thread)(
os_thread_create */
{
ib_int64_t sig_count = 0;
+ os_event_t event = lock_sys->timeout_event;
+
+ ut_ad(!srv_read_only_mode);
#ifdef UNIV_PFS_THREAD
pfs_register_thread(srv_lock_timeout_thread_key);
-#endif
- srv_lock_timeout_active = TRUE;
+#endif /* UNIV_PFS_THREAD */
+
+ lock_sys->timeout_thread_active = true;
do {
srv_slot_t* slot;
@@ -475,7 +475,8 @@ DECLARE_THREAD(lock_wait_timeout_thread)(
/* When someone is waiting for a lock, we wake up every second
and check if a timeout has passed for a lock wait */
- os_event_wait_time_low(srv_timeout_event, 1000000, sig_count);
+ os_event_wait_time_low(event, 1000000, sig_count);
+ sig_count = os_event_reset(event);
if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
break;
@@ -500,13 +501,13 @@ DECLARE_THREAD(lock_wait_timeout_thread)(
}
}
- sig_count = os_event_reset(srv_timeout_event);
+ sig_count = os_event_reset(event);
lock_wait_mutex_exit();
} while (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP);
- srv_lock_timeout_active = FALSE;
+ lock_sys->timeout_thread_active = false;
/* We count the number of threads in os_thread_exit(). A created
thread should always use that to exit and not use return() to exit. */
diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc
index 5e4a9dcf515..b6909f4771a 100644
--- a/storage/innobase/log/log0log.cc
+++ b/storage/innobase/log/log0log.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -37,7 +37,6 @@ Created 12/9/1995 Heikki Tuuri
#endif
#ifndef UNIV_HOTBACKUP
-#include "ha_prototypes.h"
#include "mem0mem.h"
#include "buf0buf.h"
#include "buf0flu.h"
@@ -49,6 +48,7 @@ Created 12/9/1995 Heikki Tuuri
#include "srv0start.h"
#include "trx0sys.h"
#include "trx0trx.h"
+#include "ha_prototypes.h"
#include "srv0mon.h"
/*
@@ -223,7 +223,7 @@ loop:
log_buffer_flush_to_disk();
- srv_log_waits++;
+ srv_stats.log_waits.inc();
ut_ad(++count < 50);
@@ -328,7 +328,7 @@ part_loop:
goto part_loop;
}
- srv_log_write_requests++;
+ srv_stats.log_write_requests.inc();
}
/************************************************************//**
@@ -748,9 +748,6 @@ log_init(void)
log_sys->lsn = LOG_START_LSN;
- MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
- log_sys->lsn - log_sys->last_checkpoint_lsn);
-
ut_a(LOG_BUFFER_SIZE >= 16 * OS_FILE_LOG_BLOCK_SIZE);
ut_a(LOG_BUFFER_SIZE >= 4 * UNIV_PAGE_SIZE);
@@ -784,11 +781,11 @@ log_init(void)
log_sys->n_pending_writes = 0;
- log_sys->no_flush_event = os_event_create(NULL);
+ log_sys->no_flush_event = os_event_create();
os_event_set(log_sys->no_flush_event);
- log_sys->one_flushed_event = os_event_create(NULL);
+ log_sys->one_flushed_event = os_event_create();
os_event_set(log_sys->one_flushed_event);
@@ -796,7 +793,6 @@ log_init(void)
log_sys->next_checkpoint_no = 0;
log_sys->last_checkpoint_lsn = log_sys->lsn;
- MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE, 0);
log_sys->n_pending_checkpoint_writes = 0;
@@ -832,7 +828,7 @@ log_init(void)
/* memset(log_sys->archive_buf, '\0', LOG_ARCHIVE_BUF_SIZE); */
- log_sys->archiving_on = os_event_create(NULL);
+ log_sys->archiving_on = os_event_create();
#endif /* UNIV_LOG_ARCHIVE */
/*----------------------------*/
@@ -1163,7 +1159,7 @@ log_group_file_header_flush(
MONITOR_INC(MONITOR_LOG_IO);
- srv_os_log_pending_writes++;
+ srv_stats.os_log_pending_writes.inc();
fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->space_id, 0,
(ulint) (dest_offset / UNIV_PAGE_SIZE),
@@ -1171,7 +1167,7 @@ log_group_file_header_flush(
OS_FILE_LOG_BLOCK_SIZE,
buf, group);
- srv_os_log_pending_writes--;
+ srv_stats.os_log_pending_writes.dec();
}
}
@@ -1238,8 +1234,9 @@ loop:
log_group_file_header_flush(group, (ulint)
(next_offset / group->file_size),
start_lsn);
- srv_os_log_written += OS_FILE_LOG_BLOCK_SIZE;
- srv_log_writes++;
+ srv_stats.os_log_written.add(OS_FILE_LOG_BLOCK_SIZE);
+
+ srv_stats.log_writes.inc();
}
if ((next_offset % group->file_size) + len > group->file_size) {
@@ -1289,7 +1286,7 @@ loop:
MONITOR_INC(MONITOR_LOG_IO);
- srv_os_log_pending_writes++;
+ srv_stats.os_log_pending_writes.inc();
ut_a(next_offset / UNIV_PAGE_SIZE <= ULINT_MAX);
@@ -1298,10 +1295,10 @@ loop:
(ulint) (next_offset % UNIV_PAGE_SIZE), write_len, buf,
group);
- srv_os_log_pending_writes--;
+ srv_stats.os_log_pending_writes.dec();
- srv_os_log_written += write_len;
- srv_log_writes++;
+ srv_stats.os_log_written.add(write_len);
+ srv_stats.log_writes.inc();
}
if (write_len < len) {
@@ -1345,6 +1342,8 @@ log_write_up_to(
ib_uint64_t write_lsn;
ib_uint64_t flush_lsn;
+ ut_ad(!srv_read_only_mode);
+
if (recv_no_ibuf_operations) {
/* Recovery is running and no operations on the log files are
allowed yet (the variable name .._no_ibuf_.. is misleading) */
@@ -1560,6 +1559,7 @@ log_buffer_flush_to_disk(void)
{
lsn_t lsn;
+ ut_ad(!srv_read_only_mode);
mutex_enter(&(log_sys->mutex));
lsn = log_sys->lsn;
@@ -1626,15 +1626,16 @@ log_flush_margin(void)
Advances the smallest lsn for which there are unflushed dirty blocks in the
buffer pool. NOTE: this function may only be called if the calling thread owns
no synchronization objects!
-@return FALSE if there was a flush batch of the same type running,
+@return false if there was a flush batch of the same type running,
which means that we could not start this flush batch */
static
-ibool
+bool
log_preflush_pool_modified_pages(
/*=============================*/
lsn_t new_oldest) /*!< in: try to advance oldest_modified_lsn
at least to this lsn */
{
+ bool success;
ulint n_pages;
if (recv_recovery_on) {
@@ -1650,13 +1651,12 @@ log_preflush_pool_modified_pages(
recv_apply_hashed_log_recs(TRUE);
}
- n_pages = buf_flush_list(ULINT_MAX, new_oldest);
+ success = buf_flush_list(ULINT_MAX, new_oldest, &n_pages);
buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
- if (n_pages == ULINT_UNDEFINED) {
-
- return(FALSE);
+ if (!success) {
+ MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS);
}
MONITOR_INC_VALUE_CUMULATIVE(
@@ -1665,7 +1665,7 @@ log_preflush_pool_modified_pages(
MONITOR_FLUSH_SYNC_PAGES,
n_pages);
- return(TRUE);
+ return(success);
}
/******************************************************//**
@@ -1765,6 +1765,7 @@ log_group_checkpoint(
byte* buf;
ulint i;
+ ut_ad(!srv_read_only_mode);
ut_ad(mutex_own(&(log_sys->mutex)));
#if LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE
# error "LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE"
@@ -1952,12 +1953,13 @@ log_groups_write_checkpoint_info(void)
ut_ad(mutex_own(&(log_sys->mutex)));
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- while (group) {
- log_group_checkpoint(group);
+ if (!srv_read_only_mode) {
+ for (group = UT_LIST_GET_FIRST(log_sys->log_groups);
+ group;
+ group = UT_LIST_GET_NEXT(log_groups, group)) {
- group = UT_LIST_GET_NEXT(log_groups, group);
+ log_group_checkpoint(group);
+ }
}
}
@@ -1982,6 +1984,8 @@ log_checkpoint(
{
lsn_t oldest_lsn;
+ ut_ad(!srv_read_only_mode);
+
if (recv_recovery_is_on()) {
recv_apply_hashed_log_recs(TRUE);
}
@@ -2088,38 +2092,6 @@ log_make_checkpoint_at(
}
/****************************************************************//**
-Checks if an asynchronous flushing of dirty pages is required in the
-background. This function is only called from the page cleaner thread.
-@return lsn to which the flushing should happen or LSN_MAX
-if flushing is not required */
-UNIV_INTERN
-lsn_t
-log_async_flush_lsn(void)
-/*=====================*/
-{
- lsn_t age;
- lsn_t oldest_lsn;
- lsn_t new_lsn = LSN_MAX;
-
- mutex_enter(&log_sys->mutex);
-
- oldest_lsn = log_buf_pool_get_oldest_modification();
-
- ut_a(log_sys->lsn >= oldest_lsn);
- age = log_sys->lsn - oldest_lsn;
-
- if (age > log_sys->max_modified_age_async) {
- /* An asynchronous preflush is required */
- ut_a(log_sys->lsn >= log_sys->max_modified_age_async);
- new_lsn = log_sys->lsn - log_sys->max_modified_age_async;
- }
-
- mutex_exit(&log_sys->mutex);
-
- return(new_lsn);
-}
-
-/****************************************************************//**
Tries to establish a big enough margin of free space in the log groups, such
that a new log entry can be catenated without an immediate need for a
checkpoint. NOTE: this function may only be called if the calling thread
@@ -2136,7 +2108,7 @@ log_checkpoint_margin(void)
lsn_t oldest_lsn;
ibool checkpoint_sync;
ibool do_checkpoint;
- ibool success;
+ bool success;
loop:
checkpoint_sync = FALSE;
do_checkpoint = FALSE;
@@ -3131,10 +3103,8 @@ logs_empty_and_mark_files_at_shutdown(void)
const char* thread_name;
ibool server_busy;
- if (srv_print_verbose_log) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Starting shutdown...\n");
- }
+ ib_logf(IB_LOG_LEVEL_INFO, "Starting shutdown...");
+
/* Wait until the master thread and all other operations are idle: our
algorithm only works if the server is idle at shutdown */
@@ -3155,9 +3125,8 @@ loop:
threads check will be done later. */
if (srv_print_verbose_log && count > 600) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Waiting for %s to exit\n",
- thread_name);
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Waiting for %s to exit", thread_name);
count = 0;
}
@@ -3174,9 +3143,8 @@ loop:
if (total_trx > 0) {
if (srv_print_verbose_log && count > 600) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Waiting for %lu "
- "active transactions to finish\n",
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Waiting for %lu active transactions to finish",
(ulong) total_trx);
count = 0;
@@ -3221,9 +3189,9 @@ loop:
break;
}
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Waiting for %s "
- "to be suspended\n", thread_type);
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Waiting for %s to be suspended",
+ thread_type);
count = 0;
}
@@ -3239,10 +3207,9 @@ loop:
++count;
os_thread_sleep(100000);
if (srv_print_verbose_log && count > 600) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Waiting for page_cleaner to "
- "finish flushing of buffer pool\n");
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Waiting for page_cleaner to "
+ "finish flushing of buffer pool");
count = 0;
}
}
@@ -3257,10 +3224,9 @@ loop:
if (server_busy) {
if (srv_print_verbose_log && count > 600) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Pending checkpoint_writes: %lu\n"
- " InnoDB: Pending log flush writes: %lu\n",
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Pending checkpoint_writes: %lu. "
+ "Pending log flush writes: %lu",
(ulong) log_sys->n_pending_checkpoint_writes,
(ulong) log_sys->n_pending_writes);
count = 0;
@@ -3272,9 +3238,8 @@ loop:
if (pending_io) {
if (srv_print_verbose_log && count > 600) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Waiting for %lu buffer page "
- "I/Os to complete\n",
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Waiting for %lu buffer page I/Os to complete",
(ulong) pending_io);
count = 0;
}
@@ -3286,41 +3251,50 @@ loop:
log_archive_all();
#endif /* UNIV_LOG_ARCHIVE */
if (srv_fast_shutdown == 2) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: MySQL has requested a very fast shutdown"
- " without flushing "
- "the InnoDB buffer pool to data files."
- " At the next mysqld startup "
- "InnoDB will do a crash recovery!\n");
-
- /* In this fastest shutdown we do not flush the buffer pool:
- it is essentially a 'crash' of the InnoDB server. Make sure
- that the log is all flushed to disk, so that we can recover
- all committed transactions in a crash recovery. We must not
- write the lsn stamps to the data files, since at a startup
- InnoDB deduces from the stamps if the previous shutdown was
- clean. */
-
- log_buffer_flush_to_disk();
-
- /* Check that the background threads stay suspended */
- thread_name = srv_any_background_threads_are_active();
- if (thread_name != NULL) {
- fprintf(stderr,
- "InnoDB: Warning: background thread %s"
- " woke up during shutdown\n", thread_name);
- goto loop;
+ if (!srv_read_only_mode) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "MySQL has requested a very fast shutdown "
+ "without flushing the InnoDB buffer pool to "
+ "data files. At the next mysqld startup "
+ "InnoDB will do a crash recovery!");
+
+ /* In this fastest shutdown we do not flush the
+ buffer pool:
+
+ it is essentially a 'crash' of the InnoDB server.
+ Make sure that the log is all flushed to disk, so
+ that we can recover all committed transactions in
+ a crash recovery. We must not write the lsn stamps
+ to the data files, since at a startup InnoDB deduces
+ from the stamps if the previous shutdown was clean. */
+
+ log_buffer_flush_to_disk();
+
+ /* Check that the background threads stay suspended */
+ thread_name = srv_any_background_threads_are_active();
+
+ if (thread_name != NULL) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Background thread %s woke up "
+ "during shutdown", thread_name);
+ goto loop;
+ }
}
srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;
+
fil_close_all_files();
+
thread_name = srv_any_background_threads_are_active();
+
ut_a(!thread_name);
+
return;
}
- log_make_checkpoint_at(LSN_MAX, TRUE);
+ if (!srv_read_only_mode) {
+ log_make_checkpoint_at(LSN_MAX, TRUE);
+ }
mutex_enter(&log_sys->mutex);
@@ -3356,15 +3330,17 @@ loop:
/* Check that the background threads stay suspended */
thread_name = srv_any_background_threads_are_active();
if (thread_name != NULL) {
- fprintf(stderr,
- "InnoDB: Warning: background thread %s"
- " woke up during shutdown\n", thread_name);
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Background thread %s woke up during shutdown",
+ thread_name);
goto loop;
}
- fil_flush_file_spaces(FIL_TABLESPACE);
- fil_flush_file_spaces(FIL_LOG);
+ if (!srv_read_only_mode) {
+ fil_flush_file_spaces(FIL_TABLESPACE);
+ fil_flush_file_spaces(FIL_LOG);
+ }
/* The call fil_write_flushed_lsn_to_data_files() will pass the buffer
pool: therefore it is essential that the buffer pool has been
@@ -3374,9 +3350,8 @@ loop:
if (!buf_all_freed()) {
if (srv_print_verbose_log && count > 600) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Waiting for dirty buffer "
- "pages to be flushed\n");
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Waiting for dirty buffer pages to be flushed");
count = 0;
}
@@ -3386,31 +3361,38 @@ loop:
srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;
/* Make some checks that the server really is quiet */
- ut_a(srv_get_active_thread_type() == SRV_NONE);
+ srv_thread_type type = srv_get_active_thread_type();
+ ut_a(type == SRV_NONE);
+
+ bool freed = buf_all_freed();
+ ut_a(freed);
- ut_a(buf_all_freed());
ut_a(lsn == log_sys->lsn);
if (lsn < srv_start_lsn) {
- fprintf(stderr,
- "InnoDB: Error: log sequence number"
- " at shutdown " LSN_PF "\n"
- "InnoDB: is lower than at startup " LSN_PF "!\n",
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Log sequence number at shutdown " LSN_PF " "
+ "is lower than at startup " LSN_PF "!",
lsn, srv_start_lsn);
}
srv_shutdown_lsn = lsn;
- fil_write_flushed_lsn_to_data_files(lsn, arch_log_no);
+ if (!srv_read_only_mode) {
+ fil_write_flushed_lsn_to_data_files(lsn, arch_log_no);
- fil_flush_file_spaces(FIL_TABLESPACE);
+ fil_flush_file_spaces(FIL_TABLESPACE);
+ }
fil_close_all_files();
/* Make some checks that the server really is quiet */
- ut_a(srv_get_active_thread_type() == SRV_NONE);
+ type = srv_get_active_thread_type();
+ ut_a(type == SRV_NONE);
+
+ freed = buf_all_freed();
+ ut_a(freed);
- ut_a(buf_all_freed());
ut_a(lsn == log_sys->lsn);
}
@@ -3544,7 +3526,7 @@ log_refresh_stats(void)
log_sys->last_printout_time = time(NULL);
}
-/**********************************************************************
+/********************************************************//**
Closes a log group. */
static
void
@@ -3574,12 +3556,12 @@ log_group_close(
mem_free(group);
}
-/**********************************************************
-Shutdown the log system but do not release all the memory. */
+/********************************************************//**
+Closes all log groups. */
UNIV_INTERN
void
-log_shutdown(void)
-/*==============*/
+log_group_close_all(void)
+/*=====================*/
{
log_group_t* group;
@@ -3593,6 +3575,16 @@ log_shutdown(void)
log_group_close(prev_group);
}
+}
+
+/********************************************************//**
+Shutdown the log system but do not release all the memory. */
+UNIV_INTERN
+void
+log_shutdown(void)
+/*==============*/
+{
+ log_group_close_all();
mem_free(log_sys->buf_ptr);
log_sys->buf_ptr = NULL;
@@ -3610,7 +3602,7 @@ log_shutdown(void)
#ifdef UNIV_LOG_ARCHIVE
rw_lock_free(&log_sys->archive_lock);
- os_event_create(log_sys->archiving_on);
+ os_event_create();
#endif /* UNIV_LOG_ARCHIVE */
#ifdef UNIV_LOG_DEBUG
@@ -3620,7 +3612,7 @@ log_shutdown(void)
recv_sys_close();
}
-/**********************************************************
+/********************************************************//**
Free the log system data structures. */
UNIV_INTERN
void
diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc
index f914fc2676c..8cefa9e4b70 100644
--- a/storage/innobase/log/log0recv.cc
+++ b/storage/innobase/log/log0recv.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -42,8 +43,6 @@ Created 9/20/1997 Heikki Tuuri
#include "trx0undo.h"
#include "trx0rec.h"
#include "fil0fil.h"
-#include "buf0dblwr.h"
-#include "srv0mon.h"
#ifndef UNIV_HOTBACKUP
# include "buf0rea.h"
# include "srv0srv.h"
@@ -158,6 +157,20 @@ UNIV_INTERN mysql_pfs_key_t trx_rollback_clean_thread_key;
UNIV_INTERN mysql_pfs_key_t recv_sys_mutex_key;
#endif /* UNIV_PFS_MUTEX */
+#ifndef UNIV_HOTBACKUP
+# ifdef UNIV_PFS_THREAD
+UNIV_INTERN mysql_pfs_key_t recv_writer_thread_key;
+# endif /* UNIV_PFS_THREAD */
+
+# ifdef UNIV_PFS_MUTEX
+UNIV_INTERN mysql_pfs_key_t recv_writer_mutex_key;
+# endif /* UNIV_PFS_MUTEX */
+
+/** Flag indicating if recv_writer thread is active. */
+UNIV_INTERN bool recv_writer_thread_active = false;
+UNIV_INTERN os_thread_t recv_writer_thread_handle = 0;
+#endif /* !UNIV_HOTBACKUP */
+
/* prototypes */
#ifndef UNIV_HOTBACKUP
@@ -186,6 +199,11 @@ recv_sys_create(void)
mutex_create(recv_sys_mutex_key, &recv_sys->mutex, SYNC_RECV);
+#ifndef UNIV_HOTBACKUP
+ mutex_create(recv_writer_mutex_key, &recv_sys->writer_mutex,
+ SYNC_LEVEL_VARYING);
+#endif /* !UNIV_HOTBACKUP */
+
recv_sys->heap = NULL;
recv_sys->addr_hash = NULL;
}
@@ -214,6 +232,11 @@ recv_sys_close(void)
mem_free(recv_sys->last_block_buf_start);
}
+#ifndef UNIV_HOTBACKUP
+ ut_ad(!recv_writer_thread_active);
+ mutex_free(&recv_sys->writer_mutex);
+#endif /* !UNIV_HOTBACKUP */
+
mutex_free(&recv_sys->mutex);
mem_free(recv_sys);
@@ -290,6 +313,58 @@ recv_sys_var_init(void)
recv_max_page_lsn = 0;
}
+
+/******************************************************************//**
+recv_writer thread tasked with flushing dirty pages from the buffer
+pools.
+@return a dummy parameter */
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(recv_writer_thread)(
+/*===============================*/
+ void* arg __attribute__((unused)))
+ /*!< in: a dummy parameter required by
+ os_thread_create */
+{
+ ut_ad(!srv_read_only_mode);
+
+#ifdef UNIV_PFS_THREAD
+ pfs_register_thread(recv_writer_thread_key);
+#endif /* UNIV_PFS_THREAD */
+
+#ifdef UNIV_DEBUG_THREAD_CREATION
+ fprintf(stderr, "InnoDB: recv_writer thread running, id %lu\n",
+ os_thread_pf(os_thread_get_curr_id()));
+#endif /* UNIV_DEBUG_THREAD_CREATION */
+
+ recv_writer_thread_active = true;
+
+ while (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
+
+ os_thread_sleep(100000);
+
+ mutex_enter(&recv_sys->writer_mutex);
+
+ if (!recv_recovery_on) {
+ mutex_exit(&recv_sys->writer_mutex);
+ break;
+ }
+
+ /* Flush pages from end of LRU if required */
+ buf_flush_LRU_tail();
+
+ mutex_exit(&recv_sys->writer_mutex);
+ }
+
+ recv_writer_thread_active = false;
+
+ /* We count the number of threads in os_thread_exit().
+ A created thread should always use that to exit and not
+ use return() to exit. */
+ os_thread_exit(NULL);
+
+ OS_THREAD_DUMMY_RETURN;
+}
#endif /* !UNIV_HOTBACKUP */
/************************************************************
@@ -310,9 +385,7 @@ recv_sys_init(
flush_list during recovery process.
As this initialization is done while holding the buffer pool
mutex we perform it before acquiring recv_sys->mutex. */
-#ifndef UNIV_HOTBACKUP
buf_flush_init_flush_rbt();
-#endif /* !UNIV_HOTBACKUP */
mutex_enter(&(recv_sys->mutex));
@@ -406,6 +479,7 @@ recv_sys_debug_free(void)
}
# endif /* UNIV_LOG_DEBUG */
+# ifdef UNIV_LOG_ARCHIVE
/********************************************************//**
Truncates possible corrupted or extra records from a log group. */
static
@@ -427,7 +501,6 @@ recv_truncate_group(
lsn_t finish_lsn1;
lsn_t finish_lsn2;
lsn_t finish_lsn;
- ulint i;
if (archived_lsn == LSN_MAX) {
/* Checkpoint was taken in the NOARCHIVELOG mode */
@@ -455,11 +528,7 @@ recv_truncate_group(
ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
- /* Write the log buffer full of zeros */
- for (i = 0; i < RECV_SCAN_SIZE; i++) {
-
- *(log_sys->buf + i) = '\0';
- }
+ memset(log_sys->buf, 0, RECV_SCAN_SIZE);
start_lsn = ut_uint64_align_down(recovered_lsn,
OS_FILE_LOG_BLOCK_SIZE);
@@ -499,11 +568,7 @@ recv_truncate_group(
return;
}
- /* Write the log buffer full of zeros */
- for (i = 0; i < RECV_SCAN_SIZE; i++) {
-
- *(log_sys->buf + i) = '\0';
- }
+ memset(log_sys->buf, 0, RECV_SCAN_SIZE);
start_lsn = end_lsn;
}
@@ -560,6 +625,7 @@ recv_copy_group(
start_lsn = end_lsn;
}
}
+# endif /* UNIV_LOG_ARCHIVE */
/********************************************************//**
Copies a log segment from the most up-to-date log group to the other log
@@ -570,10 +636,12 @@ static
void
recv_synchronize_groups(
/*====================*/
- log_group_t* up_to_date_group) /*!< in: the most up-to-date
+#ifdef UNIV_LOG_ARCHIVE
+ log_group_t* up_to_date_group /*!< in: the most up-to-date
log group */
+#endif
+ )
{
- log_group_t* group;
lsn_t start_lsn;
lsn_t end_lsn;
lsn_t recovered_lsn;
@@ -590,11 +658,17 @@ recv_synchronize_groups(
ut_a(start_lsn != end_lsn);
log_group_read_log_seg(LOG_RECOVER, recv_sys->last_block,
- up_to_date_group, start_lsn, end_lsn);
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
+#ifdef UNIV_LOG_ARCHIVE
+ up_to_date_group,
+#else /* UNIV_LOG_ARCHIVE */
+ UT_LIST_GET_FIRST(log_sys->log_groups),
+#endif /* UNIV_LOG_ARCHIVE */
+ start_lsn, end_lsn);
- while (group) {
+ for (log_group_t* group = UT_LIST_GET_FIRST(log_sys->log_groups);
+ group;
+ group = UT_LIST_GET_NEXT(log_groups, group)) {
+#ifdef UNIV_LOG_ARCHIVE
if (group != up_to_date_group) {
/* Copy log data if needed */
@@ -602,13 +676,11 @@ recv_synchronize_groups(
recv_copy_group(group, up_to_date_group,
recovered_lsn);
}
-
+#endif /* UNIV_LOG_ARCHIVE */
/* Update the fields in the group struct to correspond to
recovered_lsn */
log_group_set_fields(group, recovered_lsn);
-
- group = UT_LIST_GET_NEXT(log_groups, group);
}
/* Copy the checkpoint info to the groups; remember that we have
@@ -661,8 +733,8 @@ recv_check_cp_is_consistent(
/********************************************************//**
Looks for the maximum consistent checkpoint from the log groups.
@return error code or DB_SUCCESS */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
recv_find_max_checkpoint(
/*=====================*/
log_group_t** max_group, /*!< out: max group */
@@ -958,8 +1030,11 @@ recv_parse_or_apply_log_rec_body(
not NULL, then the log record is
applied to the page, and the log
record should be complete then */
- mtr_t* mtr) /*!< in: mtr or NULL; should be non-NULL
+ mtr_t* mtr, /*!< in: mtr or NULL; should be non-NULL
if and only if block is non-NULL */
+ ulint space_id)
+ /*!< in: tablespace id obtained by
+ parsing initial log record */
{
dict_index_t* index = NULL;
page_t* page;
@@ -1151,18 +1226,22 @@ recv_parse_or_apply_log_rec_body(
ptr, end_ptr, block, index, mtr);
}
break;
- case MLOG_PAGE_REORGANIZE: case MLOG_COMP_PAGE_REORGANIZE:
+ case MLOG_PAGE_REORGANIZE:
+ case MLOG_COMP_PAGE_REORGANIZE:
+ case MLOG_ZIP_PAGE_REORGANIZE:
ut_ad(!page || page_type == FIL_PAGE_INDEX);
if (NULL != (ptr = mlog_parse_index(
ptr, end_ptr,
- type == MLOG_COMP_PAGE_REORGANIZE,
+ type != MLOG_PAGE_REORGANIZE,
&index))) {
ut_a(!page
|| (ibool)!!page_is_comp(page)
== dict_table_is_comp(index->table));
- ptr = btr_parse_page_reorganize(ptr, end_ptr, index,
- block, mtr);
+ ptr = btr_parse_page_reorganize(
+ ptr, end_ptr, index,
+ type == MLOG_ZIP_PAGE_REORGANIZE,
+ block, mtr);
}
break;
case MLOG_PAGE_CREATE: case MLOG_COMP_PAGE_CREATE:
@@ -1231,8 +1310,11 @@ recv_parse_or_apply_log_rec_body(
ut_ad(!page || page_type != FIL_PAGE_TYPE_ALLOCATED);
ptr = mlog_parse_string(ptr, end_ptr, page, page_zip);
break;
- case MLOG_FILE_CREATE:
case MLOG_FILE_RENAME:
+ ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type,
+ space_id, 0);
+ break;
+ case MLOG_FILE_CREATE:
case MLOG_FILE_DELETE:
case MLOG_FILE_CREATE2:
ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, 0, 0);
@@ -1257,6 +1339,16 @@ recv_parse_or_apply_log_rec_body(
ptr = page_zip_parse_compress(ptr, end_ptr,
page, page_zip);
break;
+ case MLOG_ZIP_PAGE_COMPRESS_NO_DATA:
+ if (NULL != (ptr = mlog_parse_index(
+ ptr, end_ptr, TRUE, &index))) {
+
+ ut_a(!page || ((ibool)!!page_is_comp(page)
+ == dict_table_is_comp(index->table)));
+ ptr = page_zip_parse_compress_no_data(
+ ptr, end_ptr, page, page_zip, index);
+ }
+ break;
default:
ptr = NULL;
recv_sys->found_corrupt_log = TRUE;
@@ -1611,7 +1703,8 @@ recv_recover_page_func(
recv_parse_or_apply_log_rec_body(recv->type, buf,
buf + recv->len,
- block, &mtr);
+ block, &mtr,
+ recv_addr->space);
end_lsn = recv->start_lsn + recv->len;
mach_write_to_8(FIL_PAGE_LSN + page, end_lsn);
@@ -1740,7 +1833,6 @@ recv_apply_hashed_log_recs(
{
recv_addr_t* recv_addr;
ulint i;
- ulint n_pages;
ibool has_printed = FALSE;
mtr_t mtr;
loop:
@@ -1778,11 +1870,11 @@ loop:
if (recv_addr->state == RECV_NOT_PROCESSED) {
if (!has_printed) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Starting an"
- " apply batch of log records"
- " to the database...\n"
- "InnoDB: Progress in percents: ",
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Starting an apply batch"
+ " of log records"
+ " to the database...");
+ fputs("InnoDB: Progress in percent: ",
stderr);
has_printed = TRUE;
}
@@ -1839,6 +1931,8 @@ loop:
}
if (!allow_ibuf) {
+ bool success;
+
/* Flush all the file pages to disk and invalidate them in
the buffer pool */
@@ -1846,13 +1940,24 @@ loop:
mutex_exit(&(recv_sys->mutex));
mutex_exit(&(log_sys->mutex));
- n_pages = buf_flush_list(ULINT_MAX, LSN_MAX);
- ut_a(n_pages != ULINT_UNDEFINED);
+ /* Stop the recv_writer thread from issuing any LRU
+ flush batches. */
+ mutex_enter(&recv_sys->writer_mutex);
+
+ /* Wait for any currently run batch to end. */
+ buf_flush_wait_LRU_batch_end();
+
+ success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL);
+
+ ut_a(success);
buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
buf_pool_invalidate();
+ /* Allow batches from recv_writer thread. */
+ mutex_exit(&recv_sys->writer_mutex);
+
mutex_enter(&(log_sys->mutex));
mutex_enter(&(recv_sys->mutex));
ut_d(recv_no_log_write = FALSE);
@@ -1892,9 +1997,10 @@ recv_apply_log_recs_for_backup(void)
block = back_block1;
- fputs("InnoDB: Starting an apply batch of log records"
- " to the database...\n"
- "InnoDB: Progress in percents: ", stderr);
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Starting an apply batch of log records to the database...");
+
+ fputs("InnoDB: Progress in percent: ", stderr);
n_hash_cells = hash_get_n_cells(recv_sys->addr_hash);
@@ -2079,7 +2185,7 @@ recv_parse_log_rec(
#endif /* UNIV_LOG_LSN_DEBUG */
new_ptr = recv_parse_or_apply_log_rec_body(*type, new_ptr, end_ptr,
- NULL, NULL);
+ NULL, NULL, *space);
if (UNIV_UNLIKELY(new_ptr == NULL)) {
return(0);
@@ -2686,11 +2792,21 @@ recv_scan_log_recs(
if (recv_log_scan_is_startup_type
&& !recv_needed_recovery) {
- fprintf(stderr,
- "InnoDB: Log scan progressed"
- " past the checkpoint lsn " LSN_PF "\n",
- recv_sys->scanned_lsn);
- recv_init_crash_recovery();
+ if (!srv_read_only_mode) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Log scan progressed past the "
+ "checkpoint lsn " LSN_PF "",
+ recv_sys->scanned_lsn);
+
+ recv_init_crash_recovery();
+ } else {
+
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Recovery skipped, "
+ "--innodb-read-only set!");
+
+ return(TRUE);
+ }
}
#endif /* !UNIV_HOTBACKUP */
@@ -2838,20 +2954,15 @@ void
recv_init_crash_recovery(void)
/*==========================*/
{
+ ut_ad(!srv_read_only_mode);
ut_a(!recv_needed_recovery);
recv_needed_recovery = TRUE;
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Database was not"
- " shut down normally!\n"
- "InnoDB: Starting crash recovery.\n");
-
- fprintf(stderr,
- "InnoDB: Reading tablespace information"
- " from the .ibd files...\n");
+ ib_logf(IB_LOG_LEVEL_INFO, "Database was not shutdown normally!");
+ ib_logf(IB_LOG_LEVEL_INFO, "Starting crash recovery.");
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Reading tablespace information from the .ibd files...");
fil_load_single_table_tablespaces();
@@ -2862,11 +2973,12 @@ recv_init_crash_recovery(void)
if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
- fprintf(stderr,
- "InnoDB: Restoring possible"
- " half-written data pages from"
- " the doublewrite\n"
- "InnoDB: buffer...\n");
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Restoring possible half-written data pages ");
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "from the doublewrite buffer...");
+
buf_dblwr_init_or_restore_pages(TRUE);
}
}
@@ -2878,7 +2990,7 @@ recv_recovery_from_checkpoint_finish should be called later to complete
the recovery and free the resources used in it.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
recv_recovery_from_checkpoint_start_func(
/*=====================================*/
#ifdef UNIV_LOG_ARCHIVE
@@ -2890,19 +3002,18 @@ recv_recovery_from_checkpoint_start_func(
{
log_group_t* group;
log_group_t* max_cp_group;
- log_group_t* up_to_date_group;
ulint max_cp_field;
lsn_t checkpoint_lsn;
ib_uint64_t checkpoint_no;
- lsn_t old_scanned_lsn;
lsn_t group_scanned_lsn = 0;
lsn_t contiguous_lsn;
#ifdef UNIV_LOG_ARCHIVE
+ log_group_t* up_to_date_group;
lsn_t archived_lsn;
#endif /* UNIV_LOG_ARCHIVE */
byte* buf;
byte log_hdr_buf[LOG_FILE_HDR_SIZE];
- ulint err;
+ dberr_t err;
#ifdef UNIV_LOG_ARCHIVE
ut_ad(type != LOG_CHECKPOINT || limit_lsn == LSN_MAX);
@@ -2923,10 +3034,10 @@ recv_recovery_from_checkpoint_start_func(
}
if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) {
- fprintf(stderr,
- "InnoDB: The user has set SRV_FORCE_NO_LOG_REDO on\n");
- fprintf(stderr,
- "InnoDB: Skipping log redo\n");
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "The user has set SRV_FORCE_NO_LOG_REDO on, "
+ "skipping log redo");
return(DB_SUCCESS);
}
@@ -2967,17 +3078,24 @@ recv_recovery_from_checkpoint_start_func(
if (0 == ut_memcmp(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
(byte*)"ibbackup", (sizeof "ibbackup") - 1)) {
+
+ if (srv_read_only_mode) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot restore from ibbackup, InnoDB running "
+ "in read-only mode!");
+
+ return(DB_ERROR);
+ }
+
/* This log file was created by ibbackup --restore: print
a note to the user about it */
- fprintf(stderr,
- "InnoDB: The log file was created by"
- " ibbackup --apply-log at\n"
- "InnoDB: %s\n",
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "The log file was created by ibbackup --apply-log "
+ "at %s. The following crash recovery is part of a "
+ "normal restore.",
log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP);
- fprintf(stderr,
- "InnoDB: NOTE: the following crash recovery"
- " is part of a normal restore.\n");
/* Wipe over the label now */
@@ -3017,9 +3135,9 @@ recv_recovery_from_checkpoint_start_func(
contiguous_lsn = ut_uint64_align_down(recv_sys->scanned_lsn,
OS_FILE_LOG_BLOCK_SIZE);
+#ifdef UNIV_LOG_ARCHIVE
if (TYPE_CHECKPOINT) {
up_to_date_group = max_cp_group;
-#ifdef UNIV_LOG_ARCHIVE
} else {
ulint capacity;
@@ -3055,8 +3173,8 @@ recv_recovery_from_checkpoint_start_func(
group->scanned_lsn = group_scanned_lsn;
up_to_date_group = group;
-#endif /* UNIV_LOG_ARCHIVE */
}
+#endif /* UNIV_LOG_ARCHIVE */
ut_ad(RECV_SCAN_SIZE <= log_sys->buf_size);
@@ -3071,19 +3189,21 @@ recv_recovery_from_checkpoint_start_func(
/* Set the flag to publish that we are doing startup scan. */
recv_log_scan_is_startup_type = TYPE_CHECKPOINT;
while (group) {
- old_scanned_lsn = recv_sys->scanned_lsn;
+#ifdef UNIV_LOG_ARCHIVE
+ lsn_t old_scanned_lsn = recv_sys->scanned_lsn;
+#endif /* UNIV_LOG_ARCHIVE */
recv_group_scan_log_recs(group, &contiguous_lsn,
&group_scanned_lsn);
group->scanned_lsn = group_scanned_lsn;
+#ifdef UNIV_LOG_ARCHIVE
if (old_scanned_lsn < group_scanned_lsn) {
/* We found a more up-to-date group */
up_to_date_group = group;
}
-#ifdef UNIV_LOG_ARCHIVE
if ((type == LOG_ARCHIVE)
&& (group == recv_sys->archive_group)) {
group = UT_LIST_GET_NEXT(log_groups, group);
@@ -3104,70 +3224,73 @@ recv_recovery_from_checkpoint_start_func(
|| checkpoint_lsn != min_flushed_lsn) {
if (checkpoint_lsn < max_flushed_lsn) {
- fprintf(stderr,
- "InnoDB: #########################"
- "#################################\n"
- "InnoDB: "
- "WARNING!\n"
- "InnoDB: The log sequence number"
- " in ibdata files is higher\n"
- "InnoDB: than the log sequence number"
- " in the ib_logfiles! Are you sure\n"
- "InnoDB: you are using the right"
- " ib_logfiles to start up"
- " the database?\n"
- "InnoDB: Log sequence number in"
- " ib_logfiles is " LSN_PF ", log\n"
- "InnoDB: sequence numbers stamped"
- " to ibdata file headers are between\n"
- "InnoDB: " LSN_PF " and " LSN_PF ".\n"
- "InnoDB: #########################"
- "#################################\n",
+
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "The log sequence number "
+ "in the ibdata files is higher "
+ "than the log sequence number "
+ "in the ib_logfiles! Are you sure "
+ "you are using the right "
+ "ib_logfiles to start up the database. "
+ "Log sequence number in the "
+ "ib_logfiles is " LSN_PF ", log"
+ "sequence numbers stamped "
+ "to ibdata file headers are between "
+ "" LSN_PF " and " LSN_PF ".",
checkpoint_lsn,
min_flushed_lsn,
max_flushed_lsn);
}
if (!recv_needed_recovery) {
- fprintf(stderr,
- "InnoDB: The log sequence number"
- " in ibdata files does not match\n"
- "InnoDB: the log sequence number"
- " in the ib_logfiles!\n");
- recv_init_crash_recovery();
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "The log sequence numbers "
+ LSN_PF " and " LSN_PF
+ " in ibdata files do not match"
+ " the log sequence number "
+ LSN_PF
+ " in the ib_logfiles!",
+ min_flushed_lsn,
+ max_flushed_lsn,
+ checkpoint_lsn);
+
+ if (!srv_read_only_mode) {
+ recv_init_crash_recovery();
+ } else {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Can't initiate database "
+ "recovery, running "
+ "in read-only-mode.");
+ return(DB_READ_ONLY);
+ }
}
}
- if (!recv_needed_recovery) {
- /* Init the doublewrite buffer memory structure */
- buf_dblwr_init_or_restore_pages(FALSE);
+ if (!srv_read_only_mode) {
+ if (recv_needed_recovery) {
+ /* Spawn the background thread to
+ flush dirty pages from the buffer
+ pools. */
+ recv_writer_thread_handle =
+ os_thread_create(
+ recv_writer_thread, 0, 0);
+ } else {
+ /* Init the doublewrite buffer memory
+ structure */
+ buf_dblwr_init_or_restore_pages(FALSE);
+ }
}
}
/* We currently have only one log group */
- if (group_scanned_lsn < checkpoint_lsn) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: ERROR: We were only able to scan the log"
- " up to\n"
- "InnoDB: " LSN_PF ", but a checkpoint was at "
- LSN_PF ".\n"
- "InnoDB: It is possible that"
- " the database is now corrupt!\n",
- group_scanned_lsn,
- checkpoint_lsn);
- }
-
- if (group_scanned_lsn < recv_max_page_lsn) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: ERROR: We were only able to scan the log"
- " up to " LSN_PF "\n"
- "InnoDB: but a database page a had an lsn " LSN_PF "."
- " It is possible that the\n"
- "InnoDB: database is now corrupt!\n",
- group_scanned_lsn,
- recv_max_page_lsn);
+ if (group_scanned_lsn < checkpoint_lsn
+ || group_scanned_lsn < recv_max_page_lsn) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "We scanned the log up to "
+ LSN_PF ". A checkpoint was at " LSN_PF
+ " and the maximum LSN on a database page was " LSN_PF
+ ". It is possible that the database is now corrupt!",
+ group_scanned_lsn, checkpoint_lsn, recv_max_page_lsn);
}
if (recv_sys->recovered_lsn < checkpoint_lsn) {
@@ -3179,7 +3302,10 @@ recv_recovery_from_checkpoint_start_func(
return(DB_SUCCESS);
}
- ut_error;
+ /* No harm in trying to do RO access. */
+ if (!srv_read_only_mode) {
+ ut_error;
+ }
return(DB_ERROR);
}
@@ -3192,9 +3318,11 @@ recv_recovery_from_checkpoint_start_func(
#ifdef UNIV_LOG_ARCHIVE
log_sys->archived_lsn = archived_lsn;
-#endif /* UNIV_LOG_ARCHIVE */
recv_synchronize_groups(up_to_date_group);
+#else /* UNIV_LOG_ARCHIVE */
+ recv_synchronize_groups();
+#endif /* UNIV_LOG_ARCHIVE */
if (!recv_needed_recovery) {
ut_a(checkpoint_lsn == recv_sys->recovered_lsn);
@@ -3225,13 +3353,13 @@ recv_recovery_from_checkpoint_start_func(
}
#endif /* UNIV_LOG_ARCHIVE */
- mutex_enter(&(recv_sys->mutex));
+ mutex_enter(&recv_sys->mutex);
recv_sys->apply_log_recs = TRUE;
- mutex_exit(&(recv_sys->mutex));
+ mutex_exit(&recv_sys->mutex);
- mutex_exit(&(log_sys->mutex));
+ mutex_exit(&log_sys->mutex);
recv_lsn_checks_on = TRUE;
@@ -3287,10 +3415,40 @@ recv_recovery_from_checkpoint_finish(void)
"InnoDB: a backup!\n");
}
- /* Free the resources of the recovery system */
+ /* Make sure that the recv_writer thread is done. This is
+ required because it grabs various mutexes and we want to
+ ensure that when we enable sync_order_checks there is no
+ mutex currently held by any thread. */
+ mutex_enter(&recv_sys->writer_mutex);
+ /* Free the resources of the recovery system */
recv_recovery_on = FALSE;
+ /* By acquring the mutex we ensure that the recv_writer thread
+ won't trigger any more LRU batchtes. Now wait for currently
+ in progress batches to finish. */
+ buf_flush_wait_LRU_batch_end();
+
+ mutex_exit(&recv_sys->writer_mutex);
+
+ ulint count = 0;
+ while (recv_writer_thread_active) {
+ ++count;
+ os_thread_sleep(100000);
+ if (srv_print_verbose_log && count > 600) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Waiting for recv_writer to "
+ "finish flushing of buffer pool");
+ count = 0;
+ }
+ }
+
+#ifdef __WIN__
+ if (recv_writer_thread_handle) {
+ CloseHandle(recv_writer_thread_handle);
+ }
+#endif /* __WIN__ */
+
#ifndef UNIV_LOG_DEBUG
recv_sys_debug_free();
#endif
@@ -3310,20 +3468,22 @@ void
recv_recovery_rollback_active(void)
/*===============================*/
{
- int i;
-
#ifdef UNIV_SYNC_DEBUG
/* Wait for a while so that created threads have time to suspend
themselves before we switch the latching order checks on */
os_thread_sleep(1000000);
+ ut_ad(!recv_writer_thread_active);
+
/* Switch latching order checks on in sync0sync.cc */
sync_order_checks_on = TRUE;
#endif
/* We can't start any (DDL) transactions if UNDO logging
has been disabled, additionally disable ROLLBACK of recovered
user transactions. */
- if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) {
+ if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO
+ && !srv_read_only_mode) {
+
/* Drop partially created indexes. */
row_merge_drop_temp_indexes();
/* Drop temporary tables. */
@@ -3338,7 +3498,7 @@ recv_recovery_rollback_active(void)
/* Rollback the uncommitted transactions which have no user
session */
- os_thread_create(trx_rollback_or_clean_all_recovered, &i, NULL);
+ os_thread_create(trx_rollback_or_clean_all_recovered, 0, 0);
}
}
@@ -3348,18 +3508,18 @@ UNIV_INTERN
void
recv_reset_logs(
/*============*/
- lsn_t lsn, /*!< in: reset to this lsn
- rounded up to be divisible by
- OS_FILE_LOG_BLOCK_SIZE, after
- which we add
- LOG_BLOCK_HDR_SIZE */
#ifdef UNIV_LOG_ARCHIVE
ulint arch_log_no, /*!< in: next archived log file number */
-#endif /* UNIV_LOG_ARCHIVE */
- ibool new_logs_created)/*!< in: TRUE if resetting logs
+ ibool new_logs_created,/*!< in: TRUE if resetting logs
is done at the log creation;
FALSE if it is done after
archive recovery */
+#endif /* UNIV_LOG_ARCHIVE */
+ lsn_t lsn) /*!< in: reset to this lsn
+ rounded up to be divisible by
+ OS_FILE_LOG_BLOCK_SIZE, after
+ which we add
+ LOG_BLOCK_HDR_SIZE */
{
log_group_t* group;
@@ -3375,12 +3535,12 @@ recv_reset_logs(
#ifdef UNIV_LOG_ARCHIVE
group->archived_file_no = arch_log_no;
group->archived_offset = 0;
-#endif /* UNIV_LOG_ARCHIVE */
if (!new_logs_created) {
recv_truncate_group(group, group->lsn, group->lsn,
group->lsn, group->lsn);
}
+#endif /* UNIV_LOG_ARCHIVE */
group = UT_LIST_GET_NEXT(log_groups, group);
}
@@ -3805,7 +3965,7 @@ recv_recovery_from_archive_start(
recv_apply_hashed_log_recs(FALSE);
- recv_reset_logs(recv_sys->recovered_lsn, 0, FALSE);
+ recv_reset_logs(0, FALSE, recv_sys->recovered_lsn);
}
mutex_exit(&(log_sys->mutex));
diff --git a/storage/innobase/mem/mem0dbg.cc b/storage/innobase/mem/mem0dbg.cc
index 83e14ad6071..308c2979551 100644
--- a/storage/innobase/mem/mem0dbg.cc
+++ b/storage/innobase/mem/mem0dbg.cc
@@ -30,7 +30,7 @@ Created 6/9/1994 Heikki Tuuri
/* The mutex which protects in the debug version the hash table
containing the list of live memory heaps, and also the global
variables below. */
-UNIV_INTERN mutex_t mem_hash_mutex;
+UNIV_INTERN ib_mutex_t mem_hash_mutex;
#ifdef UNIV_PFS_MUTEX
/* Key to register mem_hash_mutex with performance schema */
@@ -58,8 +58,7 @@ static ibool mem_hash_initialized = FALSE;
/* The node of the list containing currently allocated memory heaps */
-typedef struct mem_hash_node_struct mem_hash_node_t;
-struct mem_hash_node_struct {
+struct mem_hash_node_t {
UT_LIST_NODE_T(mem_hash_node_t)
list; /*!< hash list node */
mem_heap_t* heap; /*!< memory heap */
diff --git a/storage/innobase/mem/mem0pool.cc b/storage/innobase/mem/mem0pool.cc
index 2135926a26f..fe9a84d21fa 100644
--- a/storage/innobase/mem/mem0pool.cc
+++ b/storage/innobase/mem/mem0pool.cc
@@ -100,12 +100,12 @@ pool, and after that its locks will grow into the buffer pool. */
/** Data structure for a memory pool. The space is allocated using the buddy
algorithm, where free list i contains areas of size 2 to power i. */
-struct mem_pool_struct{
+struct mem_pool_t{
byte* buf; /*!< memory pool */
ulint size; /*!< memory common pool size */
ulint reserved; /*!< amount of currently allocated
memory */
- mutex_t mutex; /*!< mutex protecting this struct */
+ ib_mutex_t mutex; /*!< mutex protecting this struct */
UT_LIST_BASE_NODE_T(mem_area_t)
free_list[64]; /*!< lists of free memory areas: an
area is put to the list whose number
@@ -116,7 +116,7 @@ struct mem_pool_struct{
UNIV_INTERN mem_pool_t* mem_comm_pool = NULL;
#ifdef UNIV_PFS_MUTEX
-/* Key to register mutex in mem_pool_struct with performance schema */
+/* Key to register mutex in mem_pool_t with performance schema */
UNIV_INTERN mysql_pfs_key_t mem_pool_mutex_key;
#endif /* UNIV_PFS_MUTEX */
diff --git a/storage/innobase/mtr/mtr0log.cc b/storage/innobase/mtr/mtr0log.cc
index d549de8802e..5335cb4c9ef 100644
--- a/storage/innobase/mtr/mtr0log.cc
+++ b/storage/innobase/mtr/mtr0log.cc
@@ -240,8 +240,8 @@ mlog_parse_nbytes(
}
/********************************************************//**
-Writes 1 - 4 bytes to a file page buffered in the buffer pool.
-Writes the corresponding log record to the mini-transaction log. */
+Writes 1, 2 or 4 bytes to a file page. Writes the corresponding log
+record to the mini-transaction log if mtr is not NULL. */
UNIV_INTERN
void
mlog_write_ulint(
@@ -251,8 +251,6 @@ mlog_write_ulint(
byte type, /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
mtr_t* mtr) /*!< in: mini-transaction handle */
{
- byte* log_ptr;
-
switch (type) {
case MLOG_1BYTE:
mach_write_to_1(ptr, val);
@@ -267,27 +265,29 @@ mlog_write_ulint(
ut_error;
}
- log_ptr = mlog_open(mtr, 11 + 2 + 5);
+ if (mtr != 0) {
+ byte* log_ptr = mlog_open(mtr, 11 + 2 + 5);
- /* If no logging is requested, we may return now */
- if (log_ptr == NULL) {
+ /* If no logging is requested, we may return now */
- return;
- }
+ if (log_ptr != 0) {
- log_ptr = mlog_write_initial_log_record_fast(ptr, type, log_ptr, mtr);
+ log_ptr = mlog_write_initial_log_record_fast(
+ ptr, type, log_ptr, mtr);
- mach_write_to_2(log_ptr, page_offset(ptr));
- log_ptr += 2;
+ mach_write_to_2(log_ptr, page_offset(ptr));
+ log_ptr += 2;
- log_ptr += mach_write_compressed(log_ptr, val);
+ log_ptr += mach_write_compressed(log_ptr, val);
- mlog_close(mtr, log_ptr);
+ mlog_close(mtr, log_ptr);
+ }
+ }
}
/********************************************************//**
-Writes 8 bytes to a file page buffered in the buffer pool.
-Writes the corresponding log record to the mini-transaction log. */
+Writes 8 bytes to a file page. Writes the corresponding log
+record to the mini-transaction log, only if mtr is not NULL */
UNIV_INTERN
void
mlog_write_ull(
@@ -296,29 +296,25 @@ mlog_write_ull(
ib_uint64_t val, /*!< in: value to write */
mtr_t* mtr) /*!< in: mini-transaction handle */
{
- byte* log_ptr;
-
- ut_ad(ptr && mtr);
-
mach_write_to_8(ptr, val);
- log_ptr = mlog_open(mtr, 11 + 2 + 9);
-
- /* If no logging is requested, we may return now */
- if (log_ptr == NULL) {
+ if (mtr != 0) {
+ byte* log_ptr = mlog_open(mtr, 11 + 2 + 9);
- return;
- }
+ /* If no logging is requested, we may return now */
+ if (log_ptr != 0) {
- log_ptr = mlog_write_initial_log_record_fast(ptr, MLOG_8BYTES,
- log_ptr, mtr);
+ log_ptr = mlog_write_initial_log_record_fast(
+ ptr, MLOG_8BYTES, log_ptr, mtr);
- mach_write_to_2(log_ptr, page_offset(ptr));
- log_ptr += 2;
+ mach_write_to_2(log_ptr, page_offset(ptr));
+ log_ptr += 2;
- log_ptr += mach_ull_write_compressed(log_ptr, val);
+ log_ptr += mach_ull_write_compressed(log_ptr, val);
- mlog_close(mtr, log_ptr);
+ mlog_close(mtr, log_ptr);
+ }
+ }
}
#ifndef UNIV_HOTBACKUP
@@ -439,12 +435,13 @@ UNIV_INTERN
byte*
mlog_open_and_write_index(
/*======================*/
- mtr_t* mtr, /*!< in: mtr */
- const byte* rec, /*!< in: index record or page */
- dict_index_t* index, /*!< in: record descriptor */
- byte type, /*!< in: log item type */
- ulint size) /*!< in: requested buffer size in bytes
- (if 0, calls mlog_close() and returns NULL) */
+ mtr_t* mtr, /*!< in: mtr */
+ const byte* rec, /*!< in: index record or page */
+ const dict_index_t* index, /*!< in: record descriptor */
+ byte type, /*!< in: log item type */
+ ulint size) /*!< in: requested buffer size in bytes
+ (if 0, calls mlog_close() and
+ returns NULL) */
{
byte* log_ptr;
const byte* log_start;
diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc
index 4832e8c7710..10b4686b720 100644
--- a/storage/innobase/mtr/mtr0mtr.cc
+++ b/storage/innobase/mtr/mtr0mtr.cc
@@ -142,9 +142,9 @@ mtr_memo_slot_note_modification(
mtr_t* mtr, /*!< in: mtr */
mtr_memo_slot_t* slot) /*!< in: memo slot */
{
- ut_ad(mtr);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
ut_ad(mtr->modifications);
+ ut_ad(!srv_read_only_mode);
+ ut_ad(mtr->magic_n == MTR_MAGIC_N);
if (slot->object != NULL && slot->type == MTR_MEMO_PAGE_X_FIX) {
buf_block_t* block = (buf_block_t*) slot->object;
@@ -170,7 +170,7 @@ mtr_memo_note_modifications(
dyn_array_t* memo;
ulint offset;
- ut_ad(mtr);
+ ut_ad(!srv_read_only_mode);
ut_ad(mtr->magic_n == MTR_MAGIC_N);
ut_ad(mtr->state == MTR_COMMITTING); /* Currently only used in
commit */
@@ -191,19 +191,51 @@ mtr_memo_note_modifications(
}
/************************************************************//**
+Append the dirty pages to the flush list. */
+static
+void
+mtr_add_dirtied_pages_to_flush_list(
+/*================================*/
+ mtr_t* mtr) /*!< in/out: mtr */
+{
+ ut_ad(!srv_read_only_mode);
+
+ /* No need to acquire log_flush_order_mutex if this mtr has
+ not dirtied a clean page. log_flush_order_mutex is used to
+ ensure ordered insertions in the flush_list. We need to
+ insert in the flush_list iff the page in question was clean
+ before modifications. */
+ if (mtr->made_dirty) {
+ log_flush_order_mutex_enter();
+ }
+
+ /* It is now safe to release the log mutex because the
+ flush_order mutex will ensure that we are the first one
+ to insert into the flush list. */
+ log_release();
+
+ if (mtr->modifications) {
+ mtr_memo_note_modifications(mtr);
+ }
+
+ if (mtr->made_dirty) {
+ log_flush_order_mutex_exit();
+ }
+}
+
+/************************************************************//**
Writes the contents of a mini-transaction log, if any, to the database log. */
static
void
mtr_log_reserve_and_write(
/*======================*/
- mtr_t* mtr) /*!< in: mtr */
+ mtr_t* mtr) /*!< in/out: mtr */
{
dyn_array_t* mlog;
- dyn_block_t* block;
ulint data_size;
byte* first_data;
- ut_ad(mtr);
+ ut_ad(!srv_read_only_mode);
mlog = &(mtr->log);
@@ -217,14 +249,21 @@ mtr_log_reserve_and_write(
}
if (mlog->heap == NULL) {
+ ulint len;
+
+ len = mtr->log_mode != MTR_LOG_NO_REDO
+ ? dyn_block_get_used(mlog) : 0;
+
mtr->end_lsn = log_reserve_and_write_fast(
- first_data, dyn_block_get_used(mlog),
- &mtr->start_lsn);
+ first_data, len, &mtr->start_lsn);
+
if (mtr->end_lsn) {
/* Success. We have the log mutex.
Add pages to flush list and exit */
- goto func_exit;
+ mtr_add_dirtied_pages_to_flush_list(mtr);
+
+ return;
}
}
@@ -235,43 +274,24 @@ mtr_log_reserve_and_write(
if (mtr->log_mode == MTR_LOG_ALL) {
- block = mlog;
+ for (dyn_block_t* block = mlog;
+ block != 0;
+ block = dyn_array_get_next_block(mlog, block)) {
- while (block != NULL) {
- log_write_low(dyn_block_get_data(block),
- dyn_block_get_used(block));
- block = dyn_array_get_next_block(mlog, block);
+ log_write_low(
+ dyn_block_get_data(block),
+ dyn_block_get_used(block));
}
+
} else {
- ut_ad(mtr->log_mode == MTR_LOG_NONE);
+ ut_ad(mtr->log_mode == MTR_LOG_NONE
+ || mtr->log_mode == MTR_LOG_NO_REDO);
/* Do nothing */
}
mtr->end_lsn = log_close();
-func_exit:
-
- /* No need to acquire log_flush_order_mutex if this mtr has
- not dirtied a clean page. log_flush_order_mutex is used to
- ensure ordered insertions in the flush_list. We need to
- insert in the flush_list iff the page in question was clean
- before modifications. */
- if (mtr->made_dirty) {
- log_flush_order_mutex_enter();
- }
-
- /* It is now safe to release the log mutex because the
- flush_order mutex will ensure that we are the first one
- to insert into the flush list. */
- log_release();
-
- if (mtr->modifications) {
- mtr_memo_note_modifications(mtr);
- }
-
- if (mtr->made_dirty) {
- log_flush_order_mutex_exit();
- }
+ mtr_add_dirtied_pages_to_flush_list(mtr);
}
#endif /* !UNIV_HOTBACKUP */
@@ -294,6 +314,7 @@ mtr_commit(
ut_ad(!recv_no_log_write);
if (mtr->modifications && mtr->n_log_recs) {
+ ut_ad(!srv_read_only_mode);
mtr_log_reserve_and_write(mtr);
}
@@ -376,14 +397,8 @@ mtr_read_ulint(
ut_ad(mtr->state == MTR_ACTIVE);
ut_ad(mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_S_FIX)
|| mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_X_FIX));
- if (type == MLOG_1BYTE) {
- return(mach_read_from_1(ptr));
- } else if (type == MLOG_2BYTES) {
- return(mach_read_from_2(ptr));
- } else {
- ut_ad(type == MLOG_4BYTES);
- return(mach_read_from_4(ptr));
- }
+
+ return(mach_read_ulint(ptr, type));
}
#ifdef UNIV_DEBUG
diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc
index 62cde1cf728..5f0dc0d3667 100644
--- a/storage/innobase/os/os0file.cc
+++ b/storage/innobase/os/os0file.cc
@@ -1,6 +1,6 @@
/***********************************************************************
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
Portions of this file contain modifications contributed and copyrighted
@@ -60,24 +60,29 @@ Created 10/21/1995 Heikki Tuuri
#include <libaio.h>
#endif
+/** Insert buffer segment id */
+static const ulint IO_IBUF_SEGMENT = 0;
+
+/** Log segment id */
+static const ulint IO_LOG_SEGMENT = 1;
+
/* This specifies the file permissions InnoDB uses when it creates files in
Unix; the value of os_innodb_umask is initialized in ha_innodb.cc to
my_umask */
#ifndef __WIN__
/** Umask for creating files */
-UNIV_INTERN ulint os_innodb_umask
- = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
+UNIV_INTERN ulint os_innodb_umask = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
#else
/** Umask for creating files */
-UNIV_INTERN ulint os_innodb_umask = 0;
-#endif
+UNIV_INTERN ulint os_innodb_umask = 0;
+#endif /* __WIN__ */
#ifndef UNIV_HOTBACKUP
/* We use these mutexes to protect lseek + file i/o operation, if the
OS does not provide an atomic pread or pwrite, or similar */
#define OS_FILE_N_SEEK_MUTEXES 16
-UNIV_INTERN os_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES];
+UNIV_INTERN os_ib_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES];
/* In simulated aio, merge at most this many consecutive i/os */
#define OS_AIO_MERGE_N_CONSECUTIVE 64
@@ -147,10 +152,7 @@ UNIV_INTERN mysql_pfs_key_t innodb_file_temp_key;
#endif /* UNIV_PFS_IO */
/** The asynchronous i/o array slot structure */
-typedef struct os_aio_slot_struct os_aio_slot_t;
-
-/** The asynchronous i/o array slot structure */
-struct os_aio_slot_struct{
+struct os_aio_slot_t{
ibool is_read; /*!< TRUE if a read operation */
ulint pos; /*!< index of the slot in the aio
array */
@@ -182,15 +184,12 @@ struct os_aio_slot_struct{
struct iocb control; /* Linux control block for aio */
int n_bytes; /* bytes written/read. */
int ret; /* AIO return code */
-#endif
+#endif /* WIN_ASYNC_IO */
};
/** The asynchronous i/o array structure */
-typedef struct os_aio_array_struct os_aio_array_t;
-
-/** The asynchronous i/o array structure */
-struct os_aio_array_struct{
- os_mutex_t mutex; /*!< the mutex protecting the aio array */
+struct os_aio_array_t{
+ os_ib_mutex_t mutex; /*!< the mutex protecting the aio array */
os_event_t not_full;
/*!< The event which is set to the
signaled state when there is space in
@@ -223,7 +222,7 @@ struct os_aio_array_struct{
order. This can be used in
WaitForMultipleObjects; used only in
Windows */
-#endif
+#endif /* __WIN__ */
#if defined(LINUX_NATIVE_AIO)
io_context_t* aio_ctx;
@@ -235,7 +234,7 @@ struct os_aio_array_struct{
There is one such event for each
possible pending IO. The size of the
array is equal to n_slots. */
-#endif
+#endif /* LINUX_NATIV_AIO */
};
#if defined(LINUX_NATIVE_AIO)
@@ -283,7 +282,7 @@ UNIV_INTERN ibool os_has_said_disk_full = FALSE;
#if !defined(UNIV_HOTBACKUP) \
&& (!defined(HAVE_ATOMIC_BUILTINS) || UNIV_WORD_SIZE < 8)
/** The mutex protecting the following counts of pending I/O operations */
-static os_mutex_t os_file_count_mutex;
+static os_ib_mutex_t os_file_count_mutex;
#endif /* !UNIV_HOTBACKUP && (!HAVE_ATOMIC_BUILTINS || UNIV_WORD_SIZE < 8) */
/** Number of pending os_file_pread() operations */
@@ -336,7 +335,7 @@ ulint
os_get_os_version(void)
/*===================*/
{
- OSVERSIONINFO os_info;
+ OSVERSIONINFO os_info;
os_info.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
@@ -350,15 +349,15 @@ os_get_os_version(void)
switch (os_info.dwMajorVersion) {
case 3:
case 4:
- return OS_WINNT;
+ return(OS_WINNT);
case 5:
- return (os_info.dwMinorVersion == 0) ? OS_WIN2000
- : OS_WINXP;
+ return (os_info.dwMinorVersion == 0)
+ ? OS_WIN2000 : OS_WINXP;
case 6:
- return (os_info.dwMinorVersion == 0) ? OS_WINVISTA
- : OS_WIN7;
+ return (os_info.dwMinorVersion == 0)
+ ? OS_WINVISTA : OS_WIN7;
default:
- return OS_WIN7;
+ return(OS_WIN7);
}
} else {
ut_error;
@@ -377,16 +376,17 @@ static
ulint
os_file_get_last_error_low(
/*=======================*/
- ibool report_all_errors, /*!< in: TRUE if we want an error
+ bool report_all_errors, /*!< in: TRUE if we want an error
message printed of all errors */
- ibool on_error_silent) /*!< in: TRUE then don't print any
+ bool on_error_silent) /*!< in: TRUE then don't print any
diagnostic to the log */
{
- ulint err;
-
#ifdef __WIN__
- err = (ulint) GetLastError();
+ ulint err = (ulint) GetLastError();
+ if (err == ERROR_SUCCESS) {
+ return(0);
+ }
if (report_all_errors
|| (!on_error_silent
@@ -469,15 +469,18 @@ os_file_get_last_error_low(
return(100 + err);
}
#else
- err = (ulint) errno;
+ int err = errno;
+ if (err == 0) {
+ return(0);
+ }
if (report_all_errors
|| (err != ENOSPC && err != EEXIST && !on_error_silent)) {
ut_print_timestamp(stderr);
fprintf(stderr,
- " InnoDB: Operating system error number %lu"
- " in a file operation.\n", (ulong) err);
+ " InnoDB: Operating system error number %d"
+ " in a file operation.\n", err);
if (err == ENOENT) {
fprintf(stderr,
@@ -497,11 +500,11 @@ os_file_get_last_error_low(
" the access rights to\n"
"InnoDB: the directory.\n");
} else {
- if (strerror((int) err) != NULL) {
+ if (strerror(err) != NULL) {
fprintf(stderr,
- "InnoDB: Error number %lu"
+ "InnoDB: Error number %d"
" means '%s'.\n",
- err, strerror((int) err));
+ err, strerror(err));
}
@@ -552,10 +555,10 @@ UNIV_INTERN
ulint
os_file_get_last_error(
/*===================*/
- ibool report_all_errors) /*!< in: TRUE if we want an error
+ bool report_all_errors) /*!< in: TRUE if we want an error
message printed of all errors */
{
- return(os_file_get_last_error_low(report_all_errors, FALSE));
+ return(os_file_get_last_error_low(report_all_errors, false));
}
/****************************************************************//**
@@ -577,7 +580,7 @@ os_file_handle_error_cond_exit(
{
ulint err;
- err = os_file_get_last_error_low(FALSE, on_error_silent);
+ err = os_file_get_last_error_low(false, on_error_silent);
switch (err) {
case OS_FILE_DISK_FULL:
@@ -645,7 +648,8 @@ os_file_handle_error_cond_exit(
ut_print_timestamp(stderr);
fprintf(stderr, " InnoDB: File operation call: "
- "'%s'.\n", operation);
+ "'%s' returned OS error " ULINTPF ".\n",
+ operation, err);
}
if (should_exit) {
@@ -654,7 +658,9 @@ os_file_handle_error_cond_exit(
"operation.\n");
fflush(stderr);
- ut_error;
+
+ ut_ad(0); /* Report call stack, etc only in debug code. */
+ exit(1);
}
}
@@ -712,19 +718,23 @@ os_file_lock(
const char* name) /*!< in: file name */
{
struct flock lk;
+
+ ut_ad(!srv_read_only_mode);
+
lk.l_type = F_WRLCK;
lk.l_whence = SEEK_SET;
lk.l_start = lk.l_len = 0;
+
if (fcntl(fd, F_SETLK, &lk) == -1) {
- fprintf(stderr,
- "InnoDB: Unable to lock %s, error: %d\n", name, errno);
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unable to lock %s, error: %d", name, errno);
if (errno == EAGAIN || errno == EACCES) {
- fprintf(stderr,
- "InnoDB: Check that you do not already have"
- " another mysqld process\n"
- "InnoDB: using the same InnoDB data"
- " or log files.\n");
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Check that you do not already have "
+ "another mysqld process using the "
+ "same InnoDB data or log files.");
}
return(-1);
@@ -742,13 +752,11 @@ void
os_io_init_simple(void)
/*===================*/
{
- ulint i;
-
#if !defined(HAVE_ATOMIC_BUILTINS) || UNIV_WORD_SIZE < 8
os_file_count_mutex = os_mutex_create();
#endif /* !HAVE_ATOMIC_BUILTINS || UNIV_WORD_SIZE < 8 */
- for (i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) {
+ for (ulint i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) {
os_file_seek_mutexes[i] = os_mutex_create();
}
}
@@ -765,6 +773,8 @@ os_file_create_tmpfile(void)
FILE* file = NULL;
int fd = innobase_mysql_tmpfile();
+ ut_ad(!srv_read_only_mode);
+
if (fd >= 0) {
file = fdopen(fd, "w+b");
}
@@ -840,7 +850,7 @@ os_file_opendir(
}
return(dir);
-#endif
+#endif /* __WIN__ */
}
/***********************************************************************//**
@@ -874,7 +884,7 @@ os_file_closedir(
}
return(ret);
-#endif
+#endif /* __WIN__ */
}
/***********************************************************************//**
@@ -1054,10 +1064,12 @@ next_file:
}
/*****************************************************************//**
-This function attempts to create a directory named pathname. The new directory
-gets default permissions. On Unix the permissions are (0770 & ~umask). If the
-directory exists already, nothing is done and the call succeeds, unless the
-fail_if_exists arguments is true.
+This function attempts to create a directory named pathname. The new
+directory gets default permissions. On Unix the permissions are
+(0770 & ~umask). If the directory exists already, nothing is done and
+the call succeeds, unless the fail_if_exists arguments is true.
+If another error occurs, such as a permission error, this does not crash,
+but reports the error and returns FALSE.
@return TRUE if call succeeds, FALSE on error */
UNIV_INTERN
ibool
@@ -1075,13 +1087,14 @@ os_file_create_directory(
if (!(rcode != 0
|| (GetLastError() == ERROR_ALREADY_EXISTS
&& !fail_if_exists))) {
- /* failure */
- os_file_handle_error(pathname, "CreateDirectory");
+
+ os_file_handle_error_no_exit(
+ pathname, "CreateDirectory", FALSE);
return(FALSE);
}
- return (TRUE);
+ return(TRUE);
#else
int rcode;
@@ -1089,13 +1102,13 @@ os_file_create_directory(
if (!(rcode == 0 || (errno == EEXIST && !fail_if_exists))) {
/* failure */
- os_file_handle_error(pathname, "mkdir");
+ os_file_handle_error_no_exit(pathname, "mkdir", FALSE);
return(FALSE);
}
return (TRUE);
-#endif
+#endif /* __WIN__ */
}
/****************************************************************//**
@@ -1115,129 +1128,180 @@ os_file_create_simple_func(
OS_FILE_READ_WRITE */
ibool* success)/*!< out: TRUE if succeed, FALSE if error */
{
-#ifdef __WIN__
os_file_t file;
- DWORD create_flag;
+ ibool retry;
+
+#ifdef __WIN__
DWORD access;
+ DWORD create_flag;
DWORD attributes = 0;
- ibool retry;
ut_a(!(create_mode & OS_FILE_ON_ERROR_SILENT));
ut_a(!(create_mode & OS_FILE_ON_ERROR_NO_EXIT));
-try_again:
- ut_a(name);
if (create_mode == OS_FILE_OPEN) {
+
+ create_flag = OPEN_EXISTING;
+
+ } else if (srv_read_only_mode) {
+
create_flag = OPEN_EXISTING;
+
} else if (create_mode == OS_FILE_CREATE) {
+
create_flag = CREATE_NEW;
+
} else if (create_mode == OS_FILE_CREATE_PATH) {
- /* create subdirs along the path if needed */
+
+ ut_a(!srv_read_only_mode);
+
+ /* Create subdirs along the path if needed */
*success = os_file_create_subdirs_if_needed(name);
+
if (!*success) {
- ut_error;
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unable to create subdirectories '%s'",
+ name);
+
+ return((os_file_t) -1);
}
+
create_flag = CREATE_NEW;
create_mode = OS_FILE_CREATE;
+
} else {
- create_flag = 0;
- ut_error;
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unknown file create mode (%lu) for file '%s'",
+ create_mode, name);
+
+ return((os_file_t) -1);
}
if (access_type == OS_FILE_READ_ONLY) {
access = GENERIC_READ;
+ } else if (srv_read_only_mode) {
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "read only mode set. Unable to "
+ "open file '%s' in RW mode, trying RO mode", name);
+
+ access = GENERIC_READ;
+
} else if (access_type == OS_FILE_READ_WRITE) {
access = GENERIC_READ | GENERIC_WRITE;
} else {
- access = 0;
- ut_error;
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unknown file access type (%lu) for file '%s'",
+ access_type, name);
+
+ return((os_file_t) -1);
}
- file = CreateFile((LPCTSTR) name,
- access,
- FILE_SHARE_READ | FILE_SHARE_WRITE,
- /* file can be read and written also
- by other processes */
- NULL, /* default security attributes */
- create_flag,
- attributes,
- NULL); /*!< no template file */
+ do {
+ /* Use default security attributes and no template file. */
- if (file == INVALID_HANDLE_VALUE) {
- *success = FALSE;
+ file = CreateFile(
+ (LPCTSTR) name, access, FILE_SHARE_READ, NULL,
+ create_flag, attributes, NULL);
+
+ if (file == INVALID_HANDLE_VALUE) {
+
+ *success = FALSE;
- retry = os_file_handle_error(name,
- create_mode == OS_FILE_OPEN ?
- "open" : "create");
- if (retry) {
- goto try_again;
+ retry = os_file_handle_error(
+ name, create_mode == OS_FILE_OPEN ?
+ "open" : "create");
+
+ } else {
+ *success = TRUE;
+ retry = false;
}
- } else {
- *success = TRUE;
- }
- return(file);
+ } while (retry);
+
#else /* __WIN__ */
- os_file_t file;
int create_flag;
- ibool retry;
ut_a(!(create_mode & OS_FILE_ON_ERROR_SILENT));
ut_a(!(create_mode & OS_FILE_ON_ERROR_NO_EXIT));
-try_again:
- ut_a(name);
-
if (create_mode == OS_FILE_OPEN) {
+
if (access_type == OS_FILE_READ_ONLY) {
create_flag = O_RDONLY;
+ } else if (srv_read_only_mode) {
+ create_flag = O_RDONLY;
} else {
create_flag = O_RDWR;
}
+
+ } else if (srv_read_only_mode) {
+
+ create_flag = O_RDONLY;
+
} else if (create_mode == OS_FILE_CREATE) {
+
create_flag = O_RDWR | O_CREAT | O_EXCL;
+
} else if (create_mode == OS_FILE_CREATE_PATH) {
- /* create subdirs along the path if needed */
+
+ /* Create subdirs along the path if needed */
+
*success = os_file_create_subdirs_if_needed(name);
+
if (!*success) {
- return (-1);
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unable to create subdirectories '%s'",
+ name);
+
+ return((os_file_t) -1);
}
+
create_flag = O_RDWR | O_CREAT | O_EXCL;
create_mode = OS_FILE_CREATE;
} else {
- create_flag = 0;
- ut_error;
- }
- if (create_mode == OS_FILE_CREATE) {
- file = open(name, create_flag, S_IRUSR | S_IWUSR
- | S_IRGRP | S_IWGRP);
- } else {
- file = open(name, create_flag);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unknown file create mode (%lu) for file '%s'",
+ create_mode, name);
+
+ return((os_file_t) -1);
}
- if (file == -1) {
- *success = FALSE;
+ do {
+ file = ::open(name, create_flag, os_innodb_umask);
+
+ if (file == -1) {
+ *success = FALSE;
- retry = os_file_handle_error(name,
- create_mode == OS_FILE_OPEN ?
- "open" : "create");
- if (retry) {
- goto try_again;
+ retry = os_file_handle_error(
+ name,
+ create_mode == OS_FILE_OPEN
+ ? "open" : "create");
+ } else {
+ *success = TRUE;
+ retry = false;
}
+
+ } while (retry);
+
#ifdef USE_FILE_LOCK
- } else if (access_type == OS_FILE_READ_WRITE
- && os_file_lock(file, name)) {
+ if (!srv_read_only_mode
+ && *success
+ && access_type == OS_FILE_READ_WRITE
+ && os_file_lock(file, name)) {
+
*success = FALSE;
close(file);
file = -1;
-#endif
- } else {
- *success = TRUE;
}
+#endif /* USE_FILE_LOCK */
- return(file);
#endif /* __WIN__ */
+
+ return(file);
}
/****************************************************************//**
@@ -1259,12 +1323,13 @@ os_file_create_simple_no_error_handling_func(
used by a backup program reading the file */
ibool* success)/*!< out: TRUE if succeed, FALSE if error */
{
-#ifdef __WIN__
os_file_t file;
- DWORD create_flag;
+
+#ifdef __WIN__
DWORD access;
+ DWORD create_flag;
DWORD attributes = 0;
- DWORD share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE;
+ DWORD share_mode = FILE_SHARE_READ;
ut_a(name);
@@ -1273,46 +1338,53 @@ os_file_create_simple_no_error_handling_func(
if (create_mode == OS_FILE_OPEN) {
create_flag = OPEN_EXISTING;
+ } else if (srv_read_only_mode) {
+ create_flag = OPEN_EXISTING;
} else if (create_mode == OS_FILE_CREATE) {
create_flag = CREATE_NEW;
} else {
- create_flag = 0;
- ut_error;
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unknown file create mode (%lu) for file '%s'",
+ create_mode, name);
+
+ return((os_file_t) -1);
}
if (access_type == OS_FILE_READ_ONLY) {
access = GENERIC_READ;
+ } else if (srv_read_only_mode) {
+ access = GENERIC_READ;
} else if (access_type == OS_FILE_READ_WRITE) {
access = GENERIC_READ | GENERIC_WRITE;
} else if (access_type == OS_FILE_READ_ALLOW_DELETE) {
+
+ ut_a(!srv_read_only_mode);
+
access = GENERIC_READ;
- share_mode = FILE_SHARE_DELETE | FILE_SHARE_READ
- | FILE_SHARE_WRITE; /*!< A backup program has to give
- mysqld the maximum freedom to
- do what it likes with the
- file */
+
+ /*!< A backup program has to give mysqld the maximum
+ freedom to do what it likes with the file */
+
+ share_mode |= FILE_SHARE_DELETE | FILE_SHARE_WRITE;
} else {
- access = 0;
- ut_error;
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unknown file access type (%lu) for file '%s'",
+ access_type, name);
+
+ return((os_file_t) -1);
}
file = CreateFile((LPCTSTR) name,
access,
share_mode,
- NULL, /* default security attributes */
+ NULL, // Security attributes
create_flag,
attributes,
- NULL); /*!< no template file */
-
- if (file == INVALID_HANDLE_VALUE) {
- *success = FALSE;
- } else {
- *success = TRUE;
- }
+ NULL); // No template file
- return(file);
+ *success = (file != INVALID_HANDLE_VALUE);
#else /* __WIN__ */
- os_file_t file;
int create_flag;
ut_a(name);
@@ -1321,40 +1393,59 @@ os_file_create_simple_no_error_handling_func(
ut_a(!(create_mode & OS_FILE_ON_ERROR_NO_EXIT));
if (create_mode == OS_FILE_OPEN) {
+
if (access_type == OS_FILE_READ_ONLY) {
+
+ create_flag = O_RDONLY;
+
+ } else if (srv_read_only_mode) {
+
create_flag = O_RDONLY;
+
} else {
+
+ ut_a(access_type == OS_FILE_READ_WRITE
+ || access_type == OS_FILE_READ_ALLOW_DELETE);
+
create_flag = O_RDWR;
}
+
+ } else if (srv_read_only_mode) {
+
+ create_flag = O_RDONLY;
+
} else if (create_mode == OS_FILE_CREATE) {
+
create_flag = O_RDWR | O_CREAT | O_EXCL;
- } else {
- create_flag = 0;
- ut_error;
- }
- if (create_mode == OS_FILE_CREATE) {
- file = open(name, create_flag, S_IRUSR | S_IWUSR
- | S_IRGRP | S_IWGRP);
} else {
- file = open(name, create_flag);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unknown file create mode (%lu) for file '%s'",
+ create_mode, name);
+
+ return((os_file_t) -1);
}
- if (file == -1) {
- *success = FALSE;
+ file = ::open(name, create_flag, os_innodb_umask);
+
+ *success = file == -1 ? FALSE : TRUE;
+
#ifdef USE_FILE_LOCK
- } else if (access_type == OS_FILE_READ_WRITE
- && os_file_lock(file, name)) {
+ if (!srv_read_only_mode
+ && *success
+ && access_type == OS_FILE_READ_WRITE
+ && os_file_lock(file, name)) {
+
*success = FALSE;
close(file);
file = -1;
-#endif
- } else {
- *success = TRUE;
+
}
+#endif /* USE_FILE_LOCK */
- return(file);
#endif /* __WIN__ */
+
+ return(file);
}
/****************************************************************//**
@@ -1364,42 +1455,41 @@ void
os_file_set_nocache(
/*================*/
int fd /*!< in: file descriptor to alter */
- __attribute__((unused)),
- const char* file_name /*!< in: used in the diagnostic message */
- __attribute__((unused)),
+ __attribute__((unused)),
+ const char* file_name /*!< in: used in the diagnostic
+ message */
+ __attribute__((unused)),
const char* operation_name __attribute__((unused)))
- /*!< in: "open" or "create"; used in the
- diagnostic message */
+ /*!< in: "open" or "create"; used
+ in the diagnostic message */
{
/* some versions of Solaris may not have DIRECTIO_ON */
#if defined(UNIV_SOLARIS) && defined(DIRECTIO_ON)
if (directio(fd, DIRECTIO_ON) == -1) {
- int errno_save;
- errno_save = (int) errno;
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Failed to set DIRECTIO_ON "
- "on file %s: %s: %s, continuing anyway\n",
+ int errno_save = errno;
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Failed to set DIRECTIO_ON on file %s: %s: %s, "
+ "continuing anyway.",
file_name, operation_name, strerror(errno_save));
}
#elif defined(O_DIRECT)
if (fcntl(fd, F_SETFL, O_DIRECT) == -1) {
- int errno_save;
- errno_save = (int) errno;
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Failed to set O_DIRECT "
- "on file %s: %s: %s, continuing anyway\n",
+ int errno_save = errno;
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Failed to set O_DIRECT on file %s: %s: %s, "
+ "continuing anyway",
file_name, operation_name, strerror(errno_save));
+
if (errno_save == EINVAL) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: O_DIRECT is known to result in "
- "'Invalid argument' on Linux on tmpfs, "
- "see MySQL Bug#26662\n");
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "O_DIRECT is known to result in 'Invalid "
+ "argument' on Linux on tmpfs, see MySQL "
+ "Bug#26662");
}
}
-#endif
+#endif /* defined(UNIV_SOLARIS) && defined(DIRECTIO_ON) */
}
/****************************************************************//**
@@ -1425,138 +1515,155 @@ os_file_create_func(
ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */
ibool* success)/*!< out: TRUE if succeed, FALSE if error */
{
+ os_file_t file;
+ ibool retry;
ibool on_error_no_exit;
ibool on_error_silent;
#ifdef __WIN__
- os_file_t file;
- DWORD share_mode = FILE_SHARE_READ;
+ DBUG_EXECUTE_IF(
+ "ib_create_table_fail_disk_full",
+ *success = FALSE;
+ SetLastError(ERROR_DISK_FULL);
+ return((os_file_t) -1);
+ );
+#else /* __WIN__ */
+ DBUG_EXECUTE_IF(
+ "ib_create_table_fail_disk_full",
+ *success = FALSE;
+ errno = ENOSPC;
+ return((os_file_t) -1);
+ );
+#endif /* __WIN__ */
+
+#ifdef __WIN__
DWORD create_flag;
- DWORD attributes;
- ibool retry;
+ DWORD share_mode = FILE_SHARE_READ;
on_error_no_exit = create_mode & OS_FILE_ON_ERROR_NO_EXIT
? TRUE : FALSE;
+
on_error_silent = create_mode & OS_FILE_ON_ERROR_SILENT
? TRUE : FALSE;
create_mode &= ~OS_FILE_ON_ERROR_NO_EXIT;
create_mode &= ~OS_FILE_ON_ERROR_SILENT;
+ if (create_mode == OS_FILE_OPEN_RAW) {
- DBUG_EXECUTE_IF(
- "ib_create_table_fail_disk_full",
- *success = FALSE;
- SetLastError(ERROR_DISK_FULL);
- return((os_file_t) -1);
- );
-try_again:
- ut_a(name);
+ ut_a(!srv_read_only_mode);
- if (create_mode == OS_FILE_OPEN_RAW) {
create_flag = OPEN_EXISTING;
- share_mode = FILE_SHARE_WRITE;
+
+ /* On Windows Physical devices require admin privileges and
+ have to have the write-share mode set. See the remarks
+ section for the CreateFile() function documentation in MSDN. */
+
+ share_mode |= FILE_SHARE_WRITE;
+
} else if (create_mode == OS_FILE_OPEN
|| create_mode == OS_FILE_OPEN_RETRY) {
+
create_flag = OPEN_EXISTING;
+
+ } else if (srv_read_only_mode) {
+
+ create_flag = OPEN_EXISTING;
+
} else if (create_mode == OS_FILE_CREATE) {
+
create_flag = CREATE_NEW;
+
} else if (create_mode == OS_FILE_OVERWRITE) {
+
create_flag = CREATE_ALWAYS;
+
} else {
- create_flag = 0;
- ut_error;
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unknown file create mode (%lu) for file '%s'",
+ create_mode, name);
+
+ return((os_file_t) -1);
}
+ DWORD attributes = 0;
+
+#ifdef UNIV_HOTBACKUP
+ attributes |= FILE_FLAG_NO_BUFFERING;
+#else
if (purpose == OS_FILE_AIO) {
+
+#ifdef WIN_ASYNC_IO
/* If specified, use asynchronous (overlapped) io and no
buffering of writes in the OS */
- attributes = 0;
-#ifdef WIN_ASYNC_IO
+
if (srv_use_native_aio) {
- attributes = attributes | FILE_FLAG_OVERLAPPED;
+ attributes |= FILE_FLAG_OVERLAPPED;
}
-#endif
-#ifdef UNIV_NON_BUFFERED_IO
-# ifndef UNIV_HOTBACKUP
- if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
- /* Do not use unbuffered i/o to log files because
- value 2 denotes that we do not flush the log at every
- commit, but only once per second */
- } else if (srv_win_file_flush_method
- == SRV_WIN_IO_UNBUFFERED) {
- attributes = attributes | FILE_FLAG_NO_BUFFERING;
- }
-# else /* !UNIV_HOTBACKUP */
- attributes = attributes | FILE_FLAG_NO_BUFFERING;
-# endif /* !UNIV_HOTBACKUP */
-#endif /* UNIV_NON_BUFFERED_IO */
+#endif /* WIN_ASYNC_IO */
+
} else if (purpose == OS_FILE_NORMAL) {
- attributes = 0;
+ /* Use default setting. */
+ } else {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unknown purpose flag (%lu) while opening file '%s'",
+ purpose, name);
+
+ return((os_file_t)(-1));
+ }
+
#ifdef UNIV_NON_BUFFERED_IO
-# ifndef UNIV_HOTBACKUP
- if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
- /* Do not use unbuffered i/o to log files because
- value 2 denotes that we do not flush the log at every
- commit, but only once per second */
- } else if (srv_win_file_flush_method
- == SRV_WIN_IO_UNBUFFERED) {
- attributes = attributes | FILE_FLAG_NO_BUFFERING;
- }
-# else /* !UNIV_HOTBACKUP */
- attributes = attributes | FILE_FLAG_NO_BUFFERING;
-# endif /* !UNIV_HOTBACKUP */
+ // TODO: Create a bug, this looks wrong. The flush log
+ // parameter is dynamic.
+ if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) {
+
+ /* Do not use unbuffered i/o for the log files because
+ value 2 denotes that we do not flush the log at every
+ commit, but only once per second */
+
+ } else if (srv_win_file_flush_method == SRV_WIN_IO_UNBUFFERED) {
+
+ attributes |= FILE_FLAG_NO_BUFFERING;
+ }
#endif /* UNIV_NON_BUFFERED_IO */
- } else {
- attributes = 0;
- ut_error;
+
+#endif /* UNIV_HOTBACKUP */
+ DWORD access = GENERIC_READ;
+
+ if (!srv_read_only_mode) {
+ access |= GENERIC_WRITE;
}
- file = CreateFile((LPCTSTR) name,
- GENERIC_READ | GENERIC_WRITE, /* read and write
- access */
- share_mode, /* File can be read also by other
- processes; we must give the read
- permission because of ibbackup. We do
- not give the write permission to
- others because if one would succeed to
- start 2 instances of mysqld on the
- SAME files, that could cause severe
- database corruption! When opening
- raw disk partitions, Microsoft manuals
- say that we must give also the write
- permission. */
- NULL, /* default security attributes */
- create_flag,
- attributes,
- NULL); /*!< no template file */
+ do {
+ /* Use default security attributes and no template file. */
+ file = CreateFile(
+ (LPCTSTR) name, access, share_mode, NULL,
+ create_flag, attributes, NULL);
- if (file == INVALID_HANDLE_VALUE) {
- const char* operation;
+ if (file == INVALID_HANDLE_VALUE) {
+ const char* operation;
- operation = create_mode == OS_FILE_CREATE ? "create" : "open";
+ operation = (create_mode == OS_FILE_CREATE
+ && !srv_read_only_mode)
+ ? "create" : "open";
- *success = FALSE;
+ *success = FALSE;
- if (on_error_no_exit) {
- retry = os_file_handle_error_no_exit(
- name, operation, on_error_silent);
+ if (on_error_no_exit) {
+ retry = os_file_handle_error_no_exit(
+ name, operation, on_error_silent);
+ } else {
+ retry = os_file_handle_error(name, operation);
+ }
} else {
- retry = os_file_handle_error(name, operation);
+ *success = TRUE;
+ retry = FALSE;
}
- if (retry) {
- goto try_again;
- }
- } else {
- *success = TRUE;
- }
+ } while (retry);
- return(file);
#else /* __WIN__ */
- os_file_t file;
int create_flag;
- ibool retry;
const char* mode_str = NULL;
on_error_no_exit = create_mode & OS_FILE_ON_ERROR_NO_EXIT
@@ -1567,28 +1674,36 @@ try_again:
create_mode &= ~OS_FILE_ON_ERROR_NO_EXIT;
create_mode &= ~OS_FILE_ON_ERROR_SILENT;
- DBUG_EXECUTE_IF(
- "ib_create_table_fail_disk_full",
- *success = FALSE;
- errno = ENOSPC;
- return((os_file_t) -1);
- );
-try_again:
- ut_a(name);
-
- if (create_mode == OS_FILE_OPEN || create_mode == OS_FILE_OPEN_RAW
+ if (create_mode == OS_FILE_OPEN
+ || create_mode == OS_FILE_OPEN_RAW
|| create_mode == OS_FILE_OPEN_RETRY) {
+
+ mode_str = "OPEN";
+
+ create_flag = srv_read_only_mode ? O_RDONLY : O_RDWR;
+
+ } else if (srv_read_only_mode) {
+
mode_str = "OPEN";
- create_flag = O_RDWR;
+
+ create_flag = O_RDONLY;
+
} else if (create_mode == OS_FILE_CREATE) {
+
mode_str = "CREATE";
create_flag = O_RDWR | O_CREAT | O_EXCL;
+
} else if (create_mode == OS_FILE_OVERWRITE) {
+
mode_str = "OVERWRITE";
create_flag = O_RDWR | O_CREAT | O_TRUNC;
+
} else {
- create_flag = 0;
- ut_error;
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unknown file create mode (%lu) for file '%s'",
+ create_mode, name);
+
+ return((os_file_t) -1);
}
ut_a(type == OS_LOG_FILE || type == OS_DATA_FILE);
@@ -1598,69 +1713,75 @@ try_again:
/* We let O_SYNC only affect log files; note that we map O_DSYNC to
O_SYNC because the datasync options seemed to corrupt files in 2001
in both Linux and Solaris */
- if (type == OS_LOG_FILE
- && srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
-# if 0
- fprintf(stderr, "Using O_SYNC for file %s\n", name);
-# endif
+ if (!srv_read_only_mode
+ && type == OS_LOG_FILE
+ && srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
- create_flag = create_flag | O_SYNC;
+ create_flag |= O_SYNC;
}
#endif /* O_SYNC */
- file = open(name, create_flag, os_innodb_umask);
-
- if (file == -1) {
- const char* operation;
+ do {
+ file = ::open(name, create_flag, os_innodb_umask);
- operation = create_mode == OS_FILE_CREATE ? "create" : "open";
+ if (file == -1) {
+ const char* operation;
- *success = FALSE;
+ operation = (create_mode == OS_FILE_CREATE
+ && !srv_read_only_mode)
+ ? "create" : "open";
- if (on_error_no_exit) {
- retry = os_file_handle_error_no_exit(
- name, operation, on_error_silent);
- } else {
- retry = os_file_handle_error(name, operation);
- }
+ *success = FALSE;
- if (retry) {
- goto try_again;
+ if (on_error_no_exit) {
+ retry = os_file_handle_error_no_exit(
+ name, operation, on_error_silent);
+ } else {
+ retry = os_file_handle_error(name, operation);
+ }
} else {
- return(file /* -1 */);
+ *success = TRUE;
+ retry = false;
}
- }
- /* else */
- *success = TRUE;
+ } while (retry);
/* We disable OS caching (O_DIRECT) only on data files */
- if (type != OS_LOG_FILE
- && srv_unix_file_flush_method == SRV_UNIX_O_DIRECT) {
+
+ if (!srv_read_only_mode
+ && *success
+ && type != OS_LOG_FILE
+ && (srv_unix_file_flush_method == SRV_UNIX_O_DIRECT
+ || srv_unix_file_flush_method == SRV_UNIX_O_DIRECT_NO_FSYNC)) {
os_file_set_nocache(file, name, mode_str);
}
#ifdef USE_FILE_LOCK
- if (create_mode != OS_FILE_OPEN_RAW && os_file_lock(file, name)) {
+ if (!srv_read_only_mode
+ && *success
+ && create_mode != OS_FILE_OPEN_RAW
+ && os_file_lock(file, name)) {
if (create_mode == OS_FILE_OPEN_RETRY) {
- int i;
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Retrying to lock"
- " the first data file\n",
- stderr);
- for (i = 0; i < 100; i++) {
+
+ ut_a(!srv_read_only_mode);
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Retrying to lock the first data file");
+
+ for (int i = 0; i < 100; i++) {
os_thread_sleep(1000000);
+
if (!os_file_lock(file, name)) {
*success = TRUE;
return(file);
}
}
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Unable to open the first data file\n",
- stderr);
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Unable to open the first data file");
}
*success = FALSE;
@@ -1669,22 +1790,23 @@ try_again:
}
#endif /* USE_FILE_LOCK */
- return(file);
#endif /* __WIN__ */
+
+ return(file);
}
/***********************************************************************//**
Deletes a file if it exists. The file has to be closed before calling this.
@return TRUE if success */
UNIV_INTERN
-ibool
+bool
os_file_delete_if_exists(
/*=====================*/
const char* name) /*!< in: file path as a null-terminated
string */
{
#ifdef __WIN__
- BOOL ret;
+ bool ret;
ulint count = 0;
loop:
/* In Windows, deleting an .ibd file may fail if ibbackup is copying
@@ -1693,31 +1815,30 @@ loop:
ret = DeleteFile((LPCTSTR) name);
if (ret) {
- return(TRUE);
+ return(true);
}
- if (GetLastError() == ERROR_FILE_NOT_FOUND) {
+ DWORD lasterr = GetLastError();
+ if (lasterr == ERROR_FILE_NOT_FOUND
+ || lasterr == ERROR_PATH_NOT_FOUND) {
/* the file does not exist, this not an error */
- return(TRUE);
+ return(true);
}
count++;
if (count > 100 && 0 == (count % 10)) {
- fprintf(stderr,
- "InnoDB: Warning: cannot delete file %s\n"
- "InnoDB: Are you running ibbackup"
- " to back up the file?\n", name);
+ os_file_get_last_error(true); /* print error information */
- os_file_get_last_error(TRUE); /* print error information */
+ ib_logf(IB_LOG_LEVEL_WARN, "Delete of file %s failed.", name);
}
os_thread_sleep(1000000); /* sleep for a second */
if (count > 2000) {
- return(FALSE);
+ return(false);
}
goto loop;
@@ -1729,18 +1850,18 @@ loop:
if (ret != 0 && errno != ENOENT) {
os_file_handle_error_no_exit(name, "delete", FALSE);
- return(FALSE);
+ return(false);
}
- return(TRUE);
-#endif
+ return(true);
+#endif /* __WIN__ */
}
/***********************************************************************//**
Deletes a file. The file has to be closed before calling this.
@return TRUE if success */
UNIV_INTERN
-ibool
+bool
os_file_delete(
/*===========*/
const char* name) /*!< in: file path as a null-terminated
@@ -1756,32 +1877,32 @@ loop:
ret = DeleteFile((LPCTSTR) name);
if (ret) {
- return(TRUE);
+ return(true);
}
if (GetLastError() == ERROR_FILE_NOT_FOUND) {
/* If the file does not exist, we classify this as a 'mild'
error and return */
- return(FALSE);
+ return(false);
}
count++;
if (count > 100 && 0 == (count % 10)) {
+ os_file_get_last_error(true); /* print error information */
+
fprintf(stderr,
"InnoDB: Warning: cannot delete file %s\n"
"InnoDB: Are you running ibbackup"
" to back up the file?\n", name);
-
- os_file_get_last_error(TRUE); /* print error information */
}
os_thread_sleep(1000000); /* sleep for a second */
if (count > 2000) {
- return(FALSE);
+ return(false);
}
goto loop;
@@ -1793,10 +1914,10 @@ loop:
if (ret != 0) {
os_file_handle_error_no_exit(name, "delete", FALSE);
- return(FALSE);
+ return(false);
}
- return(TRUE);
+ return(true);
#endif
}
@@ -1813,6 +1934,19 @@ os_file_rename_func(
string */
const char* newpath)/*!< in: new file path */
{
+#ifdef UNIV_DEBUG
+ os_file_type_t type;
+ ibool exists;
+
+ /* New path must not exist. */
+ ut_ad(os_file_status(newpath, &exists, &type));
+ ut_ad(!exists);
+
+ /* Old path must exist. */
+ ut_ad(os_file_status(oldpath, &exists, &type));
+ ut_ad(exists);
+#endif /* UNIV_DEBUG */
+
#ifdef __WIN__
BOOL ret;
@@ -1837,7 +1971,7 @@ os_file_rename_func(
}
return(TRUE);
-#endif
+#endif /* __WIN__ */
}
/***********************************************************************//**
@@ -1877,7 +2011,7 @@ os_file_close_func(
}
return(TRUE);
-#endif
+#endif /* __WIN__ */
}
#ifdef UNIV_HOTBACKUP
@@ -1913,7 +2047,7 @@ os_file_close_no_error_handling(
}
return(TRUE);
-#endif
+#endif /* __WIN__ */
}
#endif /* UNIV_HOTBACKUP */
@@ -1942,7 +2076,7 @@ os_file_get_size(
return(offset);
#else
return((os_offset_t) lseek(file, 0, SEEK_END));
-#endif
+#endif /* __WIN__ */
}
/***********************************************************************//**
@@ -2175,10 +2309,7 @@ os_file_flush_func(
return(TRUE);
}
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Error: the OS said file flush did not succeed\n");
+ ib_logf(IB_LOG_LEVEL_ERROR, "The OS said file flush did not succeed");
os_file_handle_error(NULL, "flush");
@@ -2215,9 +2346,9 @@ os_file_pread(
offs = (off_t) offset;
if (sizeof(off_t) <= 4) {
- if (UNIV_UNLIKELY(offset != (os_offset_t) offs)) {
- fprintf(stderr,
- "InnoDB: Error: file read at offset > 4 GB\n");
+ if (offset != (os_offset_t) offs) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "File read at offset > 4 GB");
}
}
@@ -2318,16 +2449,16 @@ os_file_pwrite(
off_t offs;
ut_ad(n);
+ ut_ad(!srv_read_only_mode);
/* If off_t is > 4 bytes in size, then we assume we can pass a
64-bit address */
offs = (off_t) offset;
if (sizeof(off_t) <= 4) {
- if (UNIV_UNLIKELY(offset != (os_offset_t) offs)) {
- fprintf(stderr,
- "InnoDB: Error: file write"
- " at offset > 4 GB\n");
+ if (offset != (os_offset_t) offs) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "File write at offset > 4 GB.");
}
}
@@ -2402,7 +2533,7 @@ func_exit:
return(ret);
}
-#endif
+#endif /* !UNIV_HOTBACKUP */
}
#endif
@@ -2503,11 +2634,9 @@ try_again:
return(TRUE);
}
- fprintf(stderr,
- "InnoDB: Error: tried to read "ULINTPF" bytes at offset "
- UINT64PF"\n"
- "InnoDB: Was only able to read %ld.\n",
- n, offset, (lint) ret);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Tried to read "ULINTPF" bytes at offset " UINT64PF". "
+ "Was only able to read %ld.", n, offset, (lint) ret);
#endif /* __WIN__ */
#ifdef __WIN__
error_handling:
@@ -2525,7 +2654,7 @@ error_handling:
(ulong) GetLastError()
#else
(ulong) errno
-#endif
+#endif /* __WIN__ */
);
fflush(stderr);
@@ -2683,6 +2812,8 @@ os_file_write_func(
os_offset_t offset, /*!< in: file offset where to write */
ulint n) /*!< in: number of bytes to write */
{
+ ut_ad(!srv_read_only_mode);
+
#ifdef __WIN__
BOOL ret;
DWORD len;
@@ -2842,8 +2973,8 @@ retry:
(ulint) errno);
if (strerror(errno) != NULL) {
fprintf(stderr,
- "InnoDB: Error number %lu means '%s'.\n",
- (ulint) errno, strerror(errno));
+ "InnoDB: Error number %d means '%s'.\n",
+ errno, strerror(errno));
}
fprintf(stderr,
@@ -2866,15 +2997,15 @@ UNIV_INTERN
ibool
os_file_status(
/*===========*/
- const char* path, /*!< in: pathname of the file */
+ const char* path, /*!< in: pathname of the file */
ibool* exists, /*!< out: TRUE if file exists */
os_file_type_t* type) /*!< out: type of the file (if it exists) */
{
#ifdef __WIN__
int ret;
- struct _stat statinfo;
+ struct _stat64 statinfo;
- ret = _stat(path, &statinfo);
+ ret = _stat64(path, &statinfo);
if (ret && (errno == ENOENT || errno == ENOTDIR)) {
/* file does not exist */
*exists = FALSE;
@@ -2933,47 +3064,73 @@ os_file_status(
/*******************************************************************//**
This function returns information about the specified file
-@return TRUE if stat information found */
+@return DB_SUCCESS if all OK */
UNIV_INTERN
-ibool
+dberr_t
os_file_get_status(
/*===============*/
const char* path, /*!< in: pathname of the file */
- os_file_stat_t* stat_info) /*!< information of a file in a
+ os_file_stat_t* stat_info, /*!< information of a file in a
directory */
+ bool check_rw_perm) /*!< in: for testing whether the
+ file can be opened in RW mode */
{
-#ifdef __WIN__
int ret;
- struct _stat statinfo;
- ret = _stat(path, &statinfo);
+#ifdef __WIN__
+ struct _stat64 statinfo;
+
+ ret = _stat64(path, &statinfo);
+
if (ret && (errno == ENOENT || errno == ENOTDIR)) {
/* file does not exist */
- return(FALSE);
+ return(DB_NOT_FOUND);
+
} else if (ret) {
/* file exists, but stat call failed */
os_file_handle_error_no_exit(path, "stat", FALSE);
- return(FALSE);
- }
- if (_S_IFDIR & statinfo.st_mode) {
+ return(DB_FAIL);
+
+ } else if (_S_IFDIR & statinfo.st_mode) {
stat_info->type = OS_FILE_TYPE_DIR;
} else if (_S_IFREG & statinfo.st_mode) {
+
+ DWORD access = GENERIC_READ;
+
+ if (!srv_read_only_mode) {
+ access |= GENERIC_WRITE;
+ }
+
stat_info->type = OS_FILE_TYPE_FILE;
+
+ /* Check if we can open it in read-only mode. */
+
+ if (check_rw_perm) {
+ HANDLE fh;
+
+ fh = CreateFile(
+ (LPCTSTR) path, // File to open
+ access,
+ 0, // No sharing
+ NULL, // Default security
+ OPEN_EXISTING, // Existing file only
+ FILE_ATTRIBUTE_NORMAL, // Normal file
+ NULL); // No attr. template
+
+ if (fh == INVALID_HANDLE_VALUE) {
+ stat_info->rw_perm = false;
+ } else {
+ stat_info->rw_perm = true;
+ CloseHandle(fh);
+ }
+ }
} else {
stat_info->type = OS_FILE_TYPE_UNKNOWN;
}
-
- stat_info->ctime = statinfo.st_ctime;
- stat_info->atime = statinfo.st_atime;
- stat_info->mtime = statinfo.st_mtime;
- stat_info->size = statinfo.st_size;
-
- return(TRUE);
#else
- int ret;
struct stat statinfo;
ret = stat(path, &statinfo);
@@ -2981,32 +3138,49 @@ os_file_get_status(
if (ret && (errno == ENOENT || errno == ENOTDIR)) {
/* file does not exist */
- return(FALSE);
+ return(DB_NOT_FOUND);
+
} else if (ret) {
/* file exists, but stat call failed */
os_file_handle_error_no_exit(path, "stat", FALSE);
- return(FALSE);
- }
+ return(DB_FAIL);
- if (S_ISDIR(statinfo.st_mode)) {
+ } else if (S_ISDIR(statinfo.st_mode)) {
stat_info->type = OS_FILE_TYPE_DIR;
} else if (S_ISLNK(statinfo.st_mode)) {
stat_info->type = OS_FILE_TYPE_LINK;
} else if (S_ISREG(statinfo.st_mode)) {
stat_info->type = OS_FILE_TYPE_FILE;
+
+ if (check_rw_perm) {
+ int fh;
+ int access;
+
+ access = !srv_read_only_mode ? O_RDWR : O_RDONLY;
+
+ fh = ::open(path, access, os_innodb_umask);
+
+ if (fh == -1) {
+ stat_info->rw_perm = false;
+ } else {
+ stat_info->rw_perm = true;
+ close(fh);
+ }
+ }
} else {
stat_info->type = OS_FILE_TYPE_UNKNOWN;
}
+#endif /* _WIN_ */
+
stat_info->ctime = statinfo.st_ctime;
stat_info->atime = statinfo.st_atime;
stat_info->mtime = statinfo.st_mtime;
- stat_info->size = statinfo.st_size;
+ stat_info->size = statinfo.st_size;
- return(TRUE);
-#endif
+ return(DB_SUCCESS);
}
/* path name separator character */
@@ -3017,6 +3191,153 @@ os_file_get_status(
#endif
/****************************************************************//**
+This function returns a new path name after replacing the basename
+in an old path with a new basename. The old_path is a full path
+name including the extension. The tablename is in the normal
+form "databasename/tablename". The new base name is found after
+the forward slash. Both input strings are null terminated.
+
+This function allocates memory to be returned. It is the callers
+responsibility to free the return value after it is no longer needed.
+
+@return own: new full pathname */
+UNIV_INTERN
+char*
+os_file_make_new_pathname(
+/*======================*/
+ const char* old_path, /*!< in: pathname */
+ const char* tablename) /*!< in: contains new base name */
+{
+ ulint dir_len;
+ char* last_slash;
+ char* base_name;
+ char* new_path;
+ ulint new_path_len;
+
+ /* Split the tablename into its database and table name components.
+ They are separated by a '/'. */
+ last_slash = strrchr((char*) tablename, '/');
+ base_name = last_slash ? last_slash + 1 : (char*) tablename;
+
+ /* Find the offset of the last slash. We will strip off the
+ old basename.ibd which starts after that slash. */
+ last_slash = strrchr((char*) old_path, OS_FILE_PATH_SEPARATOR);
+ dir_len = last_slash ? last_slash - old_path : strlen(old_path);
+
+ /* allocate a new path and move the old directory path to it. */
+ new_path_len = dir_len + strlen(base_name) + sizeof "/.ibd";
+ new_path = static_cast<char*>(mem_alloc(new_path_len));
+ memcpy(new_path, old_path, dir_len);
+
+ ut_snprintf(new_path + dir_len,
+ new_path_len - dir_len,
+ "%c%s.ibd",
+ OS_FILE_PATH_SEPARATOR,
+ base_name);
+
+ return(new_path);
+}
+
+/****************************************************************//**
+This function returns a remote path name by combining a data directory
+path provided in a DATA DIRECTORY clause with the tablename which is
+in the form 'database/tablename'. It strips the file basename (which
+is the tablename) found after the last directory in the path provided.
+The full filepath created will include the database name as a directory
+under the path provided. The filename is the tablename with the '.ibd'
+extension. All input and output strings are null-terminated.
+
+This function allocates memory to be returned. It is the callers
+responsibility to free the return value after it is no longer needed.
+
+@return own: A full pathname; data_dir_path/databasename/tablename.ibd */
+UNIV_INTERN
+char*
+os_file_make_remote_pathname(
+/*=========================*/
+ const char* data_dir_path, /*!< in: pathname */
+ const char* tablename, /*!< in: tablename */
+ const char* extention) /*!< in: file extention; ibd,cfg */
+{
+ ulint data_dir_len;
+ char* last_slash;
+ char* new_path;
+ ulint new_path_len;
+
+ ut_ad(extention && strlen(extention) == 3);
+
+ /* Find the offset of the last slash. We will strip off the
+ old basename or tablename which starts after that slash. */
+ last_slash = strrchr((char*) data_dir_path, OS_FILE_PATH_SEPARATOR);
+ data_dir_len = last_slash ? last_slash - data_dir_path : strlen(data_dir_path);
+
+ /* allocate a new path and move the old directory path to it. */
+ new_path_len = data_dir_len + strlen(tablename)
+ + sizeof "/." + strlen(extention);
+ new_path = static_cast<char*>(mem_alloc(new_path_len));
+ memcpy(new_path, data_dir_path, data_dir_len);
+ ut_snprintf(new_path + data_dir_len,
+ new_path_len - data_dir_len,
+ "%c%s.%s",
+ OS_FILE_PATH_SEPARATOR,
+ tablename,
+ extention);
+
+ srv_normalize_path_for_win(new_path);
+
+ return(new_path);
+}
+
+/****************************************************************//**
+This function reduces a null-terminated full remote path name into
+the path that is sent by MySQL for DATA DIRECTORY clause. It replaces
+the 'databasename/tablename.ibd' found at the end of the path with just
+'tablename'.
+
+Since the result is always smaller than the path sent in, no new memory
+is allocated. The caller should allocate memory for the path sent in.
+This function manipulates that path in place.
+
+If the path format is not as expected, just return. The result is used
+to inform a SHOW CREATE TABLE command. */
+UNIV_INTERN
+void
+os_file_make_data_dir_path(
+/*========================*/
+ char* data_dir_path) /*!< in/out: full path/data_dir_path */
+{
+ char* ptr;
+ char* tablename;
+ ulint tablename_len;
+
+ /* Replace the period before the extension with a null byte. */
+ ptr = strrchr((char*) data_dir_path, '.');
+ if (!ptr) {
+ return;
+ }
+ ptr[0] = '\0';
+
+ /* The tablename starts after the last slash. */
+ ptr = strrchr((char*) data_dir_path, OS_FILE_PATH_SEPARATOR);
+ if (!ptr) {
+ return;
+ }
+ ptr[0] = '\0';
+ tablename = ptr + 1;
+
+ /* The databasename starts after the next to last slash. */
+ ptr = strrchr((char*) data_dir_path, OS_FILE_PATH_SEPARATOR);
+ if (!ptr) {
+ return;
+ }
+ tablename_len = ut_strlen(tablename);
+
+ ut_memmove(++ptr, tablename, tablename_len);
+
+ ptr[tablename_len] = '\0';
+}
+
+/****************************************************************//**
The function os_file_dirname returns a directory component of a
null-terminated pathname string. In the usual case, dirname returns
the string up to, but not including, the final '/', and basename
@@ -3080,11 +3401,18 @@ os_file_create_subdirs_if_needed(
/*=============================*/
const char* path) /*!< in: path name */
{
- char* subdir;
- ibool success, subdir_exists;
- os_file_type_t type;
+ if (srv_read_only_mode) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "read only mode set. Can't create subdirectories '%s'",
+ path);
+
+ return(FALSE);
+
+ }
+
+ char* subdir = os_file_dirname(path);
- subdir = os_file_dirname(path);
if (strlen(subdir) == 1
&& (*subdir == OS_FILE_PATH_SEPARATOR || *subdir == '.')) {
/* subdir is root or cwd, nothing to do */
@@ -3094,15 +3422,21 @@ os_file_create_subdirs_if_needed(
}
/* Test if subdir exists */
- success = os_file_status(subdir, &subdir_exists, &type);
+ os_file_type_t type;
+ ibool subdir_exists;
+ ibool success = os_file_status(subdir, &subdir_exists, &type);
+
if (success && !subdir_exists) {
+
/* subdir does not exist, create it */
success = os_file_create_subdirs_if_needed(subdir);
+
if (!success) {
mem_free(subdir);
return(FALSE);
}
+
success = os_file_create_directory(subdir, FALSE);
}
@@ -3124,7 +3458,7 @@ os_aio_array_get_nth_slot(
{
ut_a(index < array->n_slots);
- return((array->slots) + index);
+ return(&array->slots[index]);
}
#if defined(LINUX_NATIVE_AIO)
@@ -3226,43 +3560,74 @@ os_aio_native_aio_supported(void)
/*=============================*/
{
int fd;
- byte* buf;
- byte* ptr;
- struct io_event io_event;
io_context_t io_ctx;
- struct iocb iocb;
- struct iocb* p_iocb;
- int err;
+ char name[1000];
if (!os_aio_linux_create_io_ctx(1, &io_ctx)) {
/* The platform does not support native aio. */
return(FALSE);
- }
+ } else if (!srv_read_only_mode) {
+ /* Now check if tmpdir supports native aio ops. */
+ fd = innobase_mysql_tmpfile();
- /* Now check if tmpdir supports native aio ops. */
- fd = innobase_mysql_tmpfile();
+ if (fd < 0) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Unable to create temp file to check "
+ "native AIO support.");
- if (fd < 0) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: unable to create "
- "temp file to check native AIO support.\n");
+ return(FALSE);
+ }
+ } else {
- return(FALSE);
+ srv_normalize_path_for_win(srv_log_group_home_dir);
+
+ ulint dirnamelen = strlen(srv_log_group_home_dir);
+ ut_a(dirnamelen < (sizeof name) - 10 - sizeof "ib_logfile");
+ memcpy(name, srv_log_group_home_dir, dirnamelen);
+
+ /* Add a path separator if needed. */
+ if (dirnamelen && name[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
+ name[dirnamelen++] = SRV_PATH_SEPARATOR;
+ }
+
+ strcpy(name + dirnamelen, "ib_logfile0");
+
+ fd = ::open(name, O_RDONLY);
+
+ if (fd == -1) {
+
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Unable to open \"%s\" to check "
+ "native AIO read support.", name);
+
+ return(FALSE);
+ }
}
+ struct io_event io_event;
+
memset(&io_event, 0x0, sizeof(io_event));
- buf = static_cast<byte*>(ut_malloc(UNIV_PAGE_SIZE * 2));
- ptr = static_cast<byte*>(ut_align(buf, UNIV_PAGE_SIZE));
+ byte* buf = static_cast<byte*>(ut_malloc(UNIV_PAGE_SIZE * 2));
+ byte* ptr = static_cast<byte*>(ut_align(buf, UNIV_PAGE_SIZE));
+
+ struct iocb iocb;
/* Suppress valgrind warning. */
memset(buf, 0x00, UNIV_PAGE_SIZE * 2);
-
memset(&iocb, 0x0, sizeof(iocb));
- p_iocb = &iocb;
- io_prep_pwrite(p_iocb, fd, ptr, UNIV_PAGE_SIZE, 0);
- err = io_submit(io_ctx, 1, &p_iocb);
+ struct iocb* p_iocb = &iocb;
+
+ if (!srv_read_only_mode) {
+ io_prep_pwrite(p_iocb, fd, ptr, UNIV_PAGE_SIZE, 0);
+ } else {
+ ut_a(UNIV_PAGE_SIZE >= 512);
+ io_prep_pread(p_iocb, fd, ptr, 512, 0);
+ }
+
+ int err = io_submit(io_ctx, 1, &p_iocb);
+
if (err >= 1) {
/* Now collect the submitted IO request. */
err = io_getevents(io_ctx, 1, 1, &io_event, NULL);
@@ -3277,22 +3642,18 @@ os_aio_native_aio_supported(void)
case -EINVAL:
case -ENOSYS:
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: Linux Native AIO is not"
- " supported on tmpdir.\n"
- "InnoDB: You can either move tmpdir to a"
- " file system that supports native AIO\n"
- "InnoDB: or you can set"
- " innodb_use_native_aio to FALSE to avoid"
- " this message.\n");
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Linux Native AIO not supported. You can either "
+ "move %s to a file system that supports native "
+ "AIO or you can set innodb_use_native_aio to "
+ "FALSE to avoid this message.",
+ srv_read_only_mode ? name : "tmpdir");
/* fall through. */
default:
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: Linux Native AIO check"
- " on tmpdir returned error[%d]\n", -err);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Linux Native AIO check on %s returned error[%d]",
+ srv_read_only_mode ? name : "tmpdir", -err);
}
return(FALSE);
@@ -3314,34 +3675,33 @@ os_aio_array_create(
ulint n_segments) /*!< in: number of segments in the aio array */
{
os_aio_array_t* array;
- ulint i;
- os_aio_slot_t* slot;
#ifdef WIN_ASYNC_IO
OVERLAPPED* over;
#elif defined(LINUX_NATIVE_AIO)
struct io_event* io_event = NULL;
-#endif
+#endif /* WIN_ASYNC_IO */
ut_a(n > 0);
ut_a(n_segments > 0);
- array = static_cast<os_aio_array_t*>(ut_malloc(sizeof(os_aio_array_t)));
+ array = static_cast<os_aio_array_t*>(ut_malloc(sizeof(*array)));
+ memset(array, 0x0, sizeof(*array));
- array->mutex = os_mutex_create();
- array->not_full = os_event_create(NULL);
- array->is_empty = os_event_create(NULL);
+ array->mutex = os_mutex_create();
+ array->not_full = os_event_create();
+ array->is_empty = os_event_create();
os_event_set(array->is_empty);
- array->n_slots = n;
- array->n_segments = n_segments;
- array->n_reserved = 0;
- array->cur_seg = 0;
+ array->n_slots = n;
+ array->n_segments = n_segments;
array->slots = static_cast<os_aio_slot_t*>(
- ut_malloc(n * sizeof(os_aio_slot_t)));
+ ut_malloc(n * sizeof(*array->slots)));
+
+ memset(array->slots, 0x0, sizeof(n * sizeof(*array->slots)));
#ifdef __WIN__
array->handles = static_cast<HANDLE*>(ut_malloc(n * sizeof(HANDLE)));
-#endif
+#endif /* __WIN__ */
#if defined(LINUX_NATIVE_AIO)
array->aio_ctx = NULL;
@@ -3359,16 +3719,27 @@ os_aio_array_create(
array->aio_ctx = static_cast<io_context**>(
ut_malloc(n_segments * sizeof(*array->aio_ctx)));
- for (i = 0; i < n_segments; ++i) {
+ for (ulint i = 0; i < n_segments; ++i) {
if (!os_aio_linux_create_io_ctx(n/n_segments,
&array->aio_ctx[i])) {
/* If something bad happened during aio setup
- we should call it a day and return right away.
- We don't care about any leaks because a failure
- to initialize the io subsystem means that the
- server (or atleast the innodb storage engine)
- is not going to startup. */
- return(NULL);
+ we disable linux native aio.
+ The disadvantage will be a small memory leak
+ at shutdown but that's ok compared to a crash
+ or a not working server.
+ This frequently happens when running the test suite
+ with many threads on a system with low fs.aio-max-nr!
+ */
+
+ fprintf(stderr,
+ " InnoDB: Warning: Linux Native AIO disabled "
+ "because os_aio_linux_create_io_ctx() "
+ "failed. To get rid of this warning you can "
+ "try increasing system "
+ "fs.aio-max-nr to 1048576 or larger or "
+ "setting innodb_use_native_aio = 0 in my.cnf\n");
+ srv_use_native_aio = FALSE;
+ goto skip_native_aio;
}
}
@@ -3381,7 +3752,9 @@ os_aio_array_create(
skip_native_aio:
#endif /* LINUX_NATIVE_AIO */
- for (i = 0; i < n; i++) {
+ for (ulint i = 0; i < n; i++) {
+ os_aio_slot_t* slot;
+
slot = os_aio_array_get_nth_slot(array, i);
slot->pos = i;
@@ -3389,18 +3762,17 @@ skip_native_aio:
#ifdef WIN_ASYNC_IO
slot->handle = CreateEvent(NULL,TRUE, FALSE, NULL);
- over = &(slot->control);
+ over = &slot->control;
over->hEvent = slot->handle;
- *((array->handles) + i) = over->hEvent;
+ array->handles[i] = over->hEvent;
#elif defined(LINUX_NATIVE_AIO)
-
memset(&slot->control, 0x0, sizeof(slot->control));
slot->n_bytes = 0;
slot->ret = 0;
-#endif
+#endif /* WIN_ASYNC_IO */
}
return(array);
@@ -3412,7 +3784,7 @@ static
void
os_aio_array_free(
/*==============*/
- os_aio_array_t* array) /*!< in, own: array to free */
+ os_aio_array_t*& array) /*!< in, own: array to free */
{
#ifdef WIN_ASYNC_IO
ulint i;
@@ -3439,6 +3811,8 @@ os_aio_array_free(
ut_free(array->slots);
ut_free(array);
+
+ array = 0;
}
/***********************************************************************
@@ -3459,93 +3833,100 @@ os_aio_init(
ulint n_slots_sync) /*<! in: number of slots in the sync aio
array */
{
- ulint i;
- ulint n_segments = 2 + n_read_segs + n_write_segs;
-
- ut_ad(n_segments >= 4);
-
os_io_init_simple();
#if defined(LINUX_NATIVE_AIO)
/* Check if native aio is supported on this system and tmpfs */
- if (srv_use_native_aio
- && !os_aio_native_aio_supported()) {
+ if (srv_use_native_aio && !os_aio_native_aio_supported()) {
+
+ ib_logf(IB_LOG_LEVEL_WARN, "Linux Native AIO disabled.");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: Linux Native AIO"
- " disabled.\n");
srv_use_native_aio = FALSE;
}
#endif /* LINUX_NATIVE_AIO */
- for (i = 0; i < n_segments; i++) {
- srv_set_io_thread_op_info(i, "not started yet");
- }
-
+ srv_reset_io_thread_op_info();
- /* fprintf(stderr, "Array n per seg %lu\n", n_per_seg); */
+ os_aio_read_array = os_aio_array_create(
+ n_read_segs * n_per_seg, n_read_segs);
- os_aio_ibuf_array = os_aio_array_create(n_per_seg, 1);
- if (os_aio_ibuf_array == NULL) {
- goto err_exit;
+ if (os_aio_read_array == NULL) {
+ return(FALSE);
}
- srv_io_thread_function[0] = "insert buffer thread";
+ ulint start = (srv_read_only_mode) ? 0 : 2;
+ ulint n_segs = n_read_segs + start;
- os_aio_log_array = os_aio_array_create(n_per_seg, 1);
- if (os_aio_log_array == NULL) {
- goto err_exit;
+ /* 0 is the ibuf segment and 1 is the insert buffer segment. */
+ for (ulint i = start; i < n_segs; ++i) {
+ ut_a(i < SRV_MAX_N_IO_THREADS);
+ srv_io_thread_function[i] = "read thread";
}
- srv_io_thread_function[1] = "log thread";
+ ulint n_segments = n_read_segs;
- os_aio_read_array = os_aio_array_create(n_read_segs * n_per_seg,
- n_read_segs);
- if (os_aio_read_array == NULL) {
- goto err_exit;
- }
+ if (!srv_read_only_mode) {
- for (i = 2; i < 2 + n_read_segs; i++) {
- ut_a(i < SRV_MAX_N_IO_THREADS);
- srv_io_thread_function[i] = "read thread";
- }
+ os_aio_log_array = os_aio_array_create(n_per_seg, 1);
- os_aio_write_array = os_aio_array_create(n_write_segs * n_per_seg,
- n_write_segs);
- if (os_aio_write_array == NULL) {
- goto err_exit;
- }
+ if (os_aio_log_array == NULL) {
+ return(FALSE);
+ }
- for (i = 2 + n_read_segs; i < n_segments; i++) {
- ut_a(i < SRV_MAX_N_IO_THREADS);
- srv_io_thread_function[i] = "write thread";
+ ++n_segments;
+
+ srv_io_thread_function[1] = "log thread";
+
+ os_aio_ibuf_array = os_aio_array_create(n_per_seg, 1);
+
+ if (os_aio_ibuf_array == NULL) {
+ return(FALSE);
+ }
+
+ ++n_segments;
+
+ srv_io_thread_function[0] = "insert buffer thread";
+
+ os_aio_write_array = os_aio_array_create(
+ n_write_segs * n_per_seg, n_write_segs);
+
+ if (os_aio_write_array == NULL) {
+ return(FALSE);
+ }
+
+ n_segments += n_write_segs;
+
+ for (ulint i = start + n_read_segs; i < n_segments; ++i) {
+ ut_a(i < SRV_MAX_N_IO_THREADS);
+ srv_io_thread_function[i] = "write thread";
+ }
+
+ ut_ad(n_segments >= 4);
+ } else {
+ ut_ad(n_segments > 0);
}
os_aio_sync_array = os_aio_array_create(n_slots_sync, 1);
+
if (os_aio_sync_array == NULL) {
- goto err_exit;
+ return(FALSE);
}
-
os_aio_n_segments = n_segments;
os_aio_validate();
- os_aio_segment_wait_events = static_cast<os_event_struct_t**>(
- ut_malloc(n_segments * sizeof(void*)));
+ os_aio_segment_wait_events = static_cast<os_event_t*>(
+ ut_malloc(n_segments * sizeof *os_aio_segment_wait_events));
- for (i = 0; i < n_segments; i++) {
- os_aio_segment_wait_events[i] = os_event_create(NULL);
+ for (ulint i = 0; i < n_segments; ++i) {
+ os_aio_segment_wait_events[i] = os_event_create();
}
- os_last_printout = time(NULL);
+ os_last_printout = ut_time();
return(TRUE);
-err_exit:
- return(FALSE);
-
}
/***********************************************************************
@@ -3555,20 +3936,25 @@ void
os_aio_free(void)
/*=============*/
{
- ulint i;
+ if (os_aio_ibuf_array != 0) {
+ os_aio_array_free(os_aio_ibuf_array);
+ }
+
+ if (os_aio_log_array != 0) {
+ os_aio_array_free(os_aio_log_array);
+ }
+
+ if (os_aio_write_array != 0) {
+ os_aio_array_free(os_aio_write_array);
+ }
+
+ if (os_aio_sync_array != 0) {
+ os_aio_array_free(os_aio_sync_array);
+ }
- os_aio_array_free(os_aio_ibuf_array);
- os_aio_ibuf_array = NULL;
- os_aio_array_free(os_aio_log_array);
- os_aio_log_array = NULL;
os_aio_array_free(os_aio_read_array);
- os_aio_read_array = NULL;
- os_aio_array_free(os_aio_write_array);
- os_aio_write_array = NULL;
- os_aio_array_free(os_aio_sync_array);
- os_aio_sync_array = NULL;
- for (i = 0; i < os_aio_n_segments; i++) {
+ for (ulint i = 0; i < os_aio_n_segments; i++) {
os_event_free(os_aio_segment_wait_events[i]);
}
@@ -3604,14 +3990,20 @@ void
os_aio_wake_all_threads_at_shutdown(void)
/*=====================================*/
{
- ulint i;
-
#ifdef WIN_ASYNC_IO
/* This code wakes up all ai/o threads in Windows native aio */
os_aio_array_wake_win_aio_at_shutdown(os_aio_read_array);
- os_aio_array_wake_win_aio_at_shutdown(os_aio_write_array);
- os_aio_array_wake_win_aio_at_shutdown(os_aio_ibuf_array);
- os_aio_array_wake_win_aio_at_shutdown(os_aio_log_array);
+ if (os_aio_write_array != 0) {
+ os_aio_array_wake_win_aio_at_shutdown(os_aio_write_array);
+ }
+
+ if (os_aio_ibuf_array != 0) {
+ os_aio_array_wake_win_aio_at_shutdown(os_aio_ibuf_array);
+ }
+
+ if (os_aio_log_array != 0) {
+ os_aio_array_wake_win_aio_at_shutdown(os_aio_log_array);
+ }
#elif defined(LINUX_NATIVE_AIO)
@@ -3623,12 +4015,14 @@ os_aio_wake_all_threads_at_shutdown(void)
if (srv_use_native_aio) {
return;
}
+
/* Fall through to simulated AIO handler wakeup if we are
not using native AIO. */
-#endif
+#endif /* !WIN_ASYNC_AIO */
+
/* This loop wakes up all simulated ai/o threads */
- for (i = 0; i < os_aio_n_segments; i++) {
+ for (ulint i = 0; i < os_aio_n_segments; i++) {
os_event_set(os_aio_segment_wait_events[i]);
}
@@ -3642,6 +4036,7 @@ void
os_aio_wait_until_no_pending_writes(void)
/*=====================================*/
{
+ ut_ad(!srv_read_only_mode);
os_event_wait(os_aio_write_array->is_empty);
}
@@ -3660,10 +4055,14 @@ os_aio_get_segment_no_from_slot(
ulint seg_len;
if (array == os_aio_ibuf_array) {
- segment = 0;
+ ut_ad(!srv_read_only_mode);
+
+ segment = IO_IBUF_SEGMENT;
} else if (array == os_aio_log_array) {
- segment = 1;
+ ut_ad(!srv_read_only_mode);
+
+ segment = IO_LOG_SEGMENT;
} else if (array == os_aio_read_array) {
seg_len = os_aio_read_array->n_slots
@@ -3671,7 +4070,9 @@ os_aio_get_segment_no_from_slot(
segment = 2 + slot->pos / seg_len;
} else {
+ ut_ad(!srv_read_only_mode);
ut_a(array == os_aio_write_array);
+
seg_len = os_aio_write_array->n_slots
/ os_aio_write_array->n_segments;
@@ -3692,15 +4093,19 @@ os_aio_get_array_and_local_segment(
os_aio_array_t** array, /*!< out: aio wait array */
ulint global_segment)/*!< in: global segment number */
{
- ulint segment;
+ ulint segment;
ut_a(global_segment < os_aio_n_segments);
- if (global_segment == 0) {
+ if (srv_read_only_mode) {
+ *array = os_aio_read_array;
+
+ return(global_segment);
+ } else if (global_segment == IO_IBUF_SEGMENT) {
*array = os_aio_ibuf_array;
segment = 0;
- } else if (global_segment == 1) {
+ } else if (global_segment == IO_LOG_SEGMENT) {
*array = os_aio_log_array;
segment = 0;
@@ -3748,7 +4153,7 @@ os_aio_array_reserve_slot(
struct iocb* iocb;
off_t aio_offset;
-#endif
+#endif /* WIN_ASYNC_IO */
ulint i;
ulint counter;
ulint slots_per_seg;
@@ -3756,7 +4161,7 @@ os_aio_array_reserve_slot(
#ifdef WIN_ASYNC_IO
ut_a((len & 0xFFFFFFFFUL) == len);
-#endif
+#endif /* WIN_ASYNC_IO */
/* No need of a mutex. Only reading constant fields */
slots_per_seg = array->n_slots / array->n_segments;
@@ -3789,9 +4194,11 @@ loop:
local segment and do a full scan of the array. We are
guaranteed to find a slot in full scan. */
for (i = local_seg * slots_per_seg, counter = 0;
- counter < array->n_slots; i++, counter++) {
+ counter < array->n_slots;
+ i++, counter++) {
i %= array->n_slots;
+
slot = os_aio_array_get_nth_slot(array, i);
if (slot->reserved == FALSE) {
@@ -3815,7 +4222,7 @@ found:
}
slot->reserved = TRUE;
- slot->reservation_time = time(NULL);
+ slot->reservation_time = ut_time();
slot->message1 = message1;
slot->message2 = message2;
slot->file = file;
@@ -3827,7 +4234,7 @@ found:
slot->io_already_done = FALSE;
#ifdef WIN_ASYNC_IO
- control = &(slot->control);
+ control = &slot->control;
control->Offset = (DWORD) offset & 0xFFFFFFFF;
control->OffsetHigh = (DWORD) (offset >> 32);
ResetEvent(slot->handle);
@@ -3858,7 +4265,6 @@ found:
iocb->data = (void*) slot;
slot->n_bytes = 0;
slot->ret = 0;
- /*fprintf(stderr, "Filled up Linux native iocb.\n");*/
skip_native_aio:
#endif /* LINUX_NATIVE_AIO */
@@ -3876,9 +4282,6 @@ os_aio_array_free_slot(
os_aio_array_t* array, /*!< in: aio array */
os_aio_slot_t* slot) /*!< in: pointer to slot */
{
- ut_ad(array);
- ut_ad(slot);
-
os_mutex_enter(array->mutex);
ut_ad(slot->reserved);
@@ -3927,36 +4330,42 @@ os_aio_simulated_wake_handler_thread(
arrays */
{
os_aio_array_t* array;
- os_aio_slot_t* slot;
ulint segment;
- ulint n;
- ulint i;
ut_ad(!srv_use_native_aio);
segment = os_aio_get_array_and_local_segment(&array, global_segment);
- n = array->n_slots / array->n_segments;
+ ulint n = array->n_slots / array->n_segments;
+
+ segment *= n;
/* Look through n slots after the segment * n'th slot */
os_mutex_enter(array->mutex);
- for (i = 0; i < n; i++) {
- slot = os_aio_array_get_nth_slot(array, i + segment * n);
+ for (ulint i = 0; i < n; ++i) {
+ const os_aio_slot_t* slot;
+
+ slot = os_aio_array_get_nth_slot(array, segment + i);
if (slot->reserved) {
+
/* Found an i/o request */
- break;
+ os_mutex_exit(array->mutex);
+
+ os_event_t event;
+
+ event = os_aio_segment_wait_events[global_segment];
+
+ os_event_set(event);
+
+ return;
}
}
os_mutex_exit(array->mutex);
-
- if (i < n) {
- os_event_set(os_aio_segment_wait_events[global_segment]);
- }
}
/**********************************************************************//**
@@ -3966,8 +4375,6 @@ void
os_aio_simulated_wake_handler_threads(void)
/*=======================================*/
{
- ulint i;
-
if (srv_use_native_aio) {
/* We do not use simulated aio: do nothing */
@@ -3976,7 +4383,7 @@ os_aio_simulated_wake_handler_threads(void)
os_aio_recommend_sleep_for_read_threads = FALSE;
- for (i = 0; i < os_aio_n_segments; i++) {
+ for (ulint i = 0; i < os_aio_n_segments; i++) {
os_aio_simulated_wake_handler_thread(i);
}
}
@@ -3998,7 +4405,6 @@ background threads too eagerly to allow for coalescing during
readahead requests. */
#ifdef __WIN__
os_aio_array_t* array;
- ulint g;
if (srv_use_native_aio) {
/* We do not use simulated aio: do nothing */
@@ -4008,12 +4414,12 @@ readahead requests. */
os_aio_recommend_sleep_for_read_threads = TRUE;
- for (g = 0; g < os_aio_n_segments; g++) {
- os_aio_get_array_and_local_segment(&array, g);
+ for (ulint i = 0; i < os_aio_n_segments; i++) {
+ os_aio_get_array_and_local_segment(&array, i);
if (array == os_aio_read_array) {
- os_event_reset(os_aio_segment_wait_events[g]);
+ os_event_reset(os_aio_segment_wait_events[i]);
}
}
#endif /* __WIN__ */
@@ -4111,11 +4517,10 @@ os_aio_func(
ibool retval;
BOOL ret = TRUE;
DWORD len = (DWORD) n;
- struct fil_node_struct * dummy_mess1;
+ struct fil_node_t* dummy_mess1;
void* dummy_mess2;
ulint dummy_type;
#endif /* WIN_ASYNC_IO */
- ibool retry;
ulint wake_later;
ut_ad(file);
@@ -4153,6 +4558,7 @@ os_aio_func(
return(os_file_read_func(file, buf, offset, n));
}
+ ut_ad(!srv_read_only_mode);
ut_a(type == OS_FILE_WRITE);
return(os_file_write_func(name, file, buf, offset, n));
@@ -4161,9 +4567,12 @@ os_aio_func(
try_again:
switch (mode) {
case OS_AIO_NORMAL:
- array = (type == OS_FILE_READ)
- ? os_aio_read_array
- : os_aio_write_array;
+ if (type == OS_FILE_READ) {
+ array = os_aio_read_array;
+ } else {
+ ut_ad(!srv_read_only_mode);
+ array = os_aio_write_array;
+ }
break;
case OS_AIO_IBUF:
ut_ad(type == OS_FILE_READ);
@@ -4172,14 +4581,21 @@ try_again:
wake_later = FALSE;
- array = os_aio_ibuf_array;
+ if (srv_read_only_mode) {
+ array = os_aio_read_array;
+ } else {
+ array = os_aio_ibuf_array;
+ }
break;
case OS_AIO_LOG:
- array = os_aio_log_array;
+ if (srv_read_only_mode) {
+ array = os_aio_read_array;
+ } else {
+ array = os_aio_log_array;
+ }
break;
case OS_AIO_SYNC:
array = os_aio_sync_array;
-
#if defined(LINUX_NATIVE_AIO)
/* In Linux native AIO we don't use sync IO array. */
ut_a(!srv_use_native_aio);
@@ -4204,7 +4620,7 @@ try_again:
if (!os_aio_linux_dispatch(array, slot)) {
goto err_exit;
}
-#endif
+#endif /* WIN_ASYNC_IO */
} else {
if (!wake_later) {
os_aio_simulated_wake_handler_thread(
@@ -4213,6 +4629,7 @@ try_again:
}
}
} else if (type == OS_FILE_WRITE) {
+ ut_ad(!srv_read_only_mode);
if (srv_use_native_aio) {
os_n_file_writes++;
#ifdef WIN_ASYNC_IO
@@ -4223,7 +4640,7 @@ try_again:
if (!os_aio_linux_dispatch(array, slot)) {
goto err_exit;
}
-#endif
+#endif /* WIN_ASYNC_IO */
} else {
if (!wake_later) {
os_aio_simulated_wake_handler_thread(
@@ -4247,11 +4664,10 @@ try_again:
we must use the same wait mechanism as for
async i/o */
- retval = os_aio_windows_handle(ULINT_UNDEFINED,
- slot->pos,
- &dummy_mess1,
- &dummy_mess2,
- &dummy_type);
+ retval = os_aio_windows_handle(
+ ULINT_UNDEFINED, slot->pos,
+ &dummy_mess1, &dummy_mess2,
+ &dummy_type);
return(retval);
}
@@ -4270,10 +4686,8 @@ err_exit:
#endif /* LINUX_NATIVE_AIO || WIN_ASYNC_IO */
os_aio_array_free_slot(array, slot);
- retry = os_file_handle_error(name,
- type == OS_FILE_READ
- ? "aio read" : "aio write");
- if (retry) {
+ if (os_file_handle_error(
+ name,type == OS_FILE_READ ? "aio read" : "aio write")) {
goto try_again;
}
@@ -4323,8 +4737,8 @@ os_aio_windows_handle(
BOOL retry = FALSE;
if (segment == ULINT_UNDEFINED) {
- array = os_aio_sync_array;
segment = 0;
+ array = os_aio_sync_array;
} else {
segment = os_aio_get_array_and_local_segment(&array, segment);
}
@@ -4338,16 +4752,21 @@ os_aio_windows_handle(
n = array->n_slots / array->n_segments;
if (array == os_aio_sync_array) {
+
WaitForSingleObject(
os_aio_array_get_nth_slot(array, pos)->handle,
INFINITE);
+
i = pos;
+
} else {
- srv_set_io_thread_op_info(orig_seg, "wait Windows aio");
- i = WaitForMultipleObjects((DWORD) n,
- array->handles + segment * n,
- FALSE,
- INFINITE);
+ if (orig_seg != ULINT_UNDEFINED) {
+ srv_set_io_thread_op_info(orig_seg, "wait Windows aio");
+ }
+
+ i = WaitForMultipleObjects(
+ (DWORD) n, array->handles + segment * n,
+ FALSE, INFINITE);
}
os_mutex_enter(array->mutex);
@@ -4367,8 +4786,8 @@ os_aio_windows_handle(
ut_a(slot->reserved);
if (orig_seg != ULINT_UNDEFINED) {
- srv_set_io_thread_op_info(orig_seg,
- "get windows aio return value");
+ srv_set_io_thread_op_info(
+ orig_seg, "get windows aio return value");
}
ret = GetOverlappedResult(slot->file, &(slot->control), &len, TRUE);
@@ -4671,7 +5090,7 @@ found:
*type = slot->type;
- if ((slot->ret == 0) && (slot->n_bytes == (long) slot->len)) {
+ if (slot->ret == 0 && slot->n_bytes == (long) slot->len) {
ret = TRUE;
} else {
@@ -4720,8 +5139,6 @@ os_aio_simulated_handle(
{
os_aio_array_t* array;
ulint segment;
- os_aio_slot_t* slot;
- os_aio_slot_t* slot2;
os_aio_slot_t* consecutive_ios[OS_AIO_MERGE_N_CONSECUTIVE];
ulint n_consecutive;
ulint total_len;
@@ -4734,7 +5151,7 @@ os_aio_simulated_handle(
ibool ret;
ibool any_reserved;
ulint n;
- ulint i;
+ os_aio_slot_t* aio_slot;
/* Fix compiler warning */
*consecutive_ios = NULL;
@@ -4772,7 +5189,9 @@ restart:
os_mutex_enter(array->mutex);
- for (i = 0; i < n; i++) {
+ for (ulint i = 0; i < n; i++) {
+ os_aio_slot_t* slot;
+
slot = os_aio_array_get_nth_slot(array, i + segment * n);
if (!slot->reserved) {
@@ -4786,8 +5205,8 @@ restart:
(ulong) i);
}
+ aio_slot = slot;
ret = TRUE;
-
goto slot_io_done;
} else {
any_reserved = TRUE;
@@ -4797,9 +5216,7 @@ restart:
/* There is no completed request.
If there is no pending request at all,
and the system is being shut down, exit. */
- if (UNIV_UNLIKELY
- (!any_reserved
- && srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS)) {
+ if (!any_reserved && srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
os_mutex_exit(array->mutex);
*message1 = NULL;
*message2 = NULL;
@@ -4815,12 +5232,15 @@ restart:
biggest_age = 0;
lowest_offset = IB_UINT64_MAX;
- for (i = 0; i < n; i++) {
+ for (ulint i = 0; i < n; i++) {
+ os_aio_slot_t* slot;
+
slot = os_aio_array_get_nth_slot(array, i + segment * n);
if (slot->reserved) {
- age = (ulint) difftime(time(NULL),
- slot->reservation_time);
+
+ age = (ulint) difftime(
+ ut_time(), slot->reservation_time);
if ((age >= 2 && age > biggest_age)
|| (age >= 2 && age == biggest_age
@@ -4844,9 +5264,11 @@ restart:
lowest_offset = IB_UINT64_MAX;
- for (i = 0; i < n; i++) {
- slot = os_aio_array_get_nth_slot(array,
- i + segment * n);
+ for (ulint i = 0; i < n; i++) {
+ os_aio_slot_t* slot;
+
+ slot = os_aio_array_get_nth_slot(
+ array, i + segment * n);
if (slot->reserved && slot->offset < lowest_offset) {
@@ -4872,25 +5294,28 @@ restart:
ut_ad(n_consecutive != 0);
ut_ad(consecutive_ios[0] != NULL);
- slot = consecutive_ios[0];
+ aio_slot = consecutive_ios[0];
/* Check if there are several consecutive blocks to read or write */
consecutive_loop:
- for (i = 0; i < n; i++) {
- slot2 = os_aio_array_get_nth_slot(array, i + segment * n);
+ for (ulint i = 0; i < n; i++) {
+ os_aio_slot_t* slot;
+
+ slot = os_aio_array_get_nth_slot(array, i + segment * n);
- if (slot2->reserved && slot2 != slot
- && slot2->offset == slot->offset + slot->len
- && slot2->type == slot->type
- && slot2->file == slot->file) {
+ if (slot->reserved
+ && slot != aio_slot
+ && slot->offset == slot->offset + aio_slot->len
+ && slot->type == aio_slot->type
+ && slot->file == aio_slot->file) {
/* Found a consecutive i/o request */
- consecutive_ios[n_consecutive] = slot2;
+ consecutive_ios[n_consecutive] = slot;
n_consecutive++;
- slot = slot2;
+ aio_slot = slot;
if (n_consecutive < OS_AIO_MERGE_N_CONSECUTIVE) {
@@ -4908,15 +5333,15 @@ consecutive_loop:
i/o */
total_len = 0;
- slot = consecutive_ios[0];
+ aio_slot = consecutive_ios[0];
- for (i = 0; i < n_consecutive; i++) {
+ for (ulint i = 0; i < n_consecutive; i++) {
total_len += consecutive_ios[i]->len;
}
if (n_consecutive == 1) {
/* We can use the buffer of the i/o request */
- combined_buf = slot->buf;
+ combined_buf = aio_slot->buf;
combined_buf2 = NULL;
} else {
combined_buf2 = static_cast<byte*>(
@@ -4934,50 +5359,41 @@ consecutive_loop:
os_mutex_exit(array->mutex);
- if (slot->type == OS_FILE_WRITE && n_consecutive > 1) {
+ if (aio_slot->type == OS_FILE_WRITE && n_consecutive > 1) {
/* Copy the buffers to the combined buffer */
offs = 0;
- for (i = 0; i < n_consecutive; i++) {
+ for (ulint i = 0; i < n_consecutive; i++) {
ut_memcpy(combined_buf + offs, consecutive_ios[i]->buf,
consecutive_ios[i]->len);
+
offs += consecutive_ios[i]->len;
}
}
srv_set_io_thread_op_info(global_segment, "doing file i/o");
- if (os_aio_print_debug) {
- fprintf(stderr,
- "InnoDB: doing i/o of type %lu at offset " UINT64PF
- ", length %lu\n",
- (ulong) slot->type, slot->offset, (ulong) total_len);
- }
-
/* Do the i/o with ordinary, synchronous i/o functions: */
- if (slot->type == OS_FILE_WRITE) {
- ret = os_file_write(slot->name, slot->file, combined_buf,
- slot->offset, total_len);
+ if (aio_slot->type == OS_FILE_WRITE) {
+ ut_ad(!srv_read_only_mode);
+ ret = os_file_write(
+ aio_slot->name, aio_slot->file, combined_buf,
+ aio_slot->offset, total_len);
} else {
- ret = os_file_read(slot->file, combined_buf,
- slot->offset, total_len);
+ ret = os_file_read(
+ aio_slot->file, combined_buf,
+ aio_slot->offset, total_len);
}
ut_a(ret);
srv_set_io_thread_op_info(global_segment, "file i/o done");
-#if 0
- fprintf(stderr,
- "aio: %lu consecutive %lu:th segment, first offs %lu blocks\n",
- n_consecutive, global_segment, slot->offset / UNIV_PAGE_SIZE);
-#endif
-
- if (slot->type == OS_FILE_READ && n_consecutive > 1) {
+ if (aio_slot->type == OS_FILE_READ && n_consecutive > 1) {
/* Copy the combined buffer to individual buffers */
offs = 0;
- for (i = 0; i < n_consecutive; i++) {
+ for (ulint i = 0; i < n_consecutive; i++) {
ut_memcpy(consecutive_ios[i]->buf, combined_buf + offs,
consecutive_ios[i]->len);
@@ -4993,7 +5409,7 @@ consecutive_loop:
/* Mark the i/os done in slots */
- for (i = 0; i < n_consecutive; i++) {
+ for (ulint i = 0; i < n_consecutive; i++) {
consecutive_ios[i]->io_already_done = TRUE;
}
@@ -5003,16 +5419,16 @@ consecutive_loop:
slot_io_done:
- ut_a(slot->reserved);
+ ut_a(aio_slot->reserved);
- *message1 = slot->message1;
- *message2 = slot->message2;
+ *message1 = aio_slot->message1;
+ *message2 = aio_slot->message2;
- *type = slot->type;
+ *type = aio_slot->type;
os_mutex_exit(array->mutex);
- os_aio_array_free_slot(array, slot);
+ os_aio_array_free_slot(array, aio_slot);
return(ret);
@@ -5031,30 +5447,20 @@ recommended_sleep:
os_event_wait(os_aio_segment_wait_events[global_segment]);
- if (os_aio_print_debug) {
- fprintf(stderr,
- "InnoDB: i/o handler thread for i/o"
- " segment %lu wakes up\n",
- (ulong) global_segment);
- }
-
goto restart;
}
/**********************************************************************//**
Validates the consistency of an aio array.
-@return TRUE if ok */
+@return true if ok */
static
-ibool
+bool
os_aio_array_validate(
/*==================*/
os_aio_array_t* array) /*!< in: aio wait array */
{
- os_aio_slot_t* slot;
- ulint n_reserved = 0;
ulint i;
-
- ut_a(array);
+ ulint n_reserved = 0;
os_mutex_enter(array->mutex);
@@ -5062,6 +5468,8 @@ os_aio_array_validate(
ut_a(array->n_segments > 0);
for (i = 0; i < array->n_slots; i++) {
+ os_aio_slot_t* slot;
+
slot = os_aio_array_get_nth_slot(array, i);
if (slot->reserved) {
@@ -5074,7 +5482,7 @@ os_aio_array_validate(
os_mutex_exit(array->mutex);
- return(TRUE);
+ return(true);
}
/**********************************************************************//**
@@ -5086,10 +5494,22 @@ os_aio_validate(void)
/*=================*/
{
os_aio_array_validate(os_aio_read_array);
- os_aio_array_validate(os_aio_write_array);
- os_aio_array_validate(os_aio_ibuf_array);
- os_aio_array_validate(os_aio_log_array);
- os_aio_array_validate(os_aio_sync_array);
+
+ if (os_aio_write_array != 0) {
+ os_aio_array_validate(os_aio_write_array);
+ }
+
+ if (os_aio_ibuf_array != 0) {
+ os_aio_array_validate(os_aio_ibuf_array);
+ }
+
+ if (os_aio_log_array != 0) {
+ os_aio_array_validate(os_aio_log_array);
+ }
+
+ if (os_aio_sync_array != 0) {
+ os_aio_array_validate(os_aio_sync_array);
+ }
return(TRUE);
}
@@ -5129,65 +5549,36 @@ os_aio_print_segment_info(
}
/**********************************************************************//**
-Prints info of the aio arrays. */
+Prints info about the aio array. */
UNIV_INTERN
void
-os_aio_print(
-/*=========*/
- FILE* file) /*!< in: file where to print */
+os_aio_print_array(
+/*==============*/
+ FILE* file, /*!< in: file where to print */
+ os_aio_array_t* array) /*!< in: aio array to print */
{
- os_aio_array_t* array;
- os_aio_slot_t* slot;
- ulint n_reserved;
- ulint n_res_seg[SRV_MAX_N_IO_THREADS];
- time_t current_time;
- double time_elapsed;
- double avg_bytes_read;
- ulint i;
-
- for (i = 0; i < srv_n_file_io_threads; i++) {
- fprintf(file, "I/O thread %lu state: %s (%s)", (ulong) i,
- srv_io_thread_op_info[i],
- srv_io_thread_function[i]);
-
-#ifndef __WIN__
- if (os_aio_segment_wait_events[i]->is_set) {
- fprintf(file, " ev set");
- }
-#endif
-
- fprintf(file, "\n");
- }
-
- fputs("Pending normal aio reads:", file);
-
- array = os_aio_read_array;
-loop:
- ut_a(array);
+ ulint n_reserved = 0;
+ ulint n_res_seg[SRV_MAX_N_IO_THREADS];
os_mutex_enter(array->mutex);
ut_a(array->n_slots > 0);
ut_a(array->n_segments > 0);
- n_reserved = 0;
-
memset(n_res_seg, 0x0, sizeof(n_res_seg));
- for (i = 0; i < array->n_slots; i++) {
- ulint seg_no;
+ for (ulint i = 0; i < array->n_slots; ++i) {
+ os_aio_slot_t* slot;
+ ulint seg_no;
slot = os_aio_array_get_nth_slot(array, i);
seg_no = (i * array->n_segments) / array->n_slots;
+
if (slot->reserved) {
- n_reserved++;
- n_res_seg[seg_no]++;
-#if 0
- fprintf(stderr, "Reserved slot, messages %p %p\n",
- (void*) slot->message1,
- (void*) slot->message2);
-#endif
+ ++n_reserved;
+ ++n_res_seg[seg_no];
+
ut_a(slot->len > 0);
}
}
@@ -5199,38 +5590,61 @@ loop:
os_aio_print_segment_info(file, n_res_seg, array);
os_mutex_exit(array->mutex);
+}
- if (array == os_aio_read_array) {
- fputs(", aio writes:", file);
+/**********************************************************************//**
+Prints info of the aio arrays. */
+UNIV_INTERN
+void
+os_aio_print(
+/*=========*/
+ FILE* file) /*!< in: file where to print */
+{
+ time_t current_time;
+ double time_elapsed;
+ double avg_bytes_read;
- array = os_aio_write_array;
+ for (ulint i = 0; i < srv_n_file_io_threads; ++i) {
+ fprintf(file, "I/O thread %lu state: %s (%s)",
+ (ulong) i,
+ srv_io_thread_op_info[i],
+ srv_io_thread_function[i]);
- goto loop;
+#ifndef __WIN__
+ if (os_aio_segment_wait_events[i]->is_set) {
+ fprintf(file, " ev set");
+ }
+#endif /* __WIN__ */
+
+ fprintf(file, "\n");
}
- if (array == os_aio_write_array) {
- fputs(",\n ibuf aio reads:", file);
- array = os_aio_ibuf_array;
+ fputs("Pending normal aio reads:", file);
- goto loop;
+ os_aio_print_array(file, os_aio_read_array);
+
+ if (os_aio_write_array != 0) {
+ fputs(", aio writes:", file);
+ os_aio_print_array(file, os_aio_write_array);
}
- if (array == os_aio_ibuf_array) {
- fputs(", log i/o's:", file);
- array = os_aio_log_array;
+ if (os_aio_ibuf_array != 0) {
+ fputs(",\n ibuf aio reads:", file);
+ os_aio_print_array(file, os_aio_ibuf_array);
+ }
- goto loop;
+ if (os_aio_log_array != 0) {
+ fputs(", log i/o's:", file);
+ os_aio_print_array(file, os_aio_log_array);
}
- if (array == os_aio_log_array) {
+ if (os_aio_sync_array != 0) {
fputs(", sync i/o's:", file);
- array = os_aio_sync_array;
-
- goto loop;
+ os_aio_print_array(file, os_aio_sync_array);
}
putc('\n', file);
- current_time = time(NULL);
+ current_time = ut_time();
time_elapsed = 0.001 + difftime(current_time, os_last_printout);
fprintf(file,
@@ -5238,7 +5652,8 @@ loop:
"%lu OS file reads, %lu OS file writes, %lu OS fsyncs\n",
(ulong) fil_n_pending_log_flushes,
(ulong) fil_n_pending_tablespace_flushes,
- (ulong) os_n_file_reads, (ulong) os_n_file_writes,
+ (ulong) os_n_file_reads,
+ (ulong) os_n_file_writes,
(ulong) os_n_fsyncs);
if (os_file_n_pending_preads != 0 || os_file_n_pending_pwrites != 0) {
@@ -5310,21 +5725,29 @@ os_aio_all_slots_free(void)
os_mutex_exit(array->mutex);
- array = os_aio_write_array;
+ if (!srv_read_only_mode) {
+ ut_a(os_aio_write_array == 0);
- os_mutex_enter(array->mutex);
+ array = os_aio_write_array;
- n_res += array->n_reserved;
+ os_mutex_enter(array->mutex);
- os_mutex_exit(array->mutex);
+ n_res += array->n_reserved;
- array = os_aio_ibuf_array;
+ os_mutex_exit(array->mutex);
- os_mutex_enter(array->mutex);
+ ut_a(os_aio_ibuf_array == 0);
- n_res += array->n_reserved;
+ array = os_aio_ibuf_array;
- os_mutex_exit(array->mutex);
+ os_mutex_enter(array->mutex);
+
+ n_res += array->n_reserved;
+
+ os_mutex_exit(array->mutex);
+ }
+
+ ut_a(os_aio_log_array == 0);
array = os_aio_log_array;
diff --git a/storage/innobase/os/os0sync.cc b/storage/innobase/os/os0sync.cc
index c2e2e7e477f..392dbe0d7a7 100644
--- a/storage/innobase/os/os0sync.cc
+++ b/storage/innobase/os/os0sync.cc
@@ -38,7 +38,7 @@ Created 9/6/1995 Heikki Tuuri
#include "srv0srv.h"
/* Type definition for an operating system mutex struct */
-struct os_mutex_struct{
+struct os_mutex_t{
os_event_t event; /*!< Used by sync0arr.cc for queing threads */
void* handle; /*!< OS handle to mutex */
ulint count; /*!< we use this counter to check
@@ -47,12 +47,12 @@ struct os_mutex_struct{
do not assume that the OS mutex
supports recursive locking, though
NT seems to do that */
- UT_LIST_NODE_T(os_mutex_str_t) os_mutex_list;
+ UT_LIST_NODE_T(os_mutex_t) os_mutex_list;
/* list of all 'slow' OS mutexes created */
};
/** Mutex protecting counts and the lists of OS mutexes and events */
-UNIV_INTERN os_mutex_t os_sync_mutex;
+UNIV_INTERN os_ib_mutex_t os_sync_mutex;
/** TRUE if os_sync_mutex has been initialized */
static ibool os_sync_mutex_inited = FALSE;
/** TRUE when os_sync_free() is being executed */
@@ -63,10 +63,10 @@ os_thread_exit */
UNIV_INTERN ulint os_thread_count = 0;
/** The list of all events created */
-static UT_LIST_BASE_NODE_T(os_event_struct_t) os_event_list;
+static UT_LIST_BASE_NODE_T(os_event) os_event_list;
/** The list of all OS 'slow' mutexes */
-static UT_LIST_BASE_NODE_T(os_mutex_str_t) os_mutex_list;
+static UT_LIST_BASE_NODE_T(os_mutex_t) os_mutex_list;
UNIV_INTERN ulint os_event_count = 0;
UNIV_INTERN ulint os_mutex_count = 0;
@@ -329,7 +329,7 @@ os_sync_free(void)
/*==============*/
{
os_event_t event;
- os_mutex_t mutex;
+ os_ib_mutex_t mutex;
os_sync_free_called = TRUE;
event = UT_LIST_GET_FIRST(os_event_list);
@@ -365,10 +365,8 @@ must be reset explicitly by calling sync_os_reset_event.
@return the event handle */
UNIV_INTERN
os_event_t
-os_event_create(
-/*============*/
- const char* name) /*!< in: the name of the event, if NULL
- the event is created without a name */
+os_event_create(void)
+/*==================*/
{
os_event_t event;
@@ -377,10 +375,7 @@ os_event_create(
event = static_cast<os_event_t>(ut_malloc(sizeof(*event)));
- event->handle = CreateEvent(NULL,
- TRUE,
- FALSE,
- (LPCTSTR) name);
+ event->handle = CreateEvent(NULL, TRUE, FALSE, NULL);
if (!event->handle) {
fprintf(stderr,
"InnoDB: Could not create a Windows event"
@@ -390,10 +385,7 @@ os_event_create(
} else /* Windows with condition variables */
#endif
{
- UT_NOT_USED(name);
-
- event = static_cast<os_event_struct_t*>(
- ut_malloc(sizeof(struct os_event_struct)));
+ event = static_cast<os_event_t>(ut_malloc(sizeof *event));
#ifndef PFS_SKIP_EVENT_MUTEX
os_fast_mutex_init(event_os_mutex_key, &event->os_mutex);
@@ -739,27 +731,26 @@ os_event_wait_time_low(
/*********************************************************//**
Creates an operating system mutex semaphore. Because these are slow, the
-mutex semaphore of InnoDB itself (mutex_t) should be used where possible.
+mutex semaphore of InnoDB itself (ib_mutex_t) should be used where possible.
@return the mutex handle */
UNIV_INTERN
-os_mutex_t
+os_ib_mutex_t
os_mutex_create(void)
/*=================*/
{
os_fast_mutex_t* mutex;
- os_mutex_t mutex_str;
+ os_ib_mutex_t mutex_str;
mutex = static_cast<os_fast_mutex_t*>(
ut_malloc(sizeof(os_fast_mutex_t)));
os_fast_mutex_init(os_mutex_key, mutex);
- mutex_str = static_cast<os_mutex_t>(
- ut_malloc(sizeof(os_mutex_str_t)));
+ mutex_str = static_cast<os_ib_mutex_t>(ut_malloc(sizeof *mutex_str));
mutex_str->handle = mutex;
mutex_str->count = 0;
- mutex_str->event = os_event_create(NULL);
+ mutex_str->event = os_event_create();
if (UNIV_LIKELY(os_sync_mutex_inited)) {
/* When creating os_sync_mutex itself we cannot reserve it */
@@ -783,7 +774,7 @@ UNIV_INTERN
void
os_mutex_enter(
/*===========*/
- os_mutex_t mutex) /*!< in: mutex to acquire */
+ os_ib_mutex_t mutex) /*!< in: mutex to acquire */
{
os_fast_mutex_lock(static_cast<os_fast_mutex_t*>(mutex->handle));
@@ -798,7 +789,7 @@ UNIV_INTERN
void
os_mutex_exit(
/*==========*/
- os_mutex_t mutex) /*!< in: mutex to release */
+ os_ib_mutex_t mutex) /*!< in: mutex to release */
{
ut_a(mutex);
@@ -814,7 +805,7 @@ UNIV_INTERN
void
os_mutex_free(
/*==========*/
- os_mutex_t mutex) /*!< in: mutex to free */
+ os_ib_mutex_t mutex) /*!< in: mutex to free */
{
ut_a(mutex);
diff --git a/storage/innobase/os/os0thread.cc b/storage/innobase/os/os0thread.cc
index 48ee61e9402..9cc09a847b1 100644
--- a/storage/innobase/os/os0thread.cc
+++ b/storage/innobase/os/os0thread.cc
@@ -132,8 +132,10 @@ os_thread_create_func(
if (thread_id) {
*thread_id = win_thread_id;
}
-
- return(thread);
+ if (thread) {
+ CloseHandle(thread);
+ }
+ return((os_thread_t)win_thread_id);
#else
int ret;
os_thread_t pthread;
diff --git a/storage/innobase/page/page0cur.cc b/storage/innobase/page/page0cur.cc
index 9046338f377..f416d38cc35 100644
--- a/storage/innobase/page/page0cur.cc
+++ b/storage/innobase/page/page0cur.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -973,6 +974,9 @@ page_cur_insert_rec_low(
page = page_align(current_rec);
ut_ad(dict_table_is_comp(index->table)
== (ibool) !!page_is_comp(page));
+ ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
+ ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID)
+ == index->id || recv_recovery_is_on() || mtr->inside_ibuf);
ut_ad(!page_rec_is_supremum(current_rec));
@@ -1007,8 +1011,8 @@ page_cur_insert_rec_low(
rec_offs_init(foffsets_);
- foffsets = rec_get_offsets(free_rec, index, foffsets,
- ULINT_UNDEFINED, &heap);
+ foffsets = rec_get_offsets(
+ free_rec, index, foffsets, ULINT_UNDEFINED, &heap);
if (rec_offs_size(foffsets) < rec_size) {
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
@@ -1167,14 +1171,27 @@ page_cur_insert_rec_zip_reorg(
buf_block_t* block, /*!< in: buffer block */
dict_index_t* index, /*!< in: record descriptor */
rec_t* rec, /*!< in: inserted record */
+ ulint rec_size,/*!< in: size of the inserted record */
page_t* page, /*!< in: uncompressed page */
page_zip_des_t* page_zip,/*!< in: compressed page */
mtr_t* mtr) /*!< in: mini-transaction, or NULL */
{
ulint pos;
+ /* Make a local copy as the values can change dynamically. */
+ bool log_compressed = page_log_compressed_pages;
+ ulint level = page_compression_level;
+
/* Recompress or reorganize and recompress the page. */
- if (page_zip_compress(page_zip, page, index, mtr)) {
+ if (page_zip_compress(page_zip, page, index, level,
+ log_compressed ? mtr : NULL)) {
+ if (!log_compressed) {
+ page_cur_insert_rec_write_log(
+ rec, rec_size, *current_rec, index, mtr);
+ page_zip_compress_write_log_no_data(
+ level, page, index, mtr);
+ }
+
return(rec);
}
@@ -1246,6 +1263,9 @@ page_cur_insert_rec_zip(
page = page_align(*current_rec);
ut_ad(dict_table_is_comp(index->table));
ut_ad(page_is_comp(page));
+ ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
+ ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID)
+ == index->id || mtr->inside_ibuf || recv_recovery_is_on());
ut_ad(!page_rec_is_supremum(*current_rec));
#ifdef UNIV_ZIP_DEBUG
@@ -1281,10 +1301,27 @@ page_cur_insert_rec_zip(
index, rec, offsets,
NULL);
- if (UNIV_LIKELY(insert_rec != NULL)) {
+ /* If recovery is on, this implies that the compression
+ of the page was successful during runtime. Had that not
+ been the case or had the redo logging of compressed
+ pages been enabled during runtime then we'd have seen
+ a MLOG_ZIP_PAGE_COMPRESS redo record. Therefore, we
+ know that we don't need to reorganize the page. We,
+ however, do need to recompress the page. That will
+ happen when the next redo record is read which must
+ be of type MLOG_ZIP_PAGE_COMPRESS_NO_DATA and it must
+ contain a valid compression level value.
+ This implies that during recovery from this point till
+ the next redo is applied the uncompressed and
+ compressed versions are not identical and
+ page_zip_validate will fail but that is OK because
+ we call page_zip_validate only after processing
+ all changes to a page under a single mtr during
+ recovery. */
+ if (insert_rec != NULL && !recv_recovery_is_on()) {
insert_rec = page_cur_insert_rec_zip_reorg(
current_rec, block, index, insert_rec,
- page, page_zip, mtr);
+ rec_size, page, page_zip, mtr);
#ifdef UNIV_DEBUG
if (insert_rec) {
rec_offs_make_valid(
@@ -1781,9 +1818,9 @@ UNIV_INLINE
void
page_cur_delete_rec_write_log(
/*==========================*/
- rec_t* rec, /*!< in: record to be deleted */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mini-transaction handle */
+ rec_t* rec, /*!< in: record to be deleted */
+ const dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
byte* log_ptr;
@@ -1865,10 +1902,11 @@ UNIV_INTERN
void
page_cur_delete_rec(
/*================*/
- page_cur_t* cursor, /*!< in/out: a page cursor */
- dict_index_t* index, /*!< in: record descriptor */
- const ulint* offsets,/*!< in: rec_get_offsets(cursor->rec, index) */
- mtr_t* mtr) /*!< in: mini-transaction handle */
+ page_cur_t* cursor, /*!< in/out: a page cursor */
+ const dict_index_t* index, /*!< in: record descriptor */
+ const ulint* offsets,/*!< in: rec_get_offsets(
+ cursor->rec, index) */
+ mtr_t* mtr) /*!< in: mini-transaction handle */
{
page_dir_slot_t* cur_dir_slot;
page_dir_slot_t* prev_slot;
@@ -1881,8 +1919,6 @@ page_cur_delete_rec(
ulint cur_n_owned;
rec_t* rec;
- ut_ad(cursor && mtr);
-
page = page_cur_get_page(cursor);
page_zip = page_cur_get_page_zip(cursor);
@@ -1897,17 +1933,23 @@ page_cur_delete_rec(
current_rec = cursor->rec;
ut_ad(rec_offs_validate(current_rec, index, offsets));
ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
+ ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
+ ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID)
+ == index->id || mtr->inside_ibuf || recv_recovery_is_on());
/* The record must not be the supremum or infimum record. */
ut_ad(page_rec_is_user_rec(current_rec));
/* Save to local variables some data associated with current_rec */
cur_slot_no = page_dir_find_owner_slot(current_rec);
+ ut_ad(cur_slot_no > 0);
cur_dir_slot = page_dir_get_nth_slot(page, cur_slot_no);
cur_n_owned = page_dir_slot_get_n_owned(cur_dir_slot);
/* 0. Write the log record */
- page_cur_delete_rec_write_log(current_rec, index, mtr);
+ if (mtr != 0) {
+ page_cur_delete_rec_write_log(current_rec, index, mtr);
+ }
/* 1. Reset the last insert info in the page header and increment
the modify clock for the frame */
@@ -1915,9 +1957,13 @@ page_cur_delete_rec(
page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, NULL);
/* The page gets invalid for optimistic searches: increment the
- frame modify clock */
+ frame modify clock only if there is an mini-transaction covering
+ the change. During IMPORT we allocate local blocks that are not
+ part of the buffer pool. */
- buf_block_modify_clock_inc(page_cur_get_block(cursor));
+ if (mtr != 0) {
+ buf_block_modify_clock_inc(page_cur_get_block(cursor));
+ }
/* 2. Find the next and the previous record. Note that the cursor is
left at the next record. */
@@ -1961,14 +2007,15 @@ page_cur_delete_rec(
page_dir_slot_set_n_owned(cur_dir_slot, page_zip, cur_n_owned - 1);
/* 6. Free the memory occupied by the record */
- btr_blob_dbg_remove_rec(current_rec, index, offsets, "delete");
+ btr_blob_dbg_remove_rec(current_rec, const_cast<dict_index_t*>(index),
+ offsets, "delete");
page_mem_free(page, page_zip, current_rec, index, offsets);
/* 7. Now we have decremented the number of owned records of the slot.
If the number drops below PAGE_DIR_SLOT_MIN_N_OWNED, we balance the
slots. */
- if (UNIV_UNLIKELY(cur_n_owned <= PAGE_DIR_SLOT_MIN_N_OWNED)) {
+ if (cur_n_owned <= PAGE_DIR_SLOT_MIN_N_OWNED) {
page_dir_balance_slot(page, page_zip, cur_slot_no);
}
diff --git a/storage/innobase/page/page0page.cc b/storage/innobase/page/page0page.cc
index deef6935f08..6b7b8424856 100644
--- a/storage/innobase/page/page0page.cc
+++ b/storage/innobase/page/page0page.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -512,7 +513,8 @@ page_create_zip(
page = page_create_low(block, TRUE);
mach_write_to_2(page + PAGE_HEADER + PAGE_LEVEL, level);
- if (UNIV_UNLIKELY(!page_zip_compress(page_zip, page, index, mtr))) {
+ if (!page_zip_compress(page_zip, page, index,
+ page_compression_level, mtr)) {
/* The compression of a newly created page
should always succeed. */
ut_error;
@@ -658,7 +660,11 @@ page_copy_rec_list_end(
if (new_page_zip) {
mtr_set_log_mode(mtr, log_mode);
- if (!page_zip_compress(new_page_zip, new_page, index, mtr)) {
+ if (!page_zip_compress(new_page_zip,
+ new_page,
+ index,
+ page_compression_level,
+ mtr)) {
/* Before trying to reorganize the page,
store the number of preceding records on the page. */
ulint ret_pos
@@ -781,8 +787,9 @@ page_copy_rec_list_start(
DBUG_EXECUTE_IF("page_copy_rec_list_start_compress_fail",
goto zip_reorganize;);
- if (UNIV_UNLIKELY
- (!page_zip_compress(new_page_zip, new_page, index, mtr))) {
+ if (!page_zip_compress(new_page_zip, new_page, index,
+ page_compression_level, mtr)) {
+
ulint ret_pos;
#ifndef DBUG_OFF
zip_reorganize:
@@ -793,8 +800,8 @@ zip_reorganize:
/* Before copying, "ret" was the predecessor
of the predefined supremum record. If it was
the predefined infimum record, then it would
- still be the infimum. Thus, the assertion
- ut_a(ret_pos > 0) would fail here. */
+ still be the infimum, and we would have
+ ret_pos == 0. */
if (UNIV_UNLIKELY
(!page_zip_reorganize(new_block, index, mtr))) {
@@ -1049,6 +1056,7 @@ page_delete_rec_list_end(
n_owned = rec_get_n_owned_new(rec2) - count;
slot_index = page_dir_find_owner_slot(rec2);
+ ut_ad(slot_index > 0);
slot = page_dir_get_nth_slot(page, slot_index);
} else {
rec_t* rec2 = rec;
@@ -1064,6 +1072,7 @@ page_delete_rec_list_end(
n_owned = rec_get_n_owned_old(rec2) - count;
slot_index = page_dir_find_owner_slot(rec2);
+ ut_ad(slot_index > 0);
slot = page_dir_get_nth_slot(page, slot_index);
}
@@ -1470,6 +1479,10 @@ page_rec_get_nth_const(
ulint n_owned;
const rec_t* rec;
+ if (nth == 0) {
+ return(page_get_infimum_rec(page));
+ }
+
ut_ad(nth < UNIV_PAGE_SIZE / (REC_N_NEW_EXTRA_BYTES + 1));
for (i = 0;; i++) {
@@ -2313,6 +2326,20 @@ page_validate(
}
}
+ if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)
+ && page_get_n_recs(page) > 0) {
+ trx_id_t max_trx_id = page_get_max_trx_id(page);
+ trx_id_t sys_max_trx_id = trx_sys_get_max_trx_id();
+
+ if (max_trx_id == 0 || max_trx_id > sys_max_trx_id) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "PAGE_MAX_TRX_ID out of bounds: "
+ TRX_ID_FMT ", " TRX_ID_FMT,
+ max_trx_id, sys_max_trx_id);
+ goto func_exit2;
+ }
+ }
+
heap = mem_heap_create(UNIV_PAGE_SIZE + 200);
/* The following buffer is used to check that the
@@ -2602,3 +2629,60 @@ page_find_rec_with_heap_no(
}
}
#endif /* !UNIV_HOTBACKUP */
+
+/*******************************************************//**
+Removes the record from a leaf page. This function does not log
+any changes. It is used by the IMPORT tablespace functions.
+The cursor is moved to the next record after the deleted one.
+@return true if success, i.e., the page did not become too empty */
+UNIV_INTERN
+bool
+page_delete_rec(
+/*============*/
+ const dict_index_t* index, /*!< in: The index that the record
+ belongs to */
+ page_cur_t* pcur, /*!< in/out: page cursor on record
+ to delete */
+ page_zip_des_t* page_zip,/*!< in: compressed page descriptor */
+ const ulint* offsets)/*!< in: offsets for record */
+{
+ bool no_compress_needed;
+ buf_block_t* block = pcur->block;
+ page_t* page = buf_block_get_frame(block);
+
+ ut_ad(page_is_leaf(page));
+
+ if (!rec_offs_any_extern(offsets)
+ && ((page_get_data_size(page) - rec_offs_size(offsets)
+ < BTR_CUR_PAGE_COMPRESS_LIMIT)
+ || (mach_read_from_4(page + FIL_PAGE_NEXT) == FIL_NULL
+ && mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL)
+ || (page_get_n_recs(page) < 2))) {
+
+ ulint root_page_no = dict_index_get_page(index);
+
+ /* The page fillfactor will drop below a predefined
+ minimum value, OR the level in the B-tree contains just
+ one page, OR the page will become empty: we recommend
+ compression if this is not the root page. */
+
+ no_compress_needed = page_get_page_no(page) == root_page_no;
+ } else {
+ no_compress_needed = true;
+ }
+
+ if (no_compress_needed) {
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
+#endif /* UNIV_ZIP_DEBUG */
+
+ page_cur_delete_rec(pcur, index, offsets, 0);
+
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
+#endif /* UNIV_ZIP_DEBUG */
+ }
+
+ return(no_compress_needed);
+}
+
diff --git a/storage/innobase/page/page0zip.cc b/storage/innobase/page/page0zip.cc
index 35a8f458fb2..dee37580002 100644
--- a/storage/innobase/page/page0zip.cc
+++ b/storage/innobase/page/page0zip.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 2005, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2005, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -23,6 +24,9 @@ Compressed page interface
Created June 2005 by Marko Makela
*******************************************************/
+#include <map>
+using namespace std;
+
#define THIS_MODULE
#include "page0zip.h"
#ifdef UNIV_NONINL
@@ -54,9 +58,23 @@ Created June 2005 by Marko Makela
#ifndef UNIV_HOTBACKUP
/** Statistics on compression, indexed by page_zip_des_t::ssize - 1 */
-UNIV_INTERN page_zip_stat_t page_zip_stat[PAGE_ZIP_SSIZE_MAX];
+UNIV_INTERN page_zip_stat_t page_zip_stat[PAGE_ZIP_SSIZE_MAX];
+/** Statistics on compression, indexed by index->id */
+UNIV_INTERN page_zip_stat_per_index_t page_zip_stat_per_index;
+/** Mutex protecting page_zip_stat_per_index */
+UNIV_INTERN ib_mutex_t page_zip_stat_per_index_mutex;
+#ifdef HAVE_PSI_INTERFACE
+UNIV_INTERN mysql_pfs_key_t page_zip_stat_per_index_mutex_key;
+#endif /* HAVE_PSI_INTERFACE */
#endif /* !UNIV_HOTBACKUP */
+/* Compression level to be used by zlib. Settable by user. */
+UNIV_INTERN ulint page_compression_level = 6;
+
+/* Whether or not to log compressed page images to avoid possible
+compression algorithm changes in zlib. */
+UNIV_INTERN bool page_log_compressed_pages = true;
+
/* Please refer to ../include/page0zip.ic for a description of the
compressed page format. */
@@ -386,7 +404,7 @@ page_zip_get_n_prev_extern(
compressed page */
const rec_t* rec, /*!< in: compact physical record
on a B-tree leaf page */
- dict_index_t* index) /*!< in: record descriptor */
+ const dict_index_t* index) /*!< in: record descriptor */
{
const page_t* page = page_align(rec);
ulint n_ext = 0;
@@ -1181,6 +1199,7 @@ page_zip_compress(
m_start, m_end, m_nonempty */
const page_t* page, /*!< in: uncompressed page */
dict_index_t* index, /*!< in: index of the B-tree node */
+ ulint level, /*!< in: commpression level */
mtr_t* mtr) /*!< in: mini-transaction, or NULL */
{
z_stream c_stream;
@@ -1194,7 +1213,6 @@ page_zip_compress(
const rec_t** recs; /*!< dense page directory, sorted by address */
mem_heap_t* heap;
ulint trx_id_col;
- ulint* offsets = NULL;
ulint n_blobs = 0;
byte* storage;/* storage of uncompressed columns */
#ifndef UNIV_HOTBACKUP
@@ -1203,6 +1221,10 @@ page_zip_compress(
#ifdef PAGE_ZIP_COMPRESS_DBG
FILE* logfile = NULL;
#endif
+ /* A local copy of srv_cmp_per_index_enabled to avoid reading that
+ variable multiple times in this function since it can be changed at
+ anytime. */
+ my_bool cmp_per_index_enabled = srv_cmp_per_index_enabled;
ut_a(page_is_comp(page));
ut_a(fil_page_get_type(page) == FIL_PAGE_INDEX);
@@ -1265,6 +1287,11 @@ page_zip_compress(
#endif /* PAGE_ZIP_COMPRESS_DBG */
#ifndef UNIV_HOTBACKUP
page_zip_stat[page_zip->ssize - 1].compressed++;
+ if (cmp_per_index_enabled) {
+ mutex_enter(&page_zip_stat_per_index_mutex);
+ page_zip_stat_per_index[index->id].compressed++;
+ mutex_exit(&page_zip_stat_per_index_mutex);
+ }
#endif /* !UNIV_HOTBACKUP */
if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE
@@ -1276,7 +1303,8 @@ page_zip_compress(
MONITOR_INC(MONITOR_PAGE_COMPRESS);
heap = mem_heap_create(page_zip_get_size(page_zip)
- + n_fields * (2 + sizeof *offsets)
+ + n_fields * (2 + sizeof(ulint))
+ + REC_OFFS_HEADER_SIZE
+ n_dense * ((sizeof *recs)
- PAGE_ZIP_DIR_SLOT_SIZE)
+ UNIV_PAGE_SIZE * 4
@@ -1295,7 +1323,7 @@ page_zip_compress(
/* Compress the data payload. */
page_zip_set_alloc(&c_stream, heap);
- err = deflateInit2(&c_stream, Z_DEFAULT_COMPRESSION,
+ err = deflateInit2(&c_stream, level,
Z_DEFLATED, UNIV_PAGE_SIZE_SHIFT,
MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY);
ut_a(err == Z_OK);
@@ -1408,8 +1436,19 @@ err_exit:
}
#endif /* PAGE_ZIP_COMPRESS_DBG */
#ifndef UNIV_HOTBACKUP
+ if (page_is_leaf(page)) {
+ dict_index_zip_failure(index);
+ }
+
+ ullint time_diff = ut_time_us(NULL) - usec;
page_zip_stat[page_zip->ssize - 1].compressed_usec
- += ut_time_us(NULL) - usec;
+ += time_diff;
+ if (cmp_per_index_enabled) {
+ mutex_enter(&page_zip_stat_per_index_mutex);
+ page_zip_stat_per_index[index->id].compressed_usec
+ += time_diff;
+ mutex_exit(&page_zip_stat_per_index_mutex);
+ }
#endif /* !UNIV_HOTBACKUP */
return(FALSE);
}
@@ -1469,11 +1508,18 @@ err_exit:
}
#endif /* PAGE_ZIP_COMPRESS_DBG */
#ifndef UNIV_HOTBACKUP
- {
- page_zip_stat_t* zip_stat
- = &page_zip_stat[page_zip->ssize - 1];
- zip_stat->compressed_ok++;
- zip_stat->compressed_usec += ut_time_us(NULL) - usec;
+ ullint time_diff = ut_time_us(NULL) - usec;
+ page_zip_stat[page_zip->ssize - 1].compressed_ok++;
+ page_zip_stat[page_zip->ssize - 1].compressed_usec += time_diff;
+ if (cmp_per_index_enabled) {
+ mutex_enter(&page_zip_stat_per_index_mutex);
+ page_zip_stat_per_index[index->id].compressed_ok++;
+ page_zip_stat_per_index[index->id].compressed_usec += time_diff;
+ mutex_exit(&page_zip_stat_per_index_mutex);
+ }
+
+ if (page_is_leaf(page)) {
+ dict_index_zip_success(index);
}
#endif /* !UNIV_HOTBACKUP */
@@ -1518,6 +1564,7 @@ page_zip_fields_free(
{
if (index) {
dict_table_t* table = index->table;
+ os_fast_mutex_free(&index->zip_pad.mutex);
mem_heap_free(index->heap);
mutex_free(&(table->autoinc_mutex));
ut_free(table->name);
@@ -3075,11 +3122,17 @@ err_exit:
page_zip_fields_free(index);
mem_heap_free(heap);
#ifndef UNIV_HOTBACKUP
- {
- page_zip_stat_t* zip_stat
- = &page_zip_stat[page_zip->ssize - 1];
- zip_stat->decompressed++;
- zip_stat->decompressed_usec += ut_time_us(NULL) - usec;
+ ullint time_diff = ut_time_us(NULL) - usec;
+ page_zip_stat[page_zip->ssize - 1].decompressed++;
+ page_zip_stat[page_zip->ssize - 1].decompressed_usec += time_diff;
+
+ index_id_t index_id = btr_page_get_index_id(page);
+
+ if (srv_cmp_per_index_enabled) {
+ mutex_enter(&page_zip_stat_per_index_mutex);
+ page_zip_stat_per_index[index_id].decompressed++;
+ page_zip_stat_per_index[index_id].decompressed_usec += time_diff;
+ mutex_exit(&page_zip_stat_per_index_mutex);
}
#endif /* !UNIV_HOTBACKUP */
@@ -3177,7 +3230,7 @@ page_zip_validate_low(
UNIV_MEM_ASSERT_RW() checks fail. The v-bits of page[],
page_zip->data[] or page_zip could be viewed at temp_page[] or
temp_page_zip in a debugger when running valgrind --db-attach. */
- VALGRIND_GET_VBITS(page, temp_page, UNIV_PAGE_SIZE);
+ (void) VALGRIND_GET_VBITS(page, temp_page, UNIV_PAGE_SIZE);
UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
# if UNIV_WORD_SIZE == 4
VALGRIND_GET_VBITS(page_zip, &temp_page_zip, sizeof temp_page_zip);
@@ -3186,8 +3239,8 @@ page_zip_validate_low(
pad bytes. */
UNIV_MEM_ASSERT_RW(page_zip, sizeof *page_zip);
# endif
- VALGRIND_GET_VBITS(page_zip->data, temp_page,
- page_zip_get_size(page_zip));
+ (void) VALGRIND_GET_VBITS(page_zip->data, temp_page,
+ page_zip_get_size(page_zip));
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
#endif /* UNIV_DEBUG_VALGRIND */
@@ -3295,7 +3348,7 @@ page_zip_validate_low(
"record list: 0x%02x!=0x%02x\n",
(unsigned) page_offset(rec),
(unsigned) page_offset(trec)));
- valid = FALSE;
+ valid = FALSE;
break;
}
@@ -4042,10 +4095,10 @@ static
void
page_zip_clear_rec(
/*===============*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page */
- byte* rec, /*!< in: record to clear */
- dict_index_t* index, /*!< in: index of rec */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+ page_zip_des_t* page_zip, /*!< in/out: compressed page */
+ byte* rec, /*!< in: record to clear */
+ const dict_index_t* index, /*!< in: index of rec */
+ const ulint* offsets) /*!< in: rec_get_offsets(rec, index) */
{
ulint heap_no;
page_t* page = page_align(rec);
@@ -4256,11 +4309,12 @@ UNIV_INTERN
void
page_zip_dir_delete(
/*================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page */
- byte* rec, /*!< in: record to delete */
- dict_index_t* index, /*!< in: index of rec */
- const ulint* offsets,/*!< in: rec_get_offsets(rec) */
- const byte* free) /*!< in: previous start of the free list */
+ page_zip_des_t* page_zip, /*!< in/out: compressed page */
+ byte* rec, /*!< in: deleted record */
+ const dict_index_t* index, /*!< in: index of rec */
+ const ulint* offsets, /*!< in: rec_get_offsets(rec) */
+ const byte* free) /*!< in: previous start of
+ the free list */
{
byte* slot_rec;
byte* slot_free;
@@ -4576,7 +4630,8 @@ page_zip_reorganize(
/* Restore logging. */
mtr_set_log_mode(mtr, log_mode);
- if (!page_zip_compress(page_zip, page, index, mtr)) {
+ if (!page_zip_compress(page_zip, page, index,
+ page_compression_level, mtr)) {
#ifndef UNIV_HOTBACKUP
buf_block_free(temp_block);
diff --git a/storage/innobase/pars/lexyy.cc b/storage/innobase/pars/lexyy.cc
index 9de8ea51efd..48ab04e1eff 100644
--- a/storage/innobase/pars/lexyy.cc
+++ b/storage/innobase/pars/lexyy.cc
@@ -35,7 +35,7 @@
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
- * if you want the limit (max/min) macros for int types.
+ * if you want the limit (max/min) macros for int types.
*/
#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS 1
@@ -55,7 +55,6 @@ typedef int flex_int32_t;
typedef unsigned char flex_uint8_t;
typedef unsigned short int flex_uint16_t;
typedef unsigned int flex_uint32_t;
-#endif /* ! C99 */
/* Limits of integral types. */
#ifndef INT8_MIN
@@ -86,6 +85,8 @@ typedef unsigned int flex_uint32_t;
#define UINT32_MAX (4294967295U)
#endif
+#endif /* ! C99 */
+
#endif /* ! FLEXINT_H */
#ifdef __cplusplus
@@ -142,7 +143,15 @@ typedef unsigned int flex_uint32_t;
/* Size of default input buffer. */
#ifndef YY_BUF_SIZE
+#ifdef __ia64__
+/* On IA-64, the buffer size is 16k, not 8k.
+ * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case.
+ * Ditto for the __ia64__ case accordingly.
+ */
+#define YY_BUF_SIZE 32768
+#else
#define YY_BUF_SIZE 16384
+#endif /* __ia64__ */
#endif
/* The state buf must be large enough to hold one state per character in the main buffer.
@@ -276,7 +285,7 @@ static yy_size_t yy_n_chars; /* number of characters read into yy_ch_buf */
yy_size_t yyleng;
/* Points to current character in buffer. */
-static char *yy_c_buf_p = (char*) 0;
+static char *yy_c_buf_p = (char *) 0;
static int yy_init = 0; /* whether we need to initialize */
static int yy_start = 0; /* start state number */
@@ -338,7 +347,7 @@ void yyfree (void * );
typedef unsigned char YY_CHAR;
-FILE *yyin = (FILE*) 0, *yyout = (FILE*) 0;
+FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0;
typedef int yy_state_type;
@@ -373,7 +382,7 @@ struct yy_trans_info
flex_int32_t yy_verify;
flex_int32_t yy_nxt;
};
-static yyconst flex_int16_t yy_accept[424] =
+static yyconst flex_int16_t yy_accept[425] =
{ 0,
0, 0, 119, 119, 0, 0, 0, 0, 125, 123,
122, 122, 8, 123, 114, 5, 103, 109, 112, 110,
@@ -382,46 +391,46 @@ static yyconst flex_int16_t yy_accept[424] =
96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
115, 116, 119, 120, 6, 7, 9, 10, 122, 4,
98, 118, 2, 1, 3, 99, 100, 102, 101, 0,
- 96, 96, 96, 96, 96, 96, 44, 96, 96, 96,
+ 96, 0, 96, 96, 96, 96, 96, 44, 96, 96,
96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
- 96, 96, 96, 28, 17, 25, 96, 96, 96, 96,
+ 96, 96, 96, 96, 28, 17, 25, 96, 96, 96,
- 96, 96, 54, 63, 96, 14, 96, 96, 96, 96,
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
- 96, 96, 96, 96, 119, 120, 120, 121, 6, 7,
- 9, 10, 2, 0, 97, 13, 45, 96, 96, 96,
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
- 96, 96, 96, 96, 96, 96, 27, 96, 96, 96,
- 41, 96, 96, 96, 96, 21, 96, 96, 96, 96,
- 96, 15, 96, 96, 96, 18, 96, 96, 96, 96,
- 96, 82, 96, 96, 96, 51, 96, 12, 96, 36,
+ 96, 96, 96, 54, 63, 96, 14, 96, 96, 96,
96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
-
- 96, 0, 97, 96, 96, 96, 96, 20, 96, 24,
+ 96, 96, 96, 96, 96, 119, 120, 120, 121, 6,
+ 7, 9, 10, 2, 0, 97, 13, 45, 96, 96,
96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
- 96, 46, 96, 96, 30, 96, 89, 96, 96, 39,
- 96, 96, 96, 96, 96, 48, 96, 94, 91, 32,
- 93, 96, 11, 66, 96, 96, 96, 42, 96, 96,
- 96, 96, 96, 96, 96, 96, 96, 96, 29, 96,
- 96, 96, 96, 96, 96, 96, 96, 96, 87, 0,
- 96, 26, 96, 96, 96, 68, 96, 96, 96, 96,
- 37, 96, 96, 96, 96, 96, 96, 96, 31, 67,
- 23, 96, 59, 96, 77, 96, 96, 96, 43, 96,
-
- 96, 96, 96, 96, 96, 96, 96, 92, 96, 96,
- 56, 96, 96, 96, 96, 96, 96, 96, 40, 33,
- 0, 81, 95, 19, 96, 96, 85, 96, 76, 55,
- 96, 65, 96, 52, 96, 96, 96, 47, 96, 78,
- 96, 80, 96, 96, 34, 96, 96, 96, 35, 74,
- 96, 96, 96, 96, 60, 96, 50, 49, 96, 96,
- 96, 57, 53, 64, 96, 96, 96, 22, 96, 96,
- 75, 83, 96, 96, 79, 96, 70, 96, 96, 96,
- 96, 96, 38, 96, 90, 69, 96, 86, 96, 96,
- 96, 88, 96, 96, 61, 96, 16, 96, 72, 71,
-
- 96, 58, 96, 84, 96, 96, 96, 96, 96, 96,
- 96, 96, 96, 96, 73, 96, 96, 96, 96, 96,
- 96, 62, 0
+ 96, 96, 96, 96, 96, 96, 96, 27, 96, 96,
+ 96, 41, 96, 96, 96, 96, 21, 96, 96, 96,
+ 96, 96, 15, 96, 96, 96, 18, 96, 96, 96,
+ 96, 96, 82, 96, 96, 96, 51, 96, 12, 96,
+ 36, 96, 96, 96, 96, 96, 96, 96, 96, 96,
+
+ 96, 96, 0, 97, 96, 96, 96, 96, 20, 96,
+ 24, 96, 96, 96, 96, 96, 96, 96, 96, 96,
+ 96, 96, 46, 96, 96, 30, 96, 89, 96, 96,
+ 39, 96, 96, 96, 96, 96, 48, 96, 94, 91,
+ 32, 93, 96, 11, 66, 96, 96, 96, 42, 96,
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 29,
+ 96, 96, 96, 96, 96, 96, 96, 96, 96, 87,
+ 0, 96, 26, 96, 96, 96, 68, 96, 96, 96,
+ 96, 37, 96, 96, 96, 96, 96, 96, 96, 31,
+ 67, 23, 96, 59, 96, 77, 96, 96, 96, 43,
+
+ 96, 96, 96, 96, 96, 96, 96, 96, 92, 96,
+ 96, 56, 96, 96, 96, 96, 96, 96, 96, 40,
+ 33, 0, 81, 95, 19, 96, 96, 85, 96, 76,
+ 55, 96, 65, 96, 52, 96, 96, 96, 47, 96,
+ 78, 96, 80, 96, 96, 34, 96, 96, 96, 35,
+ 74, 96, 96, 96, 96, 60, 96, 50, 49, 96,
+ 96, 96, 57, 53, 64, 96, 96, 96, 22, 96,
+ 96, 75, 83, 96, 96, 79, 96, 70, 96, 96,
+ 96, 96, 96, 38, 96, 90, 69, 96, 86, 96,
+ 96, 96, 88, 96, 96, 61, 96, 16, 96, 72,
+
+ 71, 96, 58, 96, 84, 96, 96, 96, 96, 96,
+ 96, 96, 96, 96, 96, 73, 96, 96, 96, 96,
+ 96, 96, 62, 0
} ;
static yyconst flex_int32_t yy_ec[256] =
@@ -432,14 +441,14 @@ static yyconst flex_int32_t yy_ec[256] =
1, 2, 1, 4, 5, 6, 7, 1, 8, 9,
10, 11, 12, 13, 14, 15, 16, 17, 17, 17,
17, 17, 17, 17, 17, 17, 17, 18, 19, 20,
- 21, 22, 23, 1, 24, 25, 26, 27, 28, 29,
- 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
- 40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
- 1, 1, 1, 1, 50, 1, 33, 33, 33, 33,
-
- 33, 33, 33, 33, 33, 33, 33, 51, 33, 33,
- 33, 33, 52, 33, 53, 33, 33, 33, 33, 33,
- 33, 33, 54, 1, 55, 1, 1, 1, 1, 1,
+ 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
+ 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
+ 1, 1, 1, 1, 51, 1, 34, 34, 34, 34,
+
+ 34, 34, 34, 34, 34, 34, 34, 52, 34, 34,
+ 34, 34, 53, 34, 54, 34, 34, 34, 34, 34,
+ 34, 34, 55, 1, 56, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -456,328 +465,438 @@ static yyconst flex_int32_t yy_ec[256] =
1, 1, 1, 1, 1
} ;
-static yyconst flex_int32_t yy_meta[56] =
+static yyconst flex_int32_t yy_meta[57] =
{ 0,
1, 1, 1, 2, 3, 1, 1, 4, 1, 1,
5, 1, 1, 1, 1, 6, 7, 1, 1, 1,
- 8, 1, 1, 9, 9, 9, 9, 9, 9, 9,
+ 8, 1, 1, 6, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 1, 1
+ 9, 9, 9, 9, 1, 1
} ;
-static yyconst flex_int16_t yy_base[436] =
+static yyconst flex_int16_t yy_base[438] =
{ 0,
- 0, 0, 849, 848, 850, 849, 852, 851, 854, 861,
- 54, 56, 861, 0, 861, 861, 861, 861, 861, 861,
- 861, 861, 838, 841, 45, 830, 861, 42, 861, 829,
- 861, 45, 49, 54, 58, 56, 72, 833, 83, 86,
- 63, 67, 90, 53, 105, 107, 106, 120, 51, 101,
- 861, 861, 0, 55, 0, 840, 0, 843, 106, 0,
- 861, 861, 829, 61, 824, 861, 861, 861, 861, 839,
- 827, 88, 124, 130, 132, 125, 826, 129, 133, 136,
- 52, 138, 148, 140, 142, 145, 149, 152, 151, 159,
- 162, 169, 165, 825, 172, 824, 173, 170, 175, 179,
-
- 176, 177, 823, 822, 180, 182, 184, 200, 201, 195,
- 189, 202, 204, 207, 205, 210, 218, 220, 213, 215,
- 223, 230, 238, 217, 0, 240, 244, 861, 0, 829,
- 0, 832, 818, 781, 0, 817, 816, 233, 237, 243,
- 248, 251, 246, 252, 255, 257, 258, 262, 264, 263,
- 265, 267, 266, 269, 273, 270, 815, 274, 275, 287,
- 814, 290, 292, 291, 293, 294, 297, 300, 304, 298,
- 307, 313, 308, 309, 317, 813, 314, 315, 323, 318,
- 324, 328, 331, 332, 333, 812, 336, 811, 338, 810,
- 340, 339, 342, 344, 343, 341, 347, 346, 348, 349,
-
- 359, 773, 0, 356, 369, 370, 360, 808, 371, 807,
- 372, 375, 376, 378, 379, 380, 382, 383, 388, 393,
- 394, 806, 396, 397, 805, 398, 804, 399, 400, 803,
- 403, 404, 408, 413, 405, 802, 415, 801, 800, 799,
- 798, 406, 797, 796, 416, 417, 420, 795, 422, 418,
- 423, 425, 424, 426, 439, 429, 437, 440, 794, 446,
- 450, 453, 454, 455, 457, 458, 459, 460, 793, 757,
- 461, 791, 463, 464, 466, 790, 467, 468, 473, 474,
- 789, 475, 476, 477, 478, 480, 485, 486, 788, 787,
- 786, 489, 785, 491, 784, 498, 493, 494, 783, 499,
-
- 504, 509, 511, 513, 516, 514, 517, 782, 520, 519,
- 781, 521, 523, 527, 525, 528, 526, 529, 780, 779,
- 780, 776, 773, 530, 533, 535, 772, 534, 771, 770,
- 541, 769, 550, 760, 543, 548, 551, 753, 552, 736,
- 554, 730, 556, 557, 723, 558, 566, 563, 693, 692,
- 569, 572, 565, 578, 691, 574, 690, 689, 567, 585,
- 588, 688, 687, 685, 571, 589, 591, 683, 592, 593,
- 681, 680, 595, 596, 679, 597, 678, 599, 604, 602,
- 605, 608, 676, 606, 675, 674, 609, 673, 607, 610,
- 614, 670, 620, 623, 668, 628, 667, 630, 665, 664,
-
- 625, 663, 629, 112, 627, 626, 631, 632, 647, 633,
- 636, 637, 644, 650, 110, 652, 659, 657, 660, 661,
- 662, 57, 861, 710, 719, 728, 731, 734, 738, 747,
- 756, 765, 774, 781, 784
+ 0, 0, 293, 287, 284, 281, 272, 256, 254, 1357,
+ 55, 57, 1357, 0, 1357, 1357, 1357, 1357, 1357, 1357,
+ 1357, 1357, 238, 227, 46, 205, 1357, 43, 1357, 203,
+ 1357, 46, 50, 56, 52, 66, 64, 51, 81, 92,
+ 91, 94, 96, 111, 113, 116, 130, 134, 53, 143,
+ 1357, 1357, 0, 106, 0, 212, 0, 210, 141, 0,
+ 1357, 1357, 192, 56, 173, 1357, 1357, 1357, 1357, 168,
+ 140, 150, 152, 154, 155, 161, 167, 171, 177, 172,
+ 184, 174, 188, 189, 191, 194, 203, 212, 215, 217,
+ 219, 221, 226, 228, 231, 240, 233, 235, 246, 251,
+
+ 258, 253, 255, 256, 269, 271, 278, 272, 285, 283,
+ 287, 289, 296, 305, 298, 315, 319, 321, 322, 326,
+ 332, 333, 342, 339, 343, 0, 112, 173, 1357, 0,
+ 155, 0, 156, 132, 93, 0, 355, 357, 358, 360,
+ 364, 367, 374, 370, 379, 380, 389, 383, 390, 392,
+ 395, 408, 411, 409, 415, 418, 425, 427, 429, 436,
+ 431, 441, 446, 448, 450, 452, 453, 462, 471, 464,
+ 473, 474, 478, 485, 488, 490, 491, 494, 500, 501,
+ 504, 506, 507, 517, 518, 519, 520, 521, 522, 523,
+ 533, 536, 538, 543, 549, 554, 555, 561, 556, 566,
+
+ 567, 576, 60, 0, 573, 578, 580, 582, 583, 593,
+ 589, 596, 598, 603, 605, 607, 610, 617, 619, 621,
+ 622, 628, 633, 634, 635, 639, 640, 649, 650, 652,
+ 653, 655, 659, 664, 668, 669, 665, 671, 674, 678,
+ 681, 685, 687, 688, 692, 697, 698, 701, 703, 704,
+ 707, 708, 717, 713, 728, 730, 724, 740, 734, 745,
+ 746, 750, 751, 756, 757, 760, 761, 762, 771, 773,
+ 42, 778, 782, 783, 787, 789, 792, 794, 793, 804,
+ 805, 808, 809, 810, 819, 823, 826, 828, 829, 830,
+ 835, 840, 844, 846, 847, 856, 857, 858, 859, 860,
+
+ 863, 872, 873, 878, 879, 882, 885, 889, 894, 895,
+ 896, 898, 905, 910, 908, 912, 914, 915, 926, 930,
+ 931, 73, 932, 933, 935, 937, 942, 944, 946, 947,
+ 948, 949, 951, 958, 961, 965, 967, 972, 978, 979,
+ 981, 984, 983, 985, 994, 988, 999, 1000, 1001, 1004,
+ 1013, 1015, 1022, 1016, 1019, 1026, 1032, 1033, 1035, 1036,
+ 1038, 1039, 1048, 1049, 1050, 1051, 1053, 1054, 1060, 1063,
+ 1065, 1066, 1069, 1070, 1072, 1082, 1084, 1085, 1087, 1096,
+ 1097, 1098, 1099, 1101, 1113, 1114, 1115, 1116, 1117, 1118,
+ 1119, 1128, 1130, 1131, 1134, 1133, 1135, 1137, 1150, 1151,
+
+ 1153, 1155, 1157, 1162, 1160, 1167, 1172, 1173, 1174, 1176,
+ 1185, 1190, 1183, 1187, 1189, 1199, 1204, 1206, 1208, 1210,
+ 1215, 1220, 1222, 1357, 1269, 1278, 1287, 1290, 1293, 1297,
+ 1306, 1315, 1324, 1333, 1340, 1344, 1347
} ;
-static yyconst flex_int16_t yy_def[436] =
+static yyconst flex_int16_t yy_def[438] =
{ 0,
- 423, 1, 424, 424, 425, 425, 426, 426, 423, 423,
- 423, 423, 423, 427, 423, 423, 423, 423, 423, 423,
- 423, 423, 423, 423, 423, 428, 423, 423, 423, 423,
- 423, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 423, 423, 430, 431, 432, 423, 433, 423, 423, 427,
- 423, 423, 423, 423, 428, 423, 423, 423, 423, 434,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
-
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 430, 431, 431, 423, 432, 423,
- 433, 423, 423, 423, 435, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
-
- 429, 423, 435, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 423,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
-
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 423, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
-
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 429, 429, 429, 429, 429, 429, 429, 429,
- 429, 429, 0, 423, 423, 423, 423, 423, 423, 423,
- 423, 423, 423, 423, 423
+ 424, 1, 425, 425, 426, 426, 427, 427, 424, 424,
+ 424, 424, 424, 428, 424, 424, 424, 424, 424, 424,
+ 424, 424, 424, 424, 424, 429, 424, 424, 424, 424,
+ 424, 430, 430, 430, 430, 430, 34, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 424, 424, 431, 432, 433, 424, 434, 424, 424, 428,
+ 424, 424, 424, 424, 429, 424, 424, 424, 424, 435,
+ 430, 436, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 431, 432, 432, 424, 433,
+ 424, 434, 424, 424, 424, 437, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+
+ 430, 430, 424, 437, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 424, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 424, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
+ 430, 430, 430, 0, 424, 424, 424, 424, 424, 424,
+ 424, 424, 424, 424, 424, 424, 424
} ;
-static yyconst flex_int16_t yy_nxt[917] =
+static yyconst flex_int16_t yy_nxt[1414] =
{ 0,
10, 11, 12, 13, 10, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
- 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
- 38, 39, 38, 38, 40, 41, 42, 43, 44, 38,
- 45, 46, 47, 48, 49, 50, 38, 38, 38, 38,
- 38, 38, 38, 51, 52, 59, 59, 59, 59, 63,
- 70, 64, 67, 68, 70, 127, 70, 70, 70, 70,
- 128, 70, 70, 70, 122, 63, 74, 64, 70, 149,
- 75, 72, 70, 76, 78, 83, 73, 70, 79, 84,
- 86, 80, 87, 108, 81, 85, 77, 82, 70, 89,
-
- 100, 70, 88, 70, 101, 70, 90, 59, 59, 91,
- 102, 94, 92, 97, 136, 93, 70, 98, 103, 95,
- 70, 70, 70, 99, 96, 70, 104, 70, 105, 117,
- 106, 123, 109, 107, 112, 70, 118, 113, 124, 70,
- 70, 110, 111, 119, 70, 70, 114, 70, 70, 137,
- 115, 70, 143, 70, 116, 70, 120, 70, 121, 139,
- 70, 140, 142, 70, 70, 138, 70, 70, 141, 155,
- 144, 146, 147, 151, 70, 157, 145, 70, 150, 148,
- 70, 154, 152, 158, 70, 70, 156, 70, 70, 153,
- 70, 70, 70, 159, 70, 70, 160, 70, 164, 70,
-
- 169, 163, 161, 168, 70, 171, 162, 174, 175, 167,
- 70, 173, 170, 165, 166, 70, 70, 70, 172, 70,
- 70, 182, 70, 183, 179, 70, 176, 187, 70, 189,
- 70, 177, 70, 70, 184, 70, 185, 178, 70, 180,
- 190, 188, 192, 181, 186, 70, 195, 193, 70, 197,
- 423, 191, 70, 70, 127, 423, 196, 201, 70, 128,
- 204, 70, 194, 70, 198, 199, 70, 70, 205, 200,
- 70, 207, 70, 70, 206, 208, 209, 70, 70, 70,
- 70, 70, 70, 215, 70, 70, 210, 217, 70, 70,
- 70, 222, 213, 211, 221, 214, 212, 225, 216, 220,
-
- 228, 226, 70, 218, 219, 70, 70, 70, 70, 70,
- 229, 223, 70, 70, 224, 70, 227, 231, 232, 70,
- 233, 235, 70, 70, 70, 230, 237, 238, 70, 70,
- 70, 236, 70, 70, 241, 234, 240, 239, 70, 70,
- 247, 242, 243, 70, 245, 244, 70, 70, 70, 248,
- 246, 70, 249, 70, 70, 70, 70, 70, 70, 70,
- 254, 70, 70, 70, 70, 252, 257, 250, 260, 261,
- 265, 70, 264, 258, 70, 70, 255, 251, 259, 256,
- 262, 253, 263, 268, 70, 70, 70, 70, 267, 266,
- 70, 70, 269, 70, 70, 70, 271, 70, 70, 276,
-
- 274, 279, 280, 70, 275, 272, 273, 278, 70, 70,
- 283, 70, 70, 70, 70, 70, 285, 277, 70, 70,
- 70, 70, 281, 70, 282, 284, 289, 287, 70, 290,
- 70, 70, 70, 70, 296, 70, 286, 70, 70, 70,
- 70, 70, 291, 298, 70, 292, 288, 301, 294, 305,
- 293, 307, 70, 295, 70, 70, 299, 297, 303, 300,
- 310, 70, 306, 302, 304, 70, 308, 311, 70, 70,
- 70, 309, 70, 70, 70, 70, 70, 312, 70, 70,
- 313, 70, 70, 70, 316, 318, 319, 320, 70, 70,
- 70, 70, 70, 70, 326, 70, 314, 315, 328, 317,
-
- 70, 70, 330, 322, 70, 323, 70, 334, 70, 70,
- 327, 324, 331, 70, 70, 325, 329, 332, 333, 70,
- 337, 335, 336, 340, 70, 339, 70, 342, 70, 70,
- 343, 70, 70, 338, 70, 70, 70, 341, 70, 347,
- 70, 70, 70, 70, 70, 70, 353, 345, 70, 70,
- 70, 344, 355, 357, 348, 346, 70, 352, 70, 349,
- 350, 351, 354, 70, 356, 70, 70, 70, 365, 70,
- 358, 70, 70, 70, 360, 361, 362, 364, 70, 359,
- 70, 70, 70, 363, 70, 366, 70, 70, 367, 70,
- 369, 373, 368, 70, 374, 376, 375, 371, 372, 370,
-
- 70, 379, 378, 70, 70, 377, 70, 70, 70, 380,
- 70, 70, 70, 383, 70, 382, 381, 70, 386, 70,
- 70, 70, 70, 70, 70, 70, 391, 385, 388, 70,
- 392, 384, 389, 387, 395, 70, 397, 390, 70, 393,
- 70, 70, 70, 70, 70, 70, 70, 70, 70, 398,
- 402, 70, 70, 394, 400, 396, 403, 399, 404, 70,
- 406, 405, 70, 413, 412, 70, 409, 70, 408, 401,
- 407, 411, 70, 414, 70, 70, 70, 70, 70, 70,
- 70, 410, 70, 70, 415, 70, 418, 417, 70, 70,
- 70, 70, 419, 70, 70, 70, 70, 420, 70, 416,
-
- 70, 421, 70, 70, 70, 70, 70, 70, 70, 422,
- 53, 53, 53, 53, 53, 53, 53, 53, 53, 55,
- 55, 55, 55, 55, 55, 55, 55, 55, 57, 57,
- 57, 57, 57, 57, 57, 57, 57, 60, 70, 60,
- 65, 65, 65, 71, 71, 70, 71, 125, 125, 125,
- 125, 70, 125, 125, 125, 125, 126, 126, 126, 126,
- 126, 126, 126, 126, 126, 129, 129, 129, 70, 129,
- 129, 129, 129, 129, 131, 70, 131, 131, 131, 131,
- 131, 131, 131, 135, 70, 70, 70, 70, 70, 135,
- 203, 70, 203, 135, 70, 70, 70, 70, 70, 70,
-
- 70, 70, 70, 70, 70, 70, 70, 321, 70, 70,
- 70, 70, 70, 70, 70, 70, 70, 70, 70, 70,
- 70, 70, 70, 70, 270, 70, 70, 70, 70, 70,
- 70, 70, 70, 202, 133, 132, 130, 70, 70, 70,
- 70, 70, 70, 134, 423, 133, 132, 130, 70, 69,
- 66, 62, 61, 423, 58, 58, 56, 56, 54, 54,
- 9, 423, 423, 423, 423, 423, 423, 423, 423, 423,
- 423, 423, 423, 423, 423, 423, 423, 423, 423, 423,
- 423, 423, 423, 423, 423, 423, 423, 423, 423, 423,
- 423, 423, 423, 423, 423, 423, 423, 423, 423, 423,
-
- 423, 423, 423, 423, 423, 423, 423, 423, 423, 423,
- 423, 423, 423, 423, 423, 423
+ 29, 30, 31, 10, 32, 33, 34, 35, 36, 37,
+ 38, 38, 39, 38, 38, 40, 41, 42, 43, 44,
+ 38, 45, 46, 47, 48, 49, 50, 38, 38, 38,
+ 38, 38, 38, 38, 51, 52, 59, 59, 59, 59,
+ 63, 70, 64, 67, 68, 70, 70, 70, 70, 72,
+ 63, 70, 64, 72, 72, 72, 72, 123, 75, 72,
+ 84, 70, 76, 73, 85, 77, 136, 79, 74, 72,
+ 86, 80, 90, 322, 81, 71, 70, 82, 78, 91,
+
+ 83, 87, 92, 88, 72, 93, 70, 70, 94, 70,
+ 95, 70, 271, 89, 72, 72, 128, 72, 96, 72,
+ 98, 129, 424, 97, 99, 104, 70, 424, 70, 101,
+ 100, 70, 102, 105, 72, 106, 72, 107, 103, 72,
+ 108, 110, 59, 59, 113, 70, 203, 114, 134, 70,
+ 111, 112, 109, 72, 118, 70, 115, 72, 70, 133,
+ 116, 119, 131, 72, 117, 70, 72, 70, 120, 70,
+ 70, 121, 135, 122, 124, 72, 70, 72, 72, 137,
+ 138, 125, 70, 128, 72, 140, 70, 70, 129, 70,
+ 72, 141, 70, 424, 72, 72, 139, 72, 142, 70,
+
+ 72, 144, 150, 70, 70, 143, 70, 72, 134, 70,
+ 145, 72, 72, 133, 72, 152, 146, 72, 70, 131,
+ 147, 148, 156, 69, 153, 66, 72, 70, 149, 151,
+ 70, 154, 70, 155, 70, 72, 70, 62, 72, 158,
+ 72, 70, 72, 70, 72, 157, 70, 159, 70, 72,
+ 70, 72, 61, 424, 72, 70, 72, 161, 72, 58,
+ 160, 70, 162, 72, 163, 164, 70, 165, 70, 72,
+ 70, 70, 168, 70, 72, 58, 72, 170, 72, 72,
+ 169, 72, 166, 167, 70, 172, 70, 70, 56, 171,
+ 174, 56, 72, 70, 72, 72, 173, 54, 70, 175,
+
+ 70, 72, 70, 54, 70, 176, 72, 180, 72, 424,
+ 72, 70, 72, 70, 183, 177, 424, 178, 424, 72,
+ 70, 72, 181, 179, 184, 424, 182, 424, 72, 188,
+ 70, 186, 424, 189, 70, 185, 70, 70, 72, 187,
+ 190, 70, 72, 424, 72, 72, 193, 70, 70, 72,
+ 194, 191, 424, 424, 70, 72, 72, 70, 70, 424,
+ 198, 192, 72, 424, 196, 72, 72, 200, 424, 424,
+ 70, 201, 70, 70, 197, 70, 195, 199, 72, 70,
+ 72, 72, 70, 72, 202, 70, 205, 72, 424, 70,
+ 72, 208, 206, 72, 70, 70, 207, 72, 70, 209,
+
+ 210, 424, 72, 72, 70, 70, 72, 70, 424, 216,
+ 70, 211, 72, 72, 424, 72, 218, 424, 72, 424,
+ 424, 212, 213, 70, 70, 214, 70, 217, 215, 424,
+ 70, 72, 72, 70, 72, 223, 219, 220, 72, 222,
+ 70, 72, 70, 221, 70, 424, 70, 424, 72, 424,
+ 72, 70, 72, 226, 72, 230, 70, 227, 224, 72,
+ 225, 70, 229, 70, 72, 70, 424, 70, 70, 72,
+ 424, 72, 228, 72, 232, 72, 72, 70, 233, 70,
+ 234, 236, 231, 424, 424, 72, 70, 72, 70, 70,
+ 424, 237, 238, 70, 72, 235, 72, 72, 240, 239,
+
+ 70, 72, 242, 70, 424, 70, 70, 243, 72, 70,
+ 424, 72, 241, 72, 72, 70, 70, 72, 246, 70,
+ 244, 70, 70, 72, 72, 245, 248, 72, 249, 72,
+ 72, 247, 70, 70, 70, 70, 70, 70, 70, 250,
+ 72, 72, 72, 72, 72, 72, 72, 255, 70, 424,
+ 251, 70, 253, 70, 424, 424, 72, 252, 70, 72,
+ 424, 72, 256, 258, 70, 257, 72, 424, 254, 70,
+ 70, 70, 72, 259, 261, 262, 70, 72, 72, 72,
+ 260, 70, 70, 424, 72, 266, 263, 265, 70, 72,
+ 72, 70, 424, 70, 264, 70, 72, 70, 70, 72,
+
+ 267, 72, 269, 72, 70, 72, 72, 268, 70, 424,
+ 270, 70, 72, 70, 272, 273, 72, 274, 70, 72,
+ 70, 72, 70, 275, 277, 70, 72, 276, 72, 280,
+ 72, 281, 70, 72, 70, 279, 70, 70, 424, 424,
+ 72, 278, 72, 70, 72, 72, 286, 284, 70, 70,
+ 70, 72, 424, 282, 70, 70, 72, 72, 72, 285,
+ 283, 424, 72, 72, 70, 70, 288, 70, 70, 290,
+ 70, 287, 72, 72, 70, 72, 72, 424, 72, 70,
+ 70, 291, 72, 70, 70, 289, 70, 72, 72, 70,
+ 424, 72, 72, 70, 72, 292, 70, 72, 293, 297,
+
+ 70, 72, 70, 70, 72, 295, 294, 70, 72, 296,
+ 72, 72, 70, 70, 298, 72, 70, 424, 70, 70,
+ 72, 72, 70, 70, 72, 299, 72, 72, 70, 302,
+ 72, 72, 70, 424, 424, 424, 72, 424, 300, 70,
+ 72, 301, 306, 70, 424, 70, 303, 72, 304, 70,
+ 305, 72, 307, 72, 308, 70, 424, 72, 309, 424,
+ 70, 70, 312, 72, 311, 70, 70, 310, 72, 72,
+ 424, 70, 70, 72, 72, 70, 70, 70, 313, 72,
+ 72, 314, 424, 72, 72, 72, 70, 317, 70, 319,
+ 320, 424, 424, 70, 72, 315, 72, 70, 70, 321,
+
+ 316, 72, 70, 318, 70, 72, 72, 70, 70, 70,
+ 72, 424, 72, 424, 424, 72, 72, 72, 424, 70,
+ 70, 323, 327, 70, 70, 70, 324, 72, 72, 424,
+ 329, 72, 72, 72, 70, 325, 328, 331, 70, 326,
+ 424, 70, 72, 70, 70, 70, 72, 332, 330, 72,
+ 70, 72, 72, 72, 335, 70, 424, 424, 72, 70,
+ 333, 70, 70, 72, 334, 336, 337, 72, 424, 72,
+ 72, 70, 70, 70, 70, 70, 338, 424, 70, 72,
+ 72, 72, 72, 72, 424, 340, 72, 70, 70, 341,
+ 339, 424, 343, 70, 70, 72, 72, 70, 424, 344,
+
+ 70, 72, 72, 342, 70, 72, 348, 424, 72, 70,
+ 70, 70, 72, 70, 424, 346, 345, 72, 72, 72,
+ 70, 72, 347, 70, 424, 70, 349, 70, 72, 70,
+ 70, 72, 350, 72, 354, 72, 351, 72, 72, 352,
+ 356, 70, 353, 358, 355, 70, 70, 70, 70, 72,
+ 70, 357, 70, 72, 72, 72, 72, 70, 72, 70,
+ 72, 70, 70, 70, 70, 72, 70, 72, 359, 72,
+ 72, 72, 72, 70, 72, 424, 70, 424, 424, 361,
+ 70, 72, 70, 362, 72, 360, 365, 70, 72, 363,
+ 72, 366, 364, 70, 70, 72, 70, 424, 70, 70,
+
+ 70, 72, 72, 70, 72, 367, 72, 72, 72, 70,
+ 368, 72, 424, 424, 70, 70, 70, 72, 424, 70,
+ 369, 370, 72, 72, 72, 424, 374, 72, 70, 371,
+ 70, 70, 424, 375, 70, 372, 72, 70, 72, 72,
+ 373, 70, 72, 376, 379, 72, 377, 70, 70, 72,
+ 70, 70, 424, 70, 70, 72, 72, 378, 72, 72,
+ 380, 72, 72, 70, 70, 70, 70, 383, 70, 70,
+ 382, 72, 72, 72, 72, 70, 72, 72, 70, 381,
+ 70, 70, 424, 72, 70, 70, 72, 70, 72, 72,
+ 387, 386, 72, 72, 384, 72, 385, 70, 424, 70,
+
+ 70, 424, 70, 424, 389, 72, 388, 72, 72, 390,
+ 72, 70, 70, 70, 70, 392, 70, 424, 424, 72,
+ 72, 72, 72, 393, 72, 391, 396, 424, 70, 70,
+ 70, 70, 70, 70, 70, 394, 72, 72, 72, 72,
+ 72, 72, 72, 70, 398, 70, 70, 395, 70, 70,
+ 70, 72, 70, 72, 72, 424, 72, 72, 72, 424,
+ 72, 399, 403, 397, 404, 70, 70, 400, 70, 401,
+ 70, 424, 70, 72, 72, 70, 72, 70, 72, 405,
+ 72, 402, 70, 72, 424, 72, 424, 70, 70, 70,
+ 72, 70, 406, 424, 407, 72, 72, 72, 70, 72,
+
+ 70, 412, 70, 424, 70, 70, 72, 424, 72, 410,
+ 72, 408, 72, 72, 70, 409, 424, 413, 414, 70,
+ 415, 70, 72, 70, 411, 70, 424, 72, 416, 72,
+ 70, 72, 424, 72, 419, 70, 424, 70, 72, 417,
+ 418, 424, 424, 72, 420, 72, 424, 424, 421, 424,
+ 424, 424, 424, 424, 424, 424, 422, 424, 424, 424,
+ 424, 424, 424, 424, 424, 424, 424, 424, 423, 53,
+ 53, 53, 53, 53, 53, 53, 53, 53, 55, 55,
+ 55, 55, 55, 55, 55, 55, 55, 57, 57, 57,
+ 57, 57, 57, 57, 57, 57, 60, 424, 60, 65,
+
+ 65, 65, 71, 71, 424, 71, 126, 126, 126, 126,
+ 424, 126, 126, 126, 126, 127, 127, 127, 127, 127,
+ 127, 127, 127, 127, 130, 130, 130, 424, 130, 130,
+ 130, 130, 130, 132, 424, 132, 132, 132, 132, 132,
+ 132, 132, 136, 424, 424, 424, 424, 424, 136, 72,
+ 72, 424, 72, 204, 424, 204, 9, 424, 424, 424,
+ 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
+ 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
+ 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
+ 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
+
+ 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
+ 424, 424, 424
} ;
-static yyconst flex_int16_t yy_chk[917] =
+static yyconst flex_int16_t yy_chk[1414] =
{ 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 11, 11, 12, 12, 25,
- 32, 25, 28, 28, 33, 54, 49, 81, 44, 34,
- 54, 36, 422, 35, 49, 64, 33, 64, 41, 81,
- 33, 32, 42, 33, 34, 35, 32, 37, 34, 35,
- 36, 34, 36, 44, 34, 35, 33, 34, 39, 37,
-
- 41, 40, 36, 72, 42, 43, 37, 59, 59, 37,
- 42, 39, 37, 40, 72, 37, 50, 40, 43, 39,
- 45, 47, 46, 40, 39, 415, 43, 404, 43, 47,
- 43, 50, 45, 43, 46, 48, 47, 46, 50, 73,
- 76, 45, 45, 47, 78, 74, 46, 75, 79, 73,
- 46, 80, 78, 82, 46, 84, 48, 85, 48, 74,
- 86, 75, 76, 83, 87, 73, 89, 88, 75, 85,
- 79, 80, 80, 83, 90, 87, 79, 91, 82, 80,
- 93, 84, 83, 88, 92, 98, 86, 95, 97, 83,
- 99, 101, 102, 89, 100, 105, 90, 106, 95, 107,
-
- 99, 93, 91, 98, 111, 100, 92, 105, 106, 97,
- 110, 102, 99, 95, 95, 108, 109, 112, 101, 113,
- 115, 110, 114, 111, 109, 116, 107, 113, 119, 115,
- 120, 108, 124, 117, 111, 118, 112, 108, 121, 109,
- 115, 114, 117, 109, 112, 122, 120, 118, 138, 121,
- 126, 116, 139, 123, 127, 126, 120, 124, 140, 127,
- 138, 143, 119, 141, 122, 123, 142, 144, 139, 123,
- 145, 141, 146, 147, 140, 142, 142, 148, 150, 149,
- 151, 153, 152, 147, 154, 156, 143, 149, 155, 158,
- 159, 153, 146, 144, 152, 146, 145, 156, 148, 151,
-
- 159, 156, 160, 150, 150, 162, 164, 163, 165, 166,
- 160, 154, 167, 170, 155, 168, 158, 163, 164, 169,
- 165, 166, 171, 173, 174, 162, 167, 168, 172, 177,
- 178, 166, 175, 180, 171, 165, 170, 169, 179, 181,
- 178, 172, 173, 182, 175, 174, 183, 184, 185, 179,
- 177, 187, 180, 189, 192, 191, 196, 193, 195, 194,
- 185, 198, 197, 199, 200, 183, 191, 181, 194, 194,
- 197, 204, 196, 192, 201, 207, 187, 182, 193, 189,
- 194, 184, 195, 200, 205, 206, 209, 211, 199, 198,
- 212, 213, 201, 214, 215, 216, 204, 217, 218, 211,
-
- 207, 214, 215, 219, 209, 205, 206, 213, 220, 221,
- 218, 223, 224, 226, 228, 229, 220, 212, 231, 232,
- 235, 242, 216, 233, 217, 219, 226, 223, 234, 228,
- 237, 245, 246, 250, 235, 247, 221, 249, 251, 253,
- 252, 254, 229, 242, 256, 231, 224, 247, 233, 252,
- 232, 254, 257, 234, 255, 258, 245, 237, 250, 246,
- 257, 260, 253, 249, 251, 261, 255, 258, 262, 263,
- 264, 256, 265, 266, 267, 268, 271, 260, 273, 274,
- 261, 275, 277, 278, 264, 266, 267, 268, 279, 280,
- 282, 283, 284, 285, 277, 286, 262, 263, 279, 265,
-
- 287, 288, 282, 271, 292, 273, 294, 286, 297, 298,
- 278, 274, 283, 296, 300, 275, 280, 284, 285, 301,
- 292, 287, 288, 297, 302, 296, 303, 300, 304, 306,
- 301, 305, 307, 294, 310, 309, 312, 298, 313, 305,
- 315, 317, 314, 316, 318, 324, 313, 303, 325, 328,
- 326, 302, 315, 317, 306, 304, 331, 312, 335, 307,
- 309, 310, 314, 336, 316, 333, 337, 339, 335, 341,
- 318, 343, 344, 346, 325, 326, 328, 333, 348, 324,
- 353, 347, 359, 331, 351, 336, 365, 352, 337, 356,
- 341, 347, 339, 354, 348, 352, 351, 344, 346, 343,
-
- 360, 356, 354, 361, 366, 353, 367, 369, 370, 359,
- 373, 374, 376, 365, 378, 361, 360, 380, 369, 379,
- 381, 384, 389, 382, 387, 390, 378, 367, 373, 391,
- 379, 366, 374, 370, 382, 393, 387, 376, 394, 380,
- 401, 406, 405, 396, 403, 398, 407, 408, 410, 389,
- 394, 411, 412, 381, 391, 384, 396, 390, 398, 413,
- 403, 401, 409, 411, 410, 414, 407, 416, 406, 393,
- 405, 409, 418, 412, 417, 419, 420, 421, 402, 400,
- 399, 408, 397, 395, 413, 392, 417, 416, 388, 386,
- 385, 383, 418, 377, 375, 372, 371, 419, 368, 414,
-
- 364, 420, 363, 362, 358, 357, 355, 350, 349, 421,
- 424, 424, 424, 424, 424, 424, 424, 424, 424, 425,
+ 1, 1, 1, 1, 1, 1, 11, 11, 12, 12,
+ 25, 32, 25, 28, 28, 33, 38, 35, 49, 32,
+ 64, 34, 64, 33, 38, 35, 49, 49, 33, 34,
+ 35, 36, 33, 32, 35, 33, 322, 34, 32, 36,
+ 35, 34, 37, 271, 34, 37, 39, 34, 33, 37,
+
+ 34, 36, 37, 36, 39, 37, 41, 40, 37, 42,
+ 39, 43, 203, 36, 41, 40, 54, 42, 39, 43,
+ 40, 54, 127, 39, 40, 43, 44, 127, 45, 41,
+ 40, 46, 42, 43, 44, 43, 45, 43, 42, 46,
+ 43, 45, 59, 59, 46, 47, 135, 46, 134, 48,
+ 45, 45, 44, 47, 47, 71, 46, 48, 50, 133,
+ 46, 47, 131, 71, 46, 72, 50, 73, 47, 74,
+ 75, 48, 70, 48, 50, 73, 76, 74, 75, 73,
+ 74, 50, 77, 128, 76, 75, 78, 80, 128, 82,
+ 77, 76, 79, 65, 78, 80, 74, 82, 76, 81,
+
+ 79, 79, 82, 83, 84, 77, 85, 81, 63, 86,
+ 80, 83, 84, 58, 85, 84, 80, 86, 87, 56,
+ 81, 81, 86, 30, 84, 26, 87, 88, 81, 83,
+ 89, 84, 90, 85, 91, 88, 92, 24, 89, 88,
+ 90, 93, 91, 94, 92, 87, 95, 89, 97, 93,
+ 98, 94, 23, 9, 95, 96, 97, 91, 98, 8,
+ 90, 99, 92, 96, 93, 94, 100, 96, 102, 99,
+ 103, 104, 98, 101, 100, 7, 102, 100, 103, 104,
+ 99, 101, 96, 96, 105, 101, 106, 108, 6, 100,
+ 103, 5, 105, 107, 106, 108, 102, 4, 110, 106,
+
+ 109, 107, 111, 3, 112, 107, 110, 110, 109, 0,
+ 111, 113, 112, 115, 111, 108, 0, 109, 0, 113,
+ 114, 115, 110, 109, 112, 0, 110, 0, 114, 114,
+ 116, 113, 0, 115, 117, 112, 118, 119, 116, 113,
+ 116, 120, 117, 0, 118, 119, 118, 121, 122, 120,
+ 119, 116, 0, 0, 124, 121, 122, 123, 125, 0,
+ 122, 117, 124, 0, 121, 123, 125, 124, 0, 0,
+ 137, 124, 138, 139, 121, 140, 120, 123, 137, 141,
+ 138, 139, 142, 140, 125, 144, 139, 141, 0, 143,
+ 142, 142, 140, 144, 145, 146, 141, 143, 148, 143,
+
+ 143, 0, 145, 146, 147, 149, 148, 150, 0, 148,
+ 151, 144, 147, 149, 0, 150, 150, 0, 151, 0,
+ 0, 145, 146, 152, 154, 147, 153, 149, 147, 0,
+ 155, 152, 154, 156, 153, 154, 151, 151, 155, 153,
+ 157, 156, 158, 152, 159, 0, 161, 0, 157, 0,
+ 158, 160, 159, 157, 161, 161, 162, 157, 155, 160,
+ 156, 163, 160, 164, 162, 165, 0, 166, 167, 163,
+ 0, 164, 159, 165, 164, 166, 167, 168, 165, 170,
+ 166, 167, 163, 0, 0, 168, 169, 170, 171, 172,
+ 0, 167, 168, 173, 169, 166, 171, 172, 170, 169,
+
+ 174, 173, 172, 175, 0, 176, 177, 173, 174, 178,
+ 0, 175, 171, 176, 177, 179, 180, 178, 176, 181,
+ 174, 182, 183, 179, 180, 175, 179, 181, 180, 182,
+ 183, 178, 184, 185, 186, 187, 188, 189, 190, 181,
+ 184, 185, 186, 187, 188, 189, 190, 186, 191, 0,
+ 182, 192, 184, 193, 0, 0, 191, 183, 194, 192,
+ 0, 193, 188, 192, 195, 190, 194, 0, 185, 196,
+ 197, 199, 195, 193, 195, 195, 198, 196, 197, 199,
+ 194, 200, 201, 0, 198, 198, 195, 197, 205, 200,
+ 201, 202, 0, 206, 196, 207, 205, 208, 209, 202,
+
+ 199, 206, 201, 207, 211, 208, 209, 200, 210, 0,
+ 202, 212, 211, 213, 205, 206, 210, 207, 214, 212,
+ 215, 213, 216, 208, 212, 217, 214, 210, 215, 215,
+ 216, 216, 218, 217, 219, 214, 220, 221, 0, 0,
+ 218, 213, 219, 222, 220, 221, 221, 219, 223, 224,
+ 225, 222, 0, 217, 226, 227, 223, 224, 225, 220,
+ 218, 0, 226, 227, 228, 229, 224, 230, 231, 227,
+ 232, 222, 228, 229, 233, 230, 231, 0, 232, 234,
+ 237, 229, 233, 235, 236, 225, 238, 234, 237, 239,
+ 0, 235, 236, 240, 238, 230, 241, 239, 232, 236,
+
+ 242, 240, 243, 244, 241, 234, 233, 245, 242, 235,
+ 243, 244, 246, 247, 238, 245, 248, 0, 249, 250,
+ 246, 247, 251, 252, 248, 243, 249, 250, 254, 248,
+ 251, 252, 253, 0, 0, 0, 254, 0, 246, 257,
+ 253, 247, 253, 255, 0, 256, 250, 257, 251, 259,
+ 252, 255, 254, 256, 255, 258, 0, 259, 256, 0,
+ 260, 261, 259, 258, 258, 262, 263, 257, 260, 261,
+ 0, 264, 265, 262, 263, 266, 267, 268, 261, 264,
+ 265, 262, 0, 266, 267, 268, 269, 265, 270, 267,
+ 268, 0, 0, 272, 269, 263, 270, 273, 274, 269,
+
+ 264, 272, 275, 266, 276, 273, 274, 277, 279, 278,
+ 275, 0, 276, 0, 0, 277, 279, 278, 0, 280,
+ 281, 272, 278, 282, 283, 284, 274, 280, 281, 0,
+ 280, 282, 283, 284, 285, 275, 279, 283, 286, 276,
+ 0, 287, 285, 288, 289, 290, 286, 284, 281, 287,
+ 291, 288, 289, 290, 287, 292, 0, 0, 291, 293,
+ 285, 294, 295, 292, 286, 288, 289, 293, 0, 294,
+ 295, 296, 297, 298, 299, 300, 293, 0, 301, 296,
+ 297, 298, 299, 300, 0, 297, 301, 302, 303, 298,
+ 295, 0, 301, 304, 305, 302, 303, 306, 0, 302,
+
+ 307, 304, 305, 299, 308, 306, 306, 0, 307, 309,
+ 310, 311, 308, 312, 0, 304, 303, 309, 310, 311,
+ 313, 312, 305, 315, 0, 314, 307, 316, 313, 317,
+ 318, 315, 308, 314, 314, 316, 310, 317, 318, 311,
+ 316, 319, 313, 318, 315, 320, 321, 323, 324, 319,
+ 325, 317, 326, 320, 321, 323, 324, 327, 325, 328,
+ 326, 329, 330, 331, 332, 327, 333, 328, 319, 329,
+ 330, 331, 332, 334, 333, 0, 335, 0, 0, 326,
+ 336, 334, 337, 327, 335, 325, 334, 338, 336, 329,
+ 337, 336, 332, 339, 340, 338, 341, 0, 343, 342,
+
+ 344, 339, 340, 346, 341, 337, 343, 342, 344, 345,
+ 338, 346, 0, 0, 347, 348, 349, 345, 0, 350,
+ 340, 342, 347, 348, 349, 0, 348, 350, 351, 344,
+ 352, 354, 0, 349, 355, 345, 351, 353, 352, 354,
+ 347, 356, 355, 352, 355, 353, 353, 357, 358, 356,
+ 359, 360, 0, 361, 362, 357, 358, 354, 359, 360,
+ 357, 361, 362, 363, 364, 365, 366, 362, 367, 368,
+ 361, 363, 364, 365, 366, 369, 367, 368, 370, 360,
+ 371, 372, 0, 369, 373, 374, 370, 375, 371, 372,
+ 370, 368, 373, 374, 366, 375, 367, 376, 0, 377,
+
+ 378, 0, 379, 0, 374, 376, 371, 377, 378, 375,
+ 379, 380, 381, 382, 383, 379, 384, 0, 0, 380,
+ 381, 382, 383, 380, 384, 377, 383, 0, 385, 386,
+ 387, 388, 389, 390, 391, 381, 385, 386, 387, 388,
+ 389, 390, 391, 392, 388, 393, 394, 382, 396, 395,
+ 397, 392, 398, 393, 394, 0, 396, 395, 397, 0,
+ 398, 390, 395, 385, 397, 399, 400, 391, 401, 392,
+ 402, 0, 403, 399, 400, 405, 401, 404, 402, 399,
+ 403, 394, 406, 405, 0, 404, 0, 407, 408, 409,
+ 406, 410, 402, 0, 404, 407, 408, 409, 413, 410,
+
+ 411, 410, 414, 0, 415, 412, 413, 0, 411, 408,
+ 414, 406, 415, 412, 416, 407, 0, 411, 412, 417,
+ 413, 418, 416, 419, 409, 420, 0, 417, 414, 418,
+ 421, 419, 0, 420, 418, 422, 0, 423, 421, 415,
+ 417, 0, 0, 422, 419, 423, 0, 0, 420, 0,
+ 0, 0, 0, 0, 0, 0, 421, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 422, 425,
425, 425, 425, 425, 425, 425, 425, 425, 426, 426,
- 426, 426, 426, 426, 426, 426, 426, 427, 345, 427,
- 428, 428, 428, 429, 429, 342, 429, 430, 430, 430,
- 430, 340, 430, 430, 430, 430, 431, 431, 431, 431,
- 431, 431, 431, 431, 431, 432, 432, 432, 338, 432,
- 432, 432, 432, 432, 433, 334, 433, 433, 433, 433,
- 433, 433, 433, 434, 332, 330, 329, 327, 323, 434,
- 435, 322, 435, 321, 320, 319, 311, 308, 299, 295,
-
- 293, 291, 290, 289, 281, 276, 272, 270, 269, 259,
- 248, 244, 243, 241, 240, 239, 238, 236, 230, 227,
- 225, 222, 210, 208, 202, 190, 188, 186, 176, 161,
- 157, 137, 136, 134, 133, 132, 130, 104, 103, 96,
- 94, 77, 71, 70, 65, 63, 58, 56, 38, 30,
- 26, 24, 23, 9, 8, 7, 6, 5, 4, 3,
- 423, 423, 423, 423, 423, 423, 423, 423, 423, 423,
- 423, 423, 423, 423, 423, 423, 423, 423, 423, 423,
- 423, 423, 423, 423, 423, 423, 423, 423, 423, 423,
- 423, 423, 423, 423, 423, 423, 423, 423, 423, 423,
-
- 423, 423, 423, 423, 423, 423, 423, 423, 423, 423,
- 423, 423, 423, 423, 423, 423
+ 426, 426, 426, 426, 426, 426, 426, 427, 427, 427,
+ 427, 427, 427, 427, 427, 427, 428, 0, 428, 429,
+
+ 429, 429, 430, 430, 0, 430, 431, 431, 431, 431,
+ 0, 431, 431, 431, 431, 432, 432, 432, 432, 432,
+ 432, 432, 432, 432, 433, 433, 433, 0, 433, 433,
+ 433, 433, 433, 434, 0, 434, 434, 434, 434, 434,
+ 434, 434, 435, 0, 0, 0, 0, 0, 435, 436,
+ 436, 0, 436, 437, 0, 437, 424, 424, 424, 424,
+ 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
+ 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
+ 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
+ 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
+
+ 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
+ 424, 424, 424
} ;
static yy_state_type yy_last_accepting_state;
@@ -847,6 +966,7 @@ Created 12/14/1997 Heikki Tuuri
#define realloc(P, A) ut_realloc(P, A)
#define exit(A) ut_error
+/* Note: We cast &result to int* from yysize_t* */
#define YY_INPUT(buf, result, max_size) \
(result = pars_get_lex_chars(buf, max_size))
@@ -883,7 +1003,7 @@ string_append(
-#line 887 "lexyy.cc"
+#line 1006 "lexyy.cc"
#define INITIAL 0
#define comment 1
@@ -965,7 +1085,12 @@ static int input (void );
/* Amount of stuff to slurp up with each read. */
#ifndef YY_READ_BUF_SIZE
+#ifdef __ia64__
+/* On IA-64, the buffer size is 16k, not 8k */
+#define YY_READ_BUF_SIZE 16384
+#else
#define YY_READ_BUF_SIZE 8192
+#endif /* __ia64__ */
#endif
/* Copy whatever the last rule matched to the standard output. */
@@ -973,7 +1098,7 @@ static int input (void );
/* This used to be an fputs(), but since the string might contain NUL's,
* we now use fwrite().
*/
-#define ECHO fwrite( yytext, yyleng, 1, yyout )
+#define ECHO do { if (fwrite( yytext, yyleng, 1, yyout )) {} } while (0)
#endif
/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL,
@@ -984,7 +1109,7 @@ static int input (void );
if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \
{ \
int c = '*'; \
- yy_size_t n; \
+ size_t n; \
for ( n = 0; n < max_size && \
(c = getc( yyin )) != EOF && c != '\n'; ++n ) \
buf[n] = (char) c; \
@@ -1069,7 +1194,7 @@ YY_DECL
#line 112 "pars0lex.l"
-#line 1073 "lexyy.cc"
+#line 1197 "lexyy.cc"
if ( !(yy_init) )
{
@@ -1122,13 +1247,13 @@ yy_match:
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 424 )
+ if ( yy_current_state >= 425 )
yy_c = yy_meta[(unsigned int) yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
++yy_cp;
}
- while ( yy_current_state != 423 );
+ while ( yy_current_state != 424 );
yy_cp = (yy_last_accepting_cpos);
yy_current_state = (yy_last_accepting_state);
@@ -2109,7 +2234,7 @@ YY_RULE_SETUP
#line 691 "pars0lex.l"
YY_FATAL_ERROR( "flex scanner jammed" );
YY_BREAK
-#line 2113 "lexyy.cc"
+#line 2237 "lexyy.cc"
case YY_STATE_EOF(INITIAL):
case YY_STATE_EOF(comment):
case YY_STATE_EOF(quoted):
@@ -2299,7 +2424,7 @@ static int yy_get_next_buffer (void)
else
{
- yy_size_t num_to_read =
+ int num_to_read =
YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1;
while ( num_to_read <= 0 )
@@ -2313,16 +2438,16 @@ static int yy_get_next_buffer (void)
if ( b->yy_is_our_buffer )
{
- yy_size_t new_size = b->yy_buf_size * 2;
+ int new_size = b->yy_buf_size * 2;
if ( new_size <= 0 )
b->yy_buf_size += b->yy_buf_size / 8;
else
b->yy_buf_size *= 2;
- b->yy_ch_buf = (char*)
+ b->yy_ch_buf = (char *)
/* Include room in for 2 EOB chars. */
- yyrealloc((void*) b->yy_ch_buf,b->yy_buf_size + 2 );
+ yyrealloc((void *) b->yy_ch_buf,b->yy_buf_size + 2 );
}
else
/* Can't grow it, we don't own it. */
@@ -2344,7 +2469,7 @@ static int yy_get_next_buffer (void)
/* Read in more data. */
YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]),
- (yy_n_chars), num_to_read );
+ (yy_n_chars), (size_t) num_to_read );
YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
}
@@ -2371,7 +2496,7 @@ static int yy_get_next_buffer (void)
if ((yy_size_t) ((yy_n_chars) + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) {
/* Extend the array by 50%, plus the number we really need. */
yy_size_t new_size = (yy_n_chars) + number_to_move + ((yy_n_chars) >> 1);
- YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char*) yyrealloc((void*) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size );
+ YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) yyrealloc((void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size );
if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf )
YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" );
}
@@ -2387,7 +2512,7 @@ static int yy_get_next_buffer (void)
/* yy_get_previous_state - get the state just before the EOB char was reached */
- static yy_state_type yy_get_previous_state (void)
+ yy_state_type yy_get_previous_state (void)
{
register yy_state_type yy_current_state;
register char *yy_cp;
@@ -2405,7 +2530,7 @@ static int yy_get_next_buffer (void)
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 424 )
+ if ( yy_current_state >= 425 )
yy_c = yy_meta[(unsigned int) yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
@@ -2419,7 +2544,7 @@ static int yy_get_next_buffer (void)
* synopsis
* next_state = yy_try_NUL_trans( current_state );
*/
- static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state )
+ static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state )
{
register int yy_is_jam;
register char *yy_cp = (yy_c_buf_p);
@@ -2433,11 +2558,11 @@ static int yy_get_next_buffer (void)
while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
{
yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 424 )
+ if ( yy_current_state >= 425 )
yy_c = yy_meta[(unsigned int) yy_c];
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
- yy_is_jam = (yy_current_state == 423);
+ yy_is_jam = (yy_current_state == 424);
return yy_is_jam ? 0 : yy_current_state;
}
@@ -2466,7 +2591,7 @@ static int yy_get_next_buffer (void)
else
{ /* need more input */
- yy_size_t offset = (yy_c_buf_p) - (yytext_ptr);
+ int offset = (int)((yy_c_buf_p) - (yytext_ptr));
++(yy_c_buf_p);
switch ( yy_get_next_buffer( ) )
@@ -2490,7 +2615,7 @@ static int yy_get_next_buffer (void)
case EOB_ACT_END_OF_FILE:
{
if ( yywrap( ) )
- return 0;
+ return EOF;
if ( ! (yy_did_buffer_switch_on_eof) )
YY_NEW_FILE;
@@ -2508,7 +2633,7 @@ static int yy_get_next_buffer (void)
}
}
- c = *(unsigned char*) (yy_c_buf_p); /* cast for 8-bit char's */
+ c = *(unsigned char *) (yy_c_buf_p); /* cast for 8-bit char's */
*(yy_c_buf_p) = '\0'; /* preserve yytext */
(yy_hold_char) = *++(yy_c_buf_p);
@@ -2518,7 +2643,7 @@ static int yy_get_next_buffer (void)
/** Immediately switch to a different input stream.
* @param input_file A readable stream.
- *
+ *
* @note This function does not reset the start condition to @c INITIAL .
*/
void yyrestart (FILE * input_file )
@@ -2536,7 +2661,7 @@ static int yy_get_next_buffer (void)
/** Switch to a different input buffer.
* @param new_buffer The new input buffer.
- *
+ *
*/
__attribute__((unused)) static void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer )
{
@@ -2580,7 +2705,7 @@ static void yy_load_buffer_state (void)
/** Allocate and initialize an input buffer state.
* @param file A readable stream.
* @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE.
- *
+ *
* @return the allocated buffer state.
*/
static YY_BUFFER_STATE yy_create_buffer (FILE * file, int size )
@@ -2596,7 +2721,7 @@ static void yy_load_buffer_state (void)
/* yy_ch_buf has to be 2 characters longer than the size given because
* we need to put in 2 end-of-buffer characters.
*/
- b->yy_ch_buf = (char*) yyalloc(b->yy_buf_size + 2 );
+ b->yy_ch_buf = (char *) yyalloc(b->yy_buf_size + 2 );
if ( ! b->yy_ch_buf )
YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
@@ -2609,9 +2734,9 @@ static void yy_load_buffer_state (void)
/** Destroy the buffer.
* @param b a buffer created with yy_create_buffer()
- *
+ *
*/
- void yy_delete_buffer (YY_BUFFER_STATE b )
+ void yy_delete_buffer (YY_BUFFER_STATE b )
{
if ( ! b )
@@ -2621,20 +2746,20 @@ static void yy_load_buffer_state (void)
YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0;
if ( b->yy_is_our_buffer )
- yyfree((void*) b->yy_ch_buf );
+ yyfree((void *) b->yy_ch_buf );
- yyfree((void*) b );
+ yyfree((void *) b );
}
/* Initializes or reinitializes a buffer.
* This function is sometimes called more than once on the same buffer,
* such as during a yyrestart() or at EOF.
*/
- static void yy_init_buffer (YY_BUFFER_STATE b, FILE * file )
+ static void yy_init_buffer (YY_BUFFER_STATE b, FILE * file )
{
int oerrno = errno;
-
+
yy_flush_buffer(b );
b->yy_input_file = file;
@@ -2650,13 +2775,13 @@ static void yy_load_buffer_state (void)
}
b->yy_is_interactive = 0;
-
+
errno = oerrno;
}
/** Discard all buffered characters. On the next scan, YY_INPUT will be called.
* @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER.
- *
+ *
*/
void yy_flush_buffer (YY_BUFFER_STATE b )
{
@@ -2685,7 +2810,7 @@ static void yy_load_buffer_state (void)
* the current state. This function will allocate the stack
* if necessary.
* @param new_buffer The new state.
- *
+ *
*/
void yypush_buffer_state (YY_BUFFER_STATE new_buffer )
{
@@ -2715,7 +2840,7 @@ void yypush_buffer_state (YY_BUFFER_STATE new_buffer )
/** Removes and deletes the top of the stack, if present.
* The next element becomes the new top.
- *
+ *
*/
void yypop_buffer_state (void)
{
@@ -2738,8 +2863,8 @@ void yypop_buffer_state (void)
*/
static void yyensure_buffer_stack (void)
{
- yy_size_t num_to_alloc;
-
+ int num_to_alloc;
+
if (!(yy_buffer_stack)) {
/* First allocation is just for 2 elements, since we don't know if this
@@ -2747,7 +2872,7 @@ static void yyensure_buffer_stack (void)
* immediate realloc on the next call.
*/
num_to_alloc = 1;
- (yy_buffer_stack) = (struct yy_buffer_state**) yyalloc
+ (yy_buffer_stack) = (struct yy_buffer_state**)yyalloc
(num_to_alloc * sizeof(struct yy_buffer_state*)
);
if ( ! (yy_buffer_stack) )
@@ -2766,7 +2891,7 @@ static void yyensure_buffer_stack (void)
int grow_size = 8 /* arbitrary grow size */;
num_to_alloc = (yy_buffer_stack_max) + grow_size;
- (yy_buffer_stack) = (struct yy_buffer_state**) yyrealloc
+ (yy_buffer_stack) = (struct yy_buffer_state**)yyrealloc
((yy_buffer_stack),
num_to_alloc * sizeof(struct yy_buffer_state*)
);
@@ -2809,7 +2934,7 @@ static void yy_fatal_error (yyconst char* msg )
/* Accessor methods (get/set functions) to struct members. */
/** Get the current line number.
- *
+ *
*/
int yyget_lineno (void)
{
@@ -2818,7 +2943,7 @@ int yyget_lineno (void)
}
/** Get the input stream.
- *
+ *
*/
FILE *yyget_in (void)
{
@@ -2826,7 +2951,7 @@ FILE *yyget_in (void)
}
/** Get the output stream.
- *
+ *
*/
FILE *yyget_out (void)
{
@@ -2834,7 +2959,7 @@ FILE *yyget_out (void)
}
/** Get the length of the current token.
- *
+ *
*/
yy_size_t yyget_leng (void)
{
@@ -2842,7 +2967,7 @@ yy_size_t yyget_leng (void)
}
/** Get the current token.
- *
+ *
*/
char *yyget_text (void)
@@ -2852,18 +2977,18 @@ char *yyget_text (void)
/** Set the current line number.
* @param line_number
- *
+ *
*/
void yyset_lineno (int line_number )
{
-
+
yylineno = line_number;
}
/** Set the input stream. This does not discard the current
* input buffer.
* @param in_str A readable stream.
- *
+ *
* @see yy_switch_to_buffer
*/
void yyset_in (FILE * in_str )
@@ -2895,7 +3020,7 @@ static int yy_init_globals (void)
(yy_buffer_stack) = 0;
(yy_buffer_stack_top) = 0;
(yy_buffer_stack_max) = 0;
- (yy_c_buf_p) = (char*) 0;
+ (yy_c_buf_p) = (char *) 0;
(yy_init) = 0;
(yy_start) = 0;
@@ -2904,8 +3029,8 @@ static int yy_init_globals (void)
yyin = stdin;
yyout = stdout;
#else
- yyin = (FILE*) 0;
- yyout = (FILE*) 0;
+ yyin = (FILE *) 0;
+ yyout = (FILE *) 0;
#endif
/* For future reference: Set errno on error, since we are called by
@@ -2917,7 +3042,7 @@ static int yy_init_globals (void)
/* yylex_destroy is for both reentrant and non-reentrant scanners. */
__attribute__((unused)) static int yylex_destroy (void)
{
-
+
/* Pop the buffer stack, destroying each element. */
while(YY_CURRENT_BUFFER){
yy_delete_buffer(YY_CURRENT_BUFFER );
@@ -2962,24 +3087,24 @@ static int yy_flex_strlen (yyconst char * s )
void *yyalloc (yy_size_t size )
{
- return (void*) malloc( size );
+ return (void *) malloc( size );
}
void *yyrealloc (void * ptr, yy_size_t size )
{
- /* The cast to (char*) in the following accommodates both
+ /* The cast to (char *) in the following accommodates both
* implementations that use char* generic pointers, and those
* that use void* generic pointers. It works with the latter
* because both ANSI C and C++ allow castless assignment from
* any pointer type to void*, and deal with argument conversions
* as though doing an assignment.
*/
- return (void*) realloc( (char*) ptr, size );
+ return (void *) realloc( (char *) ptr, size );
}
void yyfree (void * ptr )
{
- free( (char*) ptr ); /* see yyrealloc() for (char*) cast */
+ free( (char*) ptr ); /* see yyrealloc() for (char *) cast */
}
#define YYTABLES_NAME "yytables"
diff --git a/storage/innobase/pars/pars0lex.l b/storage/innobase/pars/pars0lex.l
index 2446e40cde8..83c3af4b6c5 100644
--- a/storage/innobase/pars/pars0lex.l
+++ b/storage/innobase/pars/pars0lex.l
@@ -102,7 +102,7 @@ string_append(
DIGIT [0-9]
ID [a-z_A-Z][a-z_A-Z0-9]*
-TABLE_NAME [a-z_A-Z][a-z_A-Z0-9]*\/(#sql-|[a-z_A-Z])[a-z_A-Z0-9]*
+TABLE_NAME [a-z_A-Z][@a-z_A-Z0-9]*\/(#sql-|[a-z_A-Z])[a-z_A-Z0-9]*
BOUND_LIT \:[a-z_A-Z0-9]+
BOUND_ID \$[a-z_A-Z0-9]+
diff --git a/storage/innobase/pars/pars0opt.cc b/storage/innobase/pars/pars0opt.cc
index e5f347eedd6..cbed2b39eeb 100644
--- a/storage/innobase/pars/pars0opt.cc
+++ b/storage/innobase/pars/pars0opt.cc
@@ -345,7 +345,7 @@ opt_calc_index_goodness(
/* At least for now we don't support using FTS indexes for queries
done through InnoDB's own SQL parser. */
- if (index->type == DICT_FTS) {
+ if (dict_index_is_online_ddl(index) || (index->type & DICT_FTS)) {
return(0);
}
@@ -400,7 +400,7 @@ opt_calc_index_goodness(
}
}
- /* We have to test for goodness here, as last_op may note be set */
+ /* We have to test for goodness here, as last_op may not be set */
if (goodness && dict_index_is_clust(index)) {
goodness++;
diff --git a/storage/innobase/pars/pars0pars.cc b/storage/innobase/pars/pars0pars.cc
index a4ab85adc36..f82610e62d0 100644
--- a/storage/innobase/pars/pars0pars.cc
+++ b/storage/innobase/pars/pars0pars.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -859,7 +859,8 @@ pars_retrieve_table_def(
sym_node->resolved = TRUE;
sym_node->token_type = SYM_TABLE_REF_COUNTED;
- sym_node->table = dict_table_open_on_name(sym_node->name, TRUE);
+ sym_node->table = dict_table_open_on_name(
+ sym_node->name, TRUE, FALSE, DICT_ERR_IGNORE_NONE);
ut_a(sym_node->table != NULL);
}
@@ -1115,8 +1116,8 @@ pars_function_declaration(
sym_node->token_type = SYM_FUNCTION;
/* Check that the function exists. */
- ut_a(pars_info_get_user_func(pars_sym_tab_global->info,
- sym_node->name));
+ ut_a(pars_info_lookup_user_func(
+ pars_sym_tab_global->info, sym_node->name));
return(sym_node);
}
@@ -1782,8 +1783,9 @@ pars_fetch_statement(
} else {
pars_resolve_exp_variables_and_types(NULL, user_func);
- node->func = pars_info_get_user_func(pars_sym_tab_global->info,
- user_func->name);
+ node->func = pars_info_lookup_user_func(
+ pars_sym_tab_global->info, user_func->name);
+
ut_a(node->func);
node->into_list = NULL;
@@ -1941,9 +1943,23 @@ pars_create_table(
const dtype_t* dtype;
ulint n_cols;
ulint flags = 0;
+ ulint flags2 = 0;
if (compact != NULL) {
+
+ /* System tables currently only use the REDUNDANT row
+ format therefore the check for srv_file_per_table should be
+ safe for now. */
+
flags |= DICT_TF_COMPACT;
+
+ /* FIXME: Ideally this should be part of the SQL syntax
+ or use some other mechanism. We want to reduce dependency
+ on global variables. There is an inherent race here but
+ that has always existed around this variable. */
+ if (srv_file_per_table) {
+ flags2 |= DICT_TF2_USE_TABLESPACE;
+ }
}
if (block_size != NULL) {
@@ -1974,10 +1990,8 @@ pars_create_table(
n_cols = que_node_list_get_len(column_defs);
- /* As the InnoDB SQL parser is for internal use only,
- for creating some system tables, this function will only
- create tables in the old (not compact) record format. */
- table = dict_mem_table_create(table_sym->name, 0, n_cols, flags, 0);
+ table = dict_mem_table_create(
+ table_sym->name, 0, n_cols, flags, flags2);
#ifdef UNIV_DEBUG
if (not_fit_in_memory != NULL) {
@@ -1998,7 +2012,7 @@ pars_create_table(
column = static_cast<sym_node_t*>(que_node_get_next(column));
}
- node = tab_create_graph_create(table, pars_sym_tab_global->heap);
+ node = tab_create_graph_create(table, pars_sym_tab_global->heap, true);
table_sym->resolved = TRUE;
table_sym->token_type = SYM_TABLE;
@@ -2052,7 +2066,7 @@ pars_create_index(
column = static_cast<sym_node_t*>(que_node_get_next(column));
}
- node = ind_create_graph_create(index, pars_sym_tab_global->heap);
+ node = ind_create_graph_create(index, pars_sym_tab_global->heap, true);
table_sym->resolved = TRUE;
table_sym->token_type = SYM_TABLE;
@@ -2251,7 +2265,7 @@ que_thr_t*
pars_complete_graph_for_exec(
/*=========================*/
que_node_t* node, /*!< in: root node for an incomplete
- query graph */
+ query graph, or NULL for dummy graph */
trx_t* trx, /*!< in: transaction handle */
mem_heap_t* heap) /*!< in: memory heap from which allocated */
{
@@ -2265,7 +2279,9 @@ pars_complete_graph_for_exec(
thr->child = node;
- que_node_set_parent(node, thr);
+ if (node) {
+ que_node_set_parent(node, thr);
+ }
trx->graph = NULL;
@@ -2478,7 +2494,7 @@ pars_info_bind_int8_literal(
const char* name, /* in: name */
const ib_uint64_t* val) /* in: value */
{
- pars_bound_lit_t* pbl;
+ pars_bound_lit_t* pbl;
pbl = pars_info_lookup_bound_lit(info, name);
@@ -2519,6 +2535,33 @@ pars_info_add_ull_literal(
}
/****************************************************************//**
+If the literal value already exists then it rebinds otherwise it
+creates a new entry. */
+UNIV_INTERN
+void
+pars_info_bind_ull_literal(
+/*=======================*/
+ pars_info_t* info, /*!< in: info struct */
+ const char* name, /*!< in: name */
+ const ib_uint64_t* val) /*!< in: value */
+{
+ pars_bound_lit_t* pbl;
+
+ pbl = pars_info_lookup_bound_lit(info, name);
+
+ if (!pbl) {
+ pars_info_add_literal(
+ info, name, val, sizeof(*val), DATA_FIXBINARY, 0);
+ } else {
+
+ pbl->address = val;
+ pbl->length = sizeof(*val);
+
+ sym_tab_rebind_lit(pbl->node, val, sizeof(*val));
+ }
+}
+
+/****************************************************************//**
Add user function. */
UNIV_INTERN
void
@@ -2605,19 +2648,6 @@ pars_info_get_bound_id(
}
/****************************************************************//**
-Get user function with the given name.
-@return user func, or NULL if not found */
-UNIV_INTERN
-pars_user_func_t*
-pars_info_get_user_func(
-/*====================*/
- pars_info_t* info, /*!< in: info struct */
- const char* name) /*!< in: function name to find*/
-{
- return(pars_info_lookup_user_func(info, name));
-}
-
-/****************************************************************//**
Get bound literal with the given name.
@return bound literal, or NULL if not found */
UNIV_INTERN
diff --git a/storage/innobase/pars/pars0sym.cc b/storage/innobase/pars/pars0sym.cc
index c71ad8a6b39..b01a69cb33a 100644
--- a/storage/innobase/pars/pars0sym.cc
+++ b/storage/innobase/pars/pars0sym.cc
@@ -84,7 +84,7 @@ sym_tab_free_private(
if (sym->token_type == SYM_TABLE_REF_COUNTED) {
- dict_table_close(sym->table, TRUE);
+ dict_table_close(sym->table, TRUE, FALSE);
sym->table = NULL;
sym->resolved = FALSE;
diff --git a/storage/innobase/que/que0que.cc b/storage/innobase/que/que0que.cc
index c023723685c..fb185959d56 100644
--- a/storage/innobase/que/que0que.cc
+++ b/storage/innobase/que/que0que.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -1248,7 +1248,7 @@ loop:
Evaluate the given SQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-enum db_err
+dberr_t
que_eval_sql(
/*=========*/
pars_info_t* info, /*!< in: info struct, or NULL */
diff --git a/storage/innobase/read/read0read.cc b/storage/innobase/read/read0read.cc
index 02d78d657c6..14dc9ee5e7f 100644
--- a/storage/innobase/read/read0read.cc
+++ b/storage/innobase/read/read0read.cc
@@ -174,59 +174,6 @@ The order does not matter. No new transactions can be created and no running
transaction can commit or rollback (or free views).
*/
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Validates a read view object. */
-static
-ibool
-read_view_validate(
-/*===============*/
- const read_view_t* view) /*!< in: view to validate */
-{
- ulint i;
-
- ut_ad(mutex_own(&trx_sys->mutex));
-
- /* Check that the view->trx_ids array is in descending order. */
- for (i = 1; i < view->n_trx_ids; ++i) {
-
- ut_a(view->trx_ids[i] < view->trx_ids[i - 1]);
- }
-
- return(TRUE);
-}
-
-/** Functor to validate the view list. */
-struct Check {
-
- Check() : m_prev_view(0) { }
-
- void operator()(const read_view_t* view)
- {
- ut_a(m_prev_view == NULL
- || m_prev_view->low_limit_no >= view->low_limit_no);
-
- m_prev_view = view;
- }
-
- const read_view_t* m_prev_view;
-};
-
-/*********************************************************************//**
-Validates a read view list. */
-static
-ibool
-read_view_list_validate(void)
-/*=========================*/
-{
- ut_ad(mutex_own(&trx_sys->mutex));
-
- ut_list_map(trx_sys->view_list, &read_view_t::view_list, Check());
-
- return(TRUE);
-}
-#endif
-
/*********************************************************************//**
Creates a read view object.
@return own: read view struct */
@@ -530,25 +477,6 @@ read_view_purge_open(
}
/*********************************************************************//**
-Remove a read view from the trx_sys->view_list. */
-UNIV_INTERN
-void
-read_view_remove(
-/*=============*/
- read_view_t* view) /*!< in: read view */
-{
- mutex_enter(&trx_sys->mutex);
-
- ut_ad(read_view_validate(view));
-
- UT_LIST_REMOVE(view_list, trx_sys->view_list, view);
-
- ut_ad(read_view_list_validate());
-
- mutex_exit(&trx_sys->mutex);
-}
-
-/*********************************************************************//**
Closes a consistent read view for MySQL. This function is called at an SQL
statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED. */
UNIV_INTERN
@@ -559,7 +487,7 @@ read_view_close_for_mysql(
{
ut_a(trx->global_read_view);
- read_view_remove(trx->global_read_view);
+ read_view_remove(trx->global_read_view, false);
mem_heap_empty(trx->global_read_view_heap);
@@ -692,7 +620,7 @@ read_cursor_view_close_for_mysql(
belong to this transaction */
trx->n_mysql_tables_in_use += curview->n_mysql_tables_in_use;
- read_view_remove(curview->read_view);
+ read_view_remove(curview->read_view, false);
trx->read_view = trx->global_read_view;
diff --git a/storage/innobase/rem/rem0cmp.cc b/storage/innobase/rem/rem0cmp.cc
index 19f5633953a..db0fdf3ee21 100644
--- a/storage/innobase/rem/rem0cmp.cc
+++ b/storage/innobase/rem/rem0cmp.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -30,6 +30,7 @@ Created 7/1/1994 Heikki Tuuri
#endif
#include "ha_prototypes.h"
+#include "handler0alter.h"
#include "srv0srv.h"
/* ALPHABETICAL ORDER
@@ -69,10 +70,12 @@ cmp_debug_dtuple_rec_with_match(
has an equal number or more fields than
dtuple */
const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint* matched_fields);/*!< in/out: number of already
+ ulint n_cmp, /*!< in: number of fields to compare */
+ ulint* matched_fields)/*!< in/out: number of already
completely matched fields; when function
returns, contains the value for current
comparison */
+ __attribute__((nonnull, warn_unused_result));
#endif /* UNIV_DEBUG */
/*************************************************************//**
This function is used to compare two data fields for which the data type
@@ -621,14 +624,15 @@ respectively, when only the common first fields are compared, or until
the first externally stored field in rec */
UNIV_INTERN
int
-cmp_dtuple_rec_with_match(
-/*======================*/
+cmp_dtuple_rec_with_match_low(
+/*==========================*/
const dtuple_t* dtuple, /*!< in: data tuple */
const rec_t* rec, /*!< in: physical record which differs from
dtuple in some of the common fields, or which
has an equal number or more fields than
dtuple */
const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ ulint n_cmp, /*!< in: number of fields to compare */
ulint* matched_fields, /*!< in/out: number of already completely
matched fields; when function returns,
contains the value for current comparison */
@@ -652,7 +656,7 @@ cmp_dtuple_rec_with_match(
ulint cur_field; /* current field number */
ulint cur_bytes; /* number of already matched bytes
in current field */
- int ret = 3333; /* return value */
+ int ret; /* return value */
ut_ad(dtuple && rec && matched_fields && matched_bytes);
ut_ad(dtuple_check_typed(dtuple));
@@ -661,7 +665,9 @@ cmp_dtuple_rec_with_match(
cur_field = *matched_fields;
cur_bytes = *matched_bytes;
- ut_ad(cur_field <= dtuple_get_n_fields_cmp(dtuple));
+ ut_ad(n_cmp > 0);
+ ut_ad(n_cmp <= dtuple_get_n_fields(dtuple));
+ ut_ad(cur_field <= n_cmp);
ut_ad(cur_field <= rec_offs_n_fields(offsets));
if (cur_bytes == 0 && cur_field == 0) {
@@ -681,7 +687,7 @@ cmp_dtuple_rec_with_match(
/* Match fields in a loop; stop if we run out of fields in dtuple
or find an externally stored field */
- while (cur_field < dtuple_get_n_fields_cmp(dtuple)) {
+ while (cur_field < n_cmp) {
ulint mtype;
ulint prtype;
@@ -838,7 +844,7 @@ next_field:
order_resolved:
ut_ad((ret >= - 1) && (ret <= 1));
ut_ad(ret == cmp_debug_dtuple_rec_with_match(dtuple, rec, offsets,
- matched_fields));
+ n_cmp, matched_fields));
ut_ad(*matched_fields == cur_field); /* In the debug version, the
above cmp_debug_... sets
*matched_fields to a value */
@@ -909,156 +915,181 @@ cmp_dtuple_is_prefix_of_rec(
}
/*************************************************************//**
-Compare two physical records that contain the same number of columns,
-none of which are stored externally.
-@return 1, 0, -1 if rec1 is greater, equal, less, respectively, than rec2 */
-UNIV_INTERN
+Compare two physical record fields.
+@retval 1 if rec1 field is greater than rec2
+@retval -1 if rec1 field is less than rec2
+@retval 0 if rec1 field equals to rec2 */
+static __attribute__((nonnull, warn_unused_result))
int
-cmp_rec_rec_simple(
-/*===============*/
+cmp_rec_rec_simple_field(
+/*=====================*/
const rec_t* rec1, /*!< in: physical record */
const rec_t* rec2, /*!< in: physical record */
const ulint* offsets1,/*!< in: rec_get_offsets(rec1, ...) */
const ulint* offsets2,/*!< in: rec_get_offsets(rec2, ...) */
const dict_index_t* index, /*!< in: data dictionary index */
- ibool* null_eq)/*!< out: set to TRUE if
- found matching null values */
+ ulint n) /*!< in: field to compare */
{
- ulint rec1_f_len; /*!< length of current field in rec1 */
- const byte* rec1_b_ptr; /*!< pointer to the current byte
- in rec1 field */
- ulint rec1_byte; /*!< value of current byte to be
- compared in rec1 */
- ulint rec2_f_len; /*!< length of current field in rec2 */
- const byte* rec2_b_ptr; /*!< pointer to the current byte
- in rec2 field */
- ulint rec2_byte; /*!< value of current byte to be
- compared in rec2 */
- ulint cur_field; /*!< current field number */
- ulint n_uniq;
-
- n_uniq = dict_index_get_n_unique(index);
- ut_ad(rec_offs_n_fields(offsets1) >= n_uniq);
- ut_ad(rec_offs_n_fields(offsets2) >= n_uniq);
-
- ut_ad(rec_offs_comp(offsets1) == rec_offs_comp(offsets2));
+ const byte* rec1_b_ptr;
+ const byte* rec2_b_ptr;
+ ulint rec1_f_len;
+ ulint rec2_f_len;
+ const dict_col_t* col = dict_index_get_nth_col(index, n);
- for (cur_field = 0; cur_field < n_uniq; cur_field++) {
+ ut_ad(!rec_offs_nth_extern(offsets1, n));
+ ut_ad(!rec_offs_nth_extern(offsets2, n));
- ulint cur_bytes;
- ulint mtype;
- ulint prtype;
-
- {
- const dict_col_t* col
- = dict_index_get_nth_col(index, cur_field);
+ rec1_b_ptr = rec_get_nth_field(rec1, offsets1, n, &rec1_f_len);
+ rec2_b_ptr = rec_get_nth_field(rec2, offsets2, n, &rec2_f_len);
- mtype = col->mtype;
- prtype = col->prtype;
+ if (rec1_f_len == UNIV_SQL_NULL || rec2_f_len == UNIV_SQL_NULL) {
+ if (rec1_f_len == rec2_f_len) {
+ return(0);
}
+ /* We define the SQL null to be the smallest possible
+ value of a field in the alphabetical order */
+ return(rec1_f_len == UNIV_SQL_NULL ? -1 : 1);
+ }
- ut_ad(!rec_offs_nth_extern(offsets1, cur_field));
- ut_ad(!rec_offs_nth_extern(offsets2, cur_field));
-
- rec1_b_ptr = rec_get_nth_field(rec1, offsets1,
- cur_field, &rec1_f_len);
- rec2_b_ptr = rec_get_nth_field(rec2, offsets2,
- cur_field, &rec2_f_len);
+ if (col->mtype >= DATA_FLOAT
+ || (col->mtype == DATA_BLOB
+ && !(col->prtype & DATA_BINARY_TYPE)
+ && dtype_get_charset_coll(col->prtype)
+ != DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL)) {
+ return(cmp_whole_field(col->mtype, col->prtype,
+ rec1_b_ptr, (unsigned) rec1_f_len,
+ rec2_b_ptr, (unsigned) rec2_f_len));
+ }
- if (rec1_f_len == UNIV_SQL_NULL
- || rec2_f_len == UNIV_SQL_NULL) {
+ /* Compare the fields */
+ for (ulint cur_bytes = 0;; cur_bytes++, rec1_b_ptr++, rec2_b_ptr++) {
+ ulint rec1_byte;
+ ulint rec2_byte;
- if (rec1_f_len == rec2_f_len) {
- if (null_eq) {
- *null_eq = TRUE;
- }
+ if (rec2_f_len <= cur_bytes) {
+ if (rec1_f_len <= cur_bytes) {
+ return(0);
+ }
- goto next_field;
+ rec2_byte = dtype_get_pad_char(
+ col->mtype, col->prtype);
- } else if (rec2_f_len == UNIV_SQL_NULL) {
+ if (rec2_byte == ULINT_UNDEFINED) {
+ return(1);
+ }
+ } else {
+ rec2_byte = *rec2_b_ptr;
+ }
- /* We define the SQL null to be the
- smallest possible value of a field
- in the alphabetical order */
+ if (rec1_f_len <= cur_bytes) {
+ rec1_byte = dtype_get_pad_char(
+ col->mtype, col->prtype);
- return(1);
- } else {
+ if (rec1_byte == ULINT_UNDEFINED) {
return(-1);
}
+ } else {
+ rec1_byte = *rec1_b_ptr;
}
- if (mtype >= DATA_FLOAT
- || (mtype == DATA_BLOB
- && 0 == (prtype & DATA_BINARY_TYPE)
- && dtype_get_charset_coll(prtype)
- != DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL)) {
- int ret = cmp_whole_field(mtype, prtype,
- rec1_b_ptr,
- (unsigned) rec1_f_len,
- rec2_b_ptr,
- (unsigned) rec2_f_len);
- if (ret) {
- return(ret);
- }
+ if (rec1_byte == rec2_byte) {
+ /* If the bytes are equal, they will remain such
+ even after the collation transformation below */
+ continue;
+ }
- goto next_field;
+ if (col->mtype <= DATA_CHAR
+ || (col->mtype == DATA_BLOB
+ && !(col->prtype & DATA_BINARY_TYPE))) {
+
+ rec1_byte = cmp_collate(rec1_byte);
+ rec2_byte = cmp_collate(rec2_byte);
}
- /* Compare the fields */
- for (cur_bytes = 0;; cur_bytes++, rec1_b_ptr++, rec2_b_ptr++) {
- if (rec2_f_len <= cur_bytes) {
+ if (rec1_byte < rec2_byte) {
+ return(-1);
+ } else if (rec1_byte > rec2_byte) {
+ return(1);
+ }
+ }
+}
- if (rec1_f_len <= cur_bytes) {
+/*************************************************************//**
+Compare two physical records that contain the same number of columns,
+none of which are stored externally.
+@retval 1 if rec1 (including non-ordering columns) is greater than rec2
+@retval -1 if rec1 (including non-ordering columns) is less than rec2
+@retval 0 if rec1 is a duplicate of rec2 */
+UNIV_INTERN
+int
+cmp_rec_rec_simple(
+/*===============*/
+ const rec_t* rec1, /*!< in: physical record */
+ const rec_t* rec2, /*!< in: physical record */
+ const ulint* offsets1,/*!< in: rec_get_offsets(rec1, ...) */
+ const ulint* offsets2,/*!< in: rec_get_offsets(rec2, ...) */
+ const dict_index_t* index, /*!< in: data dictionary index */
+ struct TABLE* table) /*!< in: MySQL table, for reporting
+ duplicate key value if applicable,
+ or NULL */
+{
+ ulint n;
+ ulint n_uniq = dict_index_get_n_unique(index);
+ bool null_eq = false;
- goto next_field;
- }
+ ut_ad(rec_offs_n_fields(offsets1) >= n_uniq);
+ ut_ad(rec_offs_n_fields(offsets2) == rec_offs_n_fields(offsets2));
- rec2_byte = dtype_get_pad_char(mtype, prtype);
+ ut_ad(rec_offs_comp(offsets1) == rec_offs_comp(offsets2));
- if (rec2_byte == ULINT_UNDEFINED) {
- return(1);
- }
- } else {
- rec2_byte = *rec2_b_ptr;
- }
+ for (n = 0; n < n_uniq; n++) {
+ int cmp = cmp_rec_rec_simple_field(
+ rec1, rec2, offsets1, offsets2, index, n);
- if (rec1_f_len <= cur_bytes) {
- rec1_byte = dtype_get_pad_char(mtype, prtype);
+ if (cmp) {
+ return(cmp);
+ }
- if (rec1_byte == ULINT_UNDEFINED) {
- return(-1);
- }
- } else {
- rec1_byte = *rec1_b_ptr;
- }
+ /* If the fields are internally equal, they must both
+ be NULL or non-NULL. */
+ ut_ad(rec_offs_nth_sql_null(offsets1, n)
+ == rec_offs_nth_sql_null(offsets2, n));
- if (rec1_byte == rec2_byte) {
- /* If the bytes are equal, they will remain
- such even after the collation transformation
- below */
+ if (rec_offs_nth_sql_null(offsets1, n)) {
+ ut_ad(!(dict_index_get_nth_col(index, n)->prtype
+ & DATA_NOT_NULL));
+ null_eq = true;
+ }
+ }
- continue;
- }
+ /* If we ran out of fields, the ordering columns of rec1 were
+ equal to rec2. Issue a duplicate key error if needed. */
- if (mtype <= DATA_CHAR
- || (mtype == DATA_BLOB
- && !(prtype & DATA_BINARY_TYPE))) {
+ if (!null_eq && table && dict_index_is_unique(index)) {
+ /* Report erroneous row using new version of table. */
+ innobase_rec_to_mysql(table, rec1, index, offsets1);
+ return(0);
+ }
- rec1_byte = cmp_collate(rec1_byte);
- rec2_byte = cmp_collate(rec2_byte);
- }
+ /* Else, keep comparing so that we have the full internal
+ order. */
+ for (; n < dict_index_get_n_fields(index); n++) {
+ int cmp = cmp_rec_rec_simple_field(
+ rec1, rec2, offsets1, offsets2, index, n);
- if (rec1_byte < rec2_byte) {
- return(-1);
- } else if (rec1_byte > rec2_byte) {
- return(1);
- }
+ if (cmp) {
+ return(cmp);
}
-next_field:
- continue;
+
+ /* If the fields are internally equal, they must both
+ be NULL or non-NULL. */
+ ut_ad(rec_offs_nth_sql_null(offsets1, n)
+ == rec_offs_nth_sql_null(offsets2, n));
}
- /* If we ran out of fields, rec1 was equal to rec2. */
+ /* This should never be reached. Internally, an index must
+ never contain duplicate entries. */
+ ut_ad(0);
return(0);
}
@@ -1327,6 +1358,7 @@ cmp_debug_dtuple_rec_with_match(
has an equal number or more fields than
dtuple */
const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
+ ulint n_cmp, /*!< in: number of fields to compare */
ulint* matched_fields) /*!< in/out: number of already
completely matched fields; when function
returns, contains the value for current
@@ -1339,14 +1371,16 @@ cmp_debug_dtuple_rec_with_match(
field data */
ulint rec_f_len; /* length of current field in rec */
const byte* rec_f_data; /* pointer to the current rec field */
- int ret = 3333; /* return value */
+ int ret; /* return value */
ulint cur_field; /* current field number */
ut_ad(dtuple && rec && matched_fields);
ut_ad(dtuple_check_typed(dtuple));
ut_ad(rec_offs_validate(rec, NULL, offsets));
- ut_ad(*matched_fields <= dtuple_get_n_fields_cmp(dtuple));
+ ut_ad(n_cmp > 0);
+ ut_ad(n_cmp <= dtuple_get_n_fields(dtuple));
+ ut_ad(*matched_fields <= n_cmp);
ut_ad(*matched_fields <= rec_offs_n_fields(offsets));
cur_field = *matched_fields;
@@ -1372,7 +1406,7 @@ cmp_debug_dtuple_rec_with_match(
/* Match fields in a loop; stop if we run out of fields in dtuple */
- while (cur_field < dtuple_get_n_fields_cmp(dtuple)) {
+ while (cur_field < n_cmp) {
ulint mtype;
ulint prtype;
diff --git a/storage/innobase/rem/rem0rec.cc b/storage/innobase/rem/rem0rec.cc
index 5a864f122a3..3a5d2f579c3 100644
--- a/storage/innobase/rem/rem0rec.cc
+++ b/storage/innobase/rem/rem0rec.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -29,6 +29,7 @@ Created 5/30/1994 Heikki Tuuri
#include "rem0rec.ic"
#endif
+#include "page0page.h"
#include "mtr0mtr.h"
#include "mtr0log.h"
#include "fts0fts.h"
@@ -162,13 +163,12 @@ UNIV_INTERN
ulint
rec_get_n_extern_new(
/*=================*/
- const rec_t* rec, /*!< in: compact physical record */
- dict_index_t* index, /*!< in: record descriptor */
- ulint n) /*!< in: number of columns to scan */
+ const rec_t* rec, /*!< in: compact physical record */
+ const dict_index_t* index, /*!< in: record descriptor */
+ ulint n) /*!< in: number of columns to scan */
{
const byte* nulls;
const byte* lens;
- dict_field_t* field;
ulint null_mask;
ulint n_extern;
ulint i;
@@ -189,10 +189,13 @@ rec_get_n_extern_new(
/* read the lengths of fields 0..n */
do {
- ulint len;
+ const dict_field_t* field
+ = dict_index_get_nth_field(index, i);
+ const dict_col_t* col
+ = dict_field_get_col(field);
+ ulint len;
- field = dict_index_get_nth_field(index, i);
- if (!(dict_field_get_col(field)->prtype & DATA_NOT_NULL)) {
+ if (!(col->prtype & DATA_NOT_NULL)) {
/* nullable field => read the null flag */
if (UNIV_UNLIKELY(!(byte) null_mask)) {
@@ -210,8 +213,6 @@ rec_get_n_extern_new(
if (UNIV_UNLIKELY(!field->fixed_len)) {
/* Variable-length field: read the length */
- const dict_col_t* col
- = dict_field_get_col(field);
len = *lens--;
/* If the maximum length of the field is up
to 255 bytes, the actual length is always
@@ -240,16 +241,15 @@ rec_get_n_extern_new(
Determine the offset to each field in a leaf-page record
in ROW_FORMAT=COMPACT. This is a special case of
rec_init_offsets() and rec_get_offsets_func(). */
-UNIV_INTERN
+UNIV_INLINE __attribute__((nonnull))
void
rec_init_offsets_comp_ordinary(
/*===========================*/
const rec_t* rec, /*!< in: physical record in
ROW_FORMAT=COMPACT */
- ulint extra, /*!< in: number of bytes to reserve
- between the record header and
- the data payload
- (usually REC_N_NEW_EXTRA_BYTES) */
+ bool temp, /*!< in: whether to use the
+ format for temporary files in
+ index creation */
const dict_index_t* index, /*!< in: record descriptor */
ulint* offsets)/*!< in/out: array of offsets;
in: n=rec_offs_n_fields(offsets) */
@@ -257,28 +257,40 @@ rec_init_offsets_comp_ordinary(
ulint i = 0;
ulint offs = 0;
ulint any_ext = 0;
- const byte* nulls = rec - (extra + 1);
- const byte* lens = nulls
- - UT_BITS_IN_BYTES(index->n_nullable);
- dict_field_t* field;
+ ulint n_null = index->n_nullable;
+ const byte* nulls = temp
+ ? rec - 1
+ : rec - (1 + REC_N_NEW_EXTRA_BYTES);
+ const byte* lens = nulls - UT_BITS_IN_BYTES(n_null);
ulint null_mask = 1;
#ifdef UNIV_DEBUG
- /* We cannot invoke rec_offs_make_valid() here, because it can hold
- that extra != REC_N_NEW_EXTRA_BYTES. Similarly, rec_offs_validate()
- will fail in that case, because it invokes rec_get_status(). */
+ /* We cannot invoke rec_offs_make_valid() here if temp=true.
+ Similarly, rec_offs_validate() will fail in that case, because
+ it invokes rec_get_status(). */
offsets[2] = (ulint) rec;
offsets[3] = (ulint) index;
#endif /* UNIV_DEBUG */
+ ut_ad(temp || dict_table_is_comp(index->table));
+
+ if (temp && dict_table_is_comp(index->table)) {
+ /* No need to do adjust fixed_len=0. We only need to
+ adjust it for ROW_FORMAT=REDUNDANT. */
+ temp = false;
+ }
+
/* read the lengths of fields 0..n */
do {
- ulint len;
+ const dict_field_t* field
+ = dict_index_get_nth_field(index, i);
+ const dict_col_t* col
+ = dict_field_get_col(field);
+ ulint len;
- field = dict_index_get_nth_field(index, i);
- if (!(dict_field_get_col(field)->prtype
- & DATA_NOT_NULL)) {
+ if (!(col->prtype & DATA_NOT_NULL)) {
/* nullable field => read the null flag */
+ ut_ad(n_null--);
if (UNIV_UNLIKELY(!(byte) null_mask)) {
nulls--;
@@ -297,10 +309,9 @@ rec_init_offsets_comp_ordinary(
null_mask <<= 1;
}
- if (UNIV_UNLIKELY(!field->fixed_len)) {
+ if (!field->fixed_len
+ || (temp && !dict_col_get_fixed_size(col, temp))) {
/* Variable-length field: read the length */
- const dict_col_t* col
- = dict_field_get_col(field);
len = *lens--;
/* If the maximum length of the field is up
to 255 bytes, the actual length is always
@@ -394,9 +405,8 @@ rec_init_offsets(
= dict_index_get_n_unique_in_tree(index);
break;
case REC_STATUS_ORDINARY:
- rec_init_offsets_comp_ordinary(rec,
- REC_N_NEW_EXTRA_BYTES,
- index, offsets);
+ rec_init_offsets_comp_ordinary(
+ rec, false, index, offsets);
return;
}
@@ -774,34 +784,45 @@ rec_get_nth_field_offs_old(
/**********************************************************//**
Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT.
@return total size */
-UNIV_INTERN
+UNIV_INLINE __attribute__((warn_unused_result, nonnull(1,2)))
ulint
-rec_get_converted_size_comp_prefix(
-/*===============================*/
+rec_get_converted_size_comp_prefix_low(
+/*===================================*/
const dict_index_t* index, /*!< in: record descriptor;
dict_table_is_comp() is
assumed to hold, even if
it does not */
const dfield_t* fields, /*!< in: array of data fields */
ulint n_fields,/*!< in: number of data fields */
- ulint* extra) /*!< out: extra size */
+ ulint* extra, /*!< out: extra size */
+ bool temp) /*!< in: whether this is a
+ temporary file record */
{
ulint extra_size;
ulint data_size;
ulint i;
- ut_ad(index);
- ut_ad(fields);
+ ulint n_null = index->n_nullable;
ut_ad(n_fields > 0);
ut_ad(n_fields <= dict_index_get_n_fields(index));
+ ut_ad(!temp || extra);
- extra_size = REC_N_NEW_EXTRA_BYTES
- + UT_BITS_IN_BYTES(index->n_nullable);
+ extra_size = temp
+ ? UT_BITS_IN_BYTES(n_null)
+ : REC_N_NEW_EXTRA_BYTES
+ + UT_BITS_IN_BYTES(n_null);
data_size = 0;
+ if (temp && dict_table_is_comp(index->table)) {
+ /* No need to do adjust fixed_len=0. We only need to
+ adjust it for ROW_FORMAT=REDUNDANT. */
+ temp = false;
+ }
+
/* read the lengths of fields 0..n */
for (i = 0; i < n_fields; i++) {
const dict_field_t* field;
ulint len;
+ ulint fixed_len;
const dict_col_t* col;
field = dict_index_get_nth_field(index, i);
@@ -810,6 +831,8 @@ rec_get_converted_size_comp_prefix(
ut_ad(dict_col_type_assert_equal(col,
dfield_get_type(&fields[i])));
+ /* All NULLable fields must be included in the n_null count. */
+ ut_ad((col->prtype & DATA_NOT_NULL) || n_null--);
if (dfield_is_null(&fields[i])) {
/* No length is stored for NULL fields. */
@@ -820,6 +843,11 @@ rec_get_converted_size_comp_prefix(
ut_ad(len <= col->len || col->mtype == DATA_BLOB
|| (col->len == 0 && col->mtype == DATA_VARCHAR));
+ fixed_len = field->fixed_len;
+ if (temp && fixed_len
+ && !dict_col_get_fixed_size(col, temp)) {
+ fixed_len = 0;
+ }
/* If the maximum length of a variable-length field
is up to 255 bytes, the actual length is always stored
in one byte. If the maximum length is more than 255
@@ -827,11 +855,20 @@ rec_get_converted_size_comp_prefix(
0..127. The length will be encoded in two bytes when
it is 128 or more, or when the field is stored externally. */
- if (field->fixed_len) {
- ut_ad(len == field->fixed_len);
+ if (fixed_len) {
+#ifdef UNIV_DEBUG
+ ulint mbminlen = DATA_MBMINLEN(col->mbminmaxlen);
+ ulint mbmaxlen = DATA_MBMAXLEN(col->mbminmaxlen);
+
+ ut_ad(len <= fixed_len);
+
+ ut_ad(!mbmaxlen || len >= mbminlen
+ * (fixed_len / mbmaxlen));
+
/* dict_index_add_col() should guarantee this */
ut_ad(!field->prefix_len
- || field->fixed_len == field->prefix_len);
+ || fixed_len == field->prefix_len);
+#endif /* UNIV_DEBUG */
} else if (dfield_is_ext(&fields[i])) {
ut_ad(col->len >= 256 || col->mtype == DATA_BLOB);
extra_size += 2;
@@ -848,7 +885,7 @@ rec_get_converted_size_comp_prefix(
data_size += len;
}
- if (UNIV_LIKELY_NULL(extra)) {
+ if (extra) {
*extra = extra_size;
}
@@ -856,6 +893,23 @@ rec_get_converted_size_comp_prefix(
}
/**********************************************************//**
+Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT.
+@return total size */
+UNIV_INTERN
+ulint
+rec_get_converted_size_comp_prefix(
+/*===============================*/
+ const dict_index_t* index, /*!< in: record descriptor */
+ const dfield_t* fields, /*!< in: array of data fields */
+ ulint n_fields,/*!< in: number of data fields */
+ ulint* extra) /*!< out: extra size */
+{
+ ut_ad(dict_table_is_comp(index->table));
+ return(rec_get_converted_size_comp_prefix_low(
+ index, fields, n_fields, extra, false));
+}
+
+/**********************************************************//**
Determines the size of a data tuple in ROW_FORMAT=COMPACT.
@return total size */
UNIV_INTERN
@@ -872,8 +926,6 @@ rec_get_converted_size_comp(
ulint* extra) /*!< out: extra size */
{
ulint size;
- ut_ad(index);
- ut_ad(fields);
ut_ad(n_fields > 0);
switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
@@ -899,8 +951,8 @@ rec_get_converted_size_comp(
return(ULINT_UNDEFINED);
}
- return(size + rec_get_converted_size_comp_prefix(index, fields,
- n_fields, extra));
+ return(size + rec_get_converted_size_comp_prefix_low(
+ index, fields, n_fields, extra, false));
}
/***********************************************************//**
@@ -1077,19 +1129,18 @@ rec_convert_dtuple_to_rec_old(
/*********************************************************//**
Builds a ROW_FORMAT=COMPACT record out of a data tuple. */
-UNIV_INTERN
+UNIV_INLINE __attribute__((nonnull))
void
rec_convert_dtuple_to_rec_comp(
/*===========================*/
rec_t* rec, /*!< in: origin of record */
- ulint extra, /*!< in: number of bytes to
- reserve between the record
- header and the data payload
- (normally REC_N_NEW_EXTRA_BYTES) */
const dict_index_t* index, /*!< in: record descriptor */
- ulint status, /*!< in: status bits of the record */
const dfield_t* fields, /*!< in: array of data fields */
- ulint n_fields)/*!< in: number of data fields */
+ ulint n_fields,/*!< in: number of data fields */
+ ulint status, /*!< in: status bits of the record */
+ bool temp) /*!< in: whether to use the
+ format for temporary files in
+ index creation */
{
const dfield_t* field;
const dtype_t* type;
@@ -1101,32 +1152,48 @@ rec_convert_dtuple_to_rec_comp(
ulint n_node_ptr_field;
ulint fixed_len;
ulint null_mask = 1;
- ut_ad(extra == 0 || dict_table_is_comp(index->table));
- ut_ad(extra == 0 || extra == REC_N_NEW_EXTRA_BYTES);
+ ulint n_null;
+
+ ut_ad(temp || dict_table_is_comp(index->table));
ut_ad(n_fields > 0);
- switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
- case REC_STATUS_ORDINARY:
+ if (temp) {
+ ut_ad(status == REC_STATUS_ORDINARY);
ut_ad(n_fields <= dict_index_get_n_fields(index));
n_node_ptr_field = ULINT_UNDEFINED;
- break;
- case REC_STATUS_NODE_PTR:
- ut_ad(n_fields == dict_index_get_n_unique_in_tree(index) + 1);
- n_node_ptr_field = n_fields - 1;
- break;
- case REC_STATUS_INFIMUM:
- case REC_STATUS_SUPREMUM:
- ut_ad(n_fields == 1);
- n_node_ptr_field = ULINT_UNDEFINED;
- break;
- default:
- ut_error;
- return;
+ nulls = rec - 1;
+ if (dict_table_is_comp(index->table)) {
+ /* No need to do adjust fixed_len=0. We only
+ need to adjust it for ROW_FORMAT=REDUNDANT. */
+ temp = false;
+ }
+ } else {
+ nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1);
+
+ switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
+ case REC_STATUS_ORDINARY:
+ ut_ad(n_fields <= dict_index_get_n_fields(index));
+ n_node_ptr_field = ULINT_UNDEFINED;
+ break;
+ case REC_STATUS_NODE_PTR:
+ ut_ad(n_fields
+ == dict_index_get_n_unique_in_tree(index) + 1);
+ n_node_ptr_field = n_fields - 1;
+ break;
+ case REC_STATUS_INFIMUM:
+ case REC_STATUS_SUPREMUM:
+ ut_ad(n_fields == 1);
+ n_node_ptr_field = ULINT_UNDEFINED;
+ break;
+ default:
+ ut_error;
+ return;
+ }
}
end = rec;
- nulls = rec - (extra + 1);
- lens = nulls - UT_BITS_IN_BYTES(index->n_nullable);
+ n_null = index->n_nullable;
+ lens = nulls - UT_BITS_IN_BYTES(n_null);
/* clear the SQL-null flags */
memset(lens + 1, 0, nulls - lens);
@@ -1148,7 +1215,7 @@ rec_convert_dtuple_to_rec_comp(
if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) {
/* nullable field */
- ut_ad(index->n_nullable > 0);
+ ut_ad(n_null--);
if (UNIV_UNLIKELY(!(byte) null_mask)) {
nulls--;
@@ -1171,6 +1238,10 @@ rec_convert_dtuple_to_rec_comp(
ifield = dict_index_get_nth_field(index, i);
fixed_len = ifield->fixed_len;
+ if (temp && fixed_len
+ && !dict_col_get_fixed_size(ifield->col, temp)) {
+ fixed_len = 0;
+ }
/* If the maximum length of a variable-length field
is up to 255 bytes, the actual length is always stored
in one byte. If the maximum length is more than 255
@@ -1178,8 +1249,17 @@ rec_convert_dtuple_to_rec_comp(
0..127. The length will be encoded in two bytes when
it is 128 or more, or when the field is stored externally. */
if (fixed_len) {
- ut_ad(len == fixed_len);
+#ifdef UNIV_DEBUG
+ ulint mbminlen = DATA_MBMINLEN(
+ ifield->col->mbminmaxlen);
+ ulint mbmaxlen = DATA_MBMAXLEN(
+ ifield->col->mbminmaxlen);
+
+ ut_ad(len <= fixed_len);
+ ut_ad(!mbmaxlen || len >= mbminlen
+ * (fixed_len / mbmaxlen));
ut_ad(!dfield_is_ext(field));
+#endif /* UNIV_DEBUG */
} else if (dfield_is_ext(field)) {
ut_ad(ifield->col->len >= 256
|| ifield->col->mtype == DATA_BLOB);
@@ -1227,14 +1307,12 @@ rec_convert_dtuple_to_rec_new(
rec_t* rec;
status = dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK;
- rec_get_converted_size_comp(index, status,
- dtuple->fields, dtuple->n_fields,
- &extra_size);
+ rec_get_converted_size_comp(
+ index, status, dtuple->fields, dtuple->n_fields, &extra_size);
rec = buf + extra_size;
rec_convert_dtuple_to_rec_comp(
- rec, REC_N_NEW_EXTRA_BYTES, index, status,
- dtuple->fields, dtuple->n_fields);
+ rec, index, dtuple->fields, dtuple->n_fields, status, false);
/* Set the info bits of the record */
rec_set_info_and_status_bits(rec, dtuple_get_info_bits(dtuple));
@@ -1296,6 +1374,54 @@ rec_convert_dtuple_to_rec(
return(rec);
}
+#ifndef UNIV_HOTBACKUP
+/**********************************************************//**
+Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT.
+@return total size */
+UNIV_INTERN
+ulint
+rec_get_converted_size_temp(
+/*========================*/
+ const dict_index_t* index, /*!< in: record descriptor */
+ const dfield_t* fields, /*!< in: array of data fields */
+ ulint n_fields,/*!< in: number of data fields */
+ ulint* extra) /*!< out: extra size */
+{
+ return(rec_get_converted_size_comp_prefix_low(
+ index, fields, n_fields, extra, true));
+}
+
+/******************************************************//**
+Determine the offset to each field in temporary file.
+@see rec_convert_dtuple_to_temp() */
+UNIV_INTERN
+void
+rec_init_offsets_temp(
+/*==================*/
+ const rec_t* rec, /*!< in: temporary file record */
+ const dict_index_t* index, /*!< in: record descriptor */
+ ulint* offsets)/*!< in/out: array of offsets;
+ in: n=rec_offs_n_fields(offsets) */
+{
+ rec_init_offsets_comp_ordinary(rec, true, index, offsets);
+}
+
+/*********************************************************//**
+Builds a temporary file record out of a data tuple.
+@see rec_init_offsets_temp() */
+UNIV_INTERN
+void
+rec_convert_dtuple_to_temp(
+/*=======================*/
+ rec_t* rec, /*!< out: record */
+ const dict_index_t* index, /*!< in: record descriptor */
+ const dfield_t* fields, /*!< in: array of data fields */
+ ulint n_fields) /*!< in: number of fields */
+{
+ rec_convert_dtuple_to_rec_comp(rec, index, fields, n_fields,
+ REC_STATUS_ORDINARY, true);
+}
+
/**************************************************************//**
Copies the first n fields of a physical record to a data tuple. The fields
are copied to the memory heap. */
@@ -1506,6 +1632,7 @@ rec_copy_prefix_to_buf(
return(*buf + (rec - (lens + 1)));
}
+#endif /* UNIV_HOTBACKUP */
/***************************************************************//**
Validates the consistency of an old-style physical record.
@@ -1782,4 +1909,47 @@ rec_print(
}
}
}
+
+# ifdef UNIV_DEBUG
+/************************************************************//**
+Reads the DB_TRX_ID of a clustered index record.
+@return the value of DB_TRX_ID */
+UNIV_INTERN
+trx_id_t
+rec_get_trx_id(
+/*===========*/
+ const rec_t* rec, /*!< in: record */
+ const dict_index_t* index) /*!< in: clustered index */
+{
+ const page_t* page
+ = page_align(rec);
+ ulint trx_id_col
+ = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
+ const byte* trx_id;
+ ulint len;
+ mem_heap_t* heap = NULL;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets = offsets_;
+ rec_offs_init(offsets_);
+
+ ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
+ ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID)
+ == index->id);
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(trx_id_col > 0);
+ ut_ad(trx_id_col != ULINT_UNDEFINED);
+
+ offsets = rec_get_offsets(rec, index, offsets, trx_id_col + 1, &heap);
+
+ trx_id = rec_get_nth_field(rec, offsets, trx_id_col, &len);
+
+ ut_ad(len == DATA_TRX_ID_LEN);
+
+ if (heap) {
+ mem_heap_free(heap);
+ }
+
+ return(trx_read_trx_id(trx_id));
+}
+# endif /* UNIV_DEBUG */
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/row/row0ext.cc b/storage/innobase/row/row0ext.cc
index 8d4da9f034b..f084fa09c5a 100644
--- a/storage/innobase/row/row0ext.cc
+++ b/storage/innobase/row/row0ext.cc
@@ -95,6 +95,8 @@ row_ext_create(
row_ext_t* ret;
+ ut_ad(n_ext > 0);
+
ret = static_cast<row_ext_t*>(
mem_heap_alloc(heap,
(sizeof *ret) + (n_ext - 1) * sizeof ret->len));
diff --git a/storage/innobase/row/row0ftsort.cc b/storage/innobase/row/row0ftsort.cc
index 50b681361d8..9a6af50e09d 100644
--- a/storage/innobase/row/row0ftsort.cc
+++ b/storage/innobase/row/row0ftsort.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2010, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2010, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -23,6 +23,7 @@ Create Full Text Index with (parallel) merge sort
Created 10/13/2010 Jimmy Yang
*******************************************************/
+#include "dict0dict.h" /* dict_table_stats_lock() */
#include "row0merge.h"
#include "pars0pars.h"
#include "row0ftsort.h"
@@ -47,9 +48,6 @@ Created 10/13/2010 Jimmy Yang
/** Parallel sort degree */
UNIV_INTERN ulong fts_sort_pll_degree = 2;
-/** Parallel sort buffer size */
-UNIV_INTERN ulong srv_sort_buf_size = 1048576;
-
/*********************************************************************//**
Create a temporary "fts sort index" used to merge sort the
tokenized doc string. The index has three "fields":
@@ -124,7 +122,7 @@ row_merge_create_fts_sort_index(
if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) {
/* If Doc ID column is being added by this create
index, then just check the number of rows in the table */
- if (table->stat_n_rows < MAX_DOC_ID_OPT_VAL) {
+ if (dict_table_get_n_rows(table) < MAX_DOC_ID_OPT_VAL) {
*opt_doc_id_size = TRUE;
}
} else {
@@ -173,10 +171,10 @@ ibool
row_fts_psort_info_init(
/*====================*/
trx_t* trx, /*!< in: transaction */
- struct TABLE* table, /*!< in: MySQL table object */
+ row_merge_dup_t* dup, /*!< in,own: descriptor of
+ FTS index being created */
const dict_table_t* new_table,/*!< in: table on which indexes are
created */
- dict_index_t* index, /*!< in: FTS index to be created */
ibool opt_doc_id_size,
/*!< in: whether to use 4 bytes
instead of 8 bytes integer to
@@ -192,7 +190,6 @@ row_fts_psort_info_init(
fts_psort_t* psort_info = NULL;
fts_psort_t* merge_info = NULL;
ulint block_size;
- os_event_t sort_event;
ibool ret = TRUE;
block_size = 3 * srv_sort_buf_size;
@@ -201,28 +198,28 @@ row_fts_psort_info_init(
fts_sort_pll_degree * sizeof *psort_info));
if (!psort_info) {
- return FALSE;
+ ut_free(dup);
+ return(FALSE);
}
- sort_event = os_event_create(NULL);
-
/* Common Info for all sort threads */
common_info = static_cast<fts_psort_common_t*>(
mem_alloc(sizeof *common_info));
- common_info->table = table;
+ if (!common_info) {
+ ut_free(dup);
+ mem_free(psort_info);
+ return(FALSE);
+ }
+
+ common_info->dup = dup;
common_info->new_table = (dict_table_t*) new_table;
common_info->trx = trx;
- common_info->sort_index = index;
common_info->all_info = psort_info;
- common_info->sort_event = sort_event;
+ common_info->sort_event = os_event_create();
+ common_info->merge_event = os_event_create();
common_info->opt_doc_id_size = opt_doc_id_size;
- if (!common_info) {
- mem_free(psort_info);
- return FALSE;
- }
-
/* There will be FTS_NUM_AUX_INDEX number of "sort buckets" for
each parallel sort thread. Each "sort bucket" holds records for
a particular "FTS index partition" */
@@ -242,9 +239,12 @@ row_fts_psort_info_init(
}
psort_info[j].merge_buf[i] = row_merge_buf_create(
- index);
+ dup->index);
- row_merge_file_create(psort_info[j].merge_file[i]);
+ if (row_merge_file_create(psort_info[j].merge_file[i])
+ < 0) {
+ goto func_exit;
+ }
/* Need to align memory for O_DIRECT write */
psort_info[j].block_alloc[i] =
@@ -314,6 +314,9 @@ row_fts_psort_info_destroy(
}
}
+ os_event_free(merge_info[0].psort_common->sort_event);
+ os_event_free(merge_info[0].psort_common->merge_event);
+ ut_free(merge_info[0].psort_common->dup);
mem_free(merge_info[0].psort_common);
mem_free(psort_info);
}
@@ -433,12 +436,11 @@ row_merge_fts_doc_tokenize(
ut_a(t_ctx->buf_used < FTS_NUM_AUX_INDEX);
idx = t_ctx->buf_used;
- buf->tuples[buf->n_tuples + n_tuple[idx]] = field =
- static_cast<dfield_t*>(mem_heap_alloc(
- buf->heap,
- FTS_NUM_FIELDS_SORT * sizeof *field));
+ mtuple_t* mtuple = &buf->tuples[buf->n_tuples + n_tuple[idx]];
- ut_a(field);
+ field = mtuple->fields = static_cast<dfield_t*>(
+ mem_heap_alloc(buf->heap,
+ FTS_NUM_FIELDS_SORT * sizeof *field));
/* The first field is the tokenized word */
dfield_set_data(field, t_str.f_str, t_str.f_len);
@@ -522,6 +524,10 @@ row_merge_fts_doc_tokenize(
/* Update the data length and the number of new word tuples
added in this round of tokenization */
for (i = 0; i < FTS_NUM_AUX_INDEX; i++) {
+ /* The computation of total_size below assumes that no
+ delete-mark flags will be stored and that all fields
+ are NOT NULL and fixed-length. */
+
sort_buf[i]->total_size += data_size[i];
sort_buf[i]->n_tuples += n_tuple[i];
@@ -560,7 +566,7 @@ fts_parallel_tokenization(
ulint mycount[FTS_NUM_AUX_INDEX];
ib_uint64_t total_rec = 0;
ulint num_doc_processed = 0;
- doc_id_t last_doc_id;
+ doc_id_t last_doc_id = 0;
ulint zip_size;
mem_heap_t* blob_heap = NULL;
fts_doc_t doc;
@@ -581,10 +587,10 @@ fts_parallel_tokenization(
memset(mycount, 0, FTS_NUM_AUX_INDEX * sizeof(int));
doc.charset = fts_index_get_charset(
- psort_info->psort_common->sort_index);
+ psort_info->psort_common->dup->index);
idx_field = dict_index_get_nth_field(
- psort_info->psort_common->sort_index, 0);
+ psort_info->psort_common->dup->index, 0);
word_dtype.prtype = idx_field->col->prtype;
word_dtype.mbminmaxlen = idx_field->col->mbminmaxlen;
word_dtype.mtype = (strcmp(doc.charset->name, "latin1_swedish_ci") == 0)
@@ -742,7 +748,12 @@ loop:
}
if (doc_item) {
- prev_doc_item = doc_item;
+ prev_doc_item = doc_item;
+
+ if (last_doc_id != doc_item->doc_id) {
+ t_ctx.init_pos = 0;
+ }
+
retried = 0;
} else if (psort_info->state == FTS_PARENT_COMPLETE) {
retried++;
@@ -751,16 +762,51 @@ loop:
goto loop;
exit:
+ /* Do a final sort of the last (or latest) batch of records
+ in block memory. Flush them to temp file if records cannot
+	be held in one block of memory */
for (i = 0; i < FTS_NUM_AUX_INDEX; i++) {
if (t_ctx.rows_added[i]) {
row_merge_buf_sort(buf[i], NULL);
row_merge_buf_write(
- buf[i], (const merge_file_t*) merge_file[i],
- block[i]);
- row_merge_write(merge_file[i]->fd,
- merge_file[i]->offset++, block[i]);
+ buf[i], merge_file[i], block[i]);
+
+ /* Write to temp file, only if records have
+ been flushed to temp file before (offset > 0):
+ The pseudo code for sort is following:
+
+ while (there are rows) {
+ tokenize rows, put result in block[]
+ if (block[] runs out) {
+ sort rows;
+ write to temp file with
+ row_merge_write();
+ offset++;
+ }
+ }
+
+ # write out the last batch
+ if (offset > 0) {
+ row_merge_write();
+ offset++;
+ } else {
+ # no need to write anything
+			offset stays at 0
+ }
+
+			so if merge_file[i]->offset is 0 when we come
+			here for the last batch, it means the rows have
+			never been flushed to the temp file; they can all
+			be held in memory */
+ if (merge_file[i]->offset != 0) {
+ row_merge_write(merge_file[i]->fd,
+ merge_file[i]->offset++,
+ block[i]);
+
+ UNIV_MEM_INVALID(block[i][0],
+ srv_sort_buf_size);
+ }
- UNIV_MEM_INVALID(block[i][0], srv_sort_buf_size);
buf[i] = row_merge_buf_empty(buf[i]);
t_ctx.rows_added[i] = 0;
}
@@ -776,16 +822,19 @@ exit:
continue;
}
- tmpfd[i] = innobase_mysql_tmpfile();
+ tmpfd[i] = row_merge_file_create_low();
+ if (tmpfd[i] < 0) {
+ goto func_exit;
+ }
+
row_merge_sort(psort_info->psort_common->trx,
- psort_info->psort_common->sort_index,
- merge_file[i],
- (row_merge_block_t*) block[i], &tmpfd[i],
- psort_info->psort_common->table);
+ psort_info->psort_common->dup,
+ merge_file[i], block[i], &tmpfd[i]);
total_rec += merge_file[i]->n_rec;
close(tmpfd[i]);
}
+func_exit:
if (fts_enable_diag_print) {
DEBUG_FTS_SORT_PRINT(" InnoDB_FTS: complete merge sort\n");
}
@@ -794,8 +843,14 @@ exit:
psort_info->child_status = FTS_CHILD_COMPLETE;
os_event_set(psort_info->psort_common->sort_event);
+ psort_info->child_status = FTS_CHILD_EXITING;
+
+#ifdef __WIN__
+ CloseHandle(psort_info->thread_hdl);
+#endif /*__WIN__ */
os_thread_exit(NULL);
+
OS_THREAD_DUMMY_RETURN;
}
@@ -812,8 +867,9 @@ row_fts_start_psort(
for (i = 0; i < fts_sort_pll_degree; i++) {
psort_info[i].psort_id = i;
- os_thread_create(fts_parallel_tokenization,
- (void*) &psort_info[i], &thd_id);
+ psort_info[i].thread_hdl = os_thread_create(
+ fts_parallel_tokenization,
+ (void*) &psort_info[i], &thd_id);
}
}
@@ -833,14 +889,20 @@ fts_parallel_merge(
id = psort_info->psort_id;
- row_fts_merge_insert(psort_info->psort_common->sort_index,
+ row_fts_merge_insert(psort_info->psort_common->dup->index,
psort_info->psort_common->new_table,
psort_info->psort_common->all_info, id);
psort_info->child_status = FTS_CHILD_COMPLETE;
- os_event_set(psort_info->psort_common->sort_event);
+ os_event_set(psort_info->psort_common->merge_event);
+ psort_info->child_status = FTS_CHILD_EXITING;
+
+#ifdef __WIN__
+ CloseHandle(psort_info->thread_hdl);
+#endif /*__WIN__ */
os_thread_exit(NULL);
+
OS_THREAD_DUMMY_RETURN;
}
@@ -860,16 +922,16 @@ row_fts_start_parallel_merge(
merge_info[i].psort_id = i;
merge_info[i].child_status = 0;
- os_thread_create(fts_parallel_merge,
- (void*) &merge_info[i], &thd_id);
+ merge_info[i].thread_hdl = os_thread_create(
+ fts_parallel_merge, (void*) &merge_info[i], &thd_id);
}
}
/********************************************************************//**
Insert processed FTS data to auxillary index tables.
@return DB_SUCCESS if insertion runs fine */
-UNIV_INTERN
-ulint
+static __attribute__((nonnull))
+dberr_t
row_merge_write_fts_word(
/*=====================*/
trx_t* trx, /*!< in: transaction */
@@ -880,15 +942,15 @@ row_merge_write_fts_word(
CHARSET_INFO* charset) /*!< in: charset */
{
ulint selected;
- ulint ret = DB_SUCCESS;
+ dberr_t ret = DB_SUCCESS;
selected = fts_select_index(
charset, word->text.f_str, word->text.f_len);
fts_table->suffix = fts_get_suffix(selected);
/* Pop out each fts_node in word->nodes write them to auxiliary table */
- while(ib_vector_size(word->nodes) > 0) {
- ulint error;
+ while (ib_vector_size(word->nodes) > 0) {
+ dberr_t error;
fts_node_t* fts_node;
fts_node = static_cast<fts_node_t*>(ib_vector_pop(word->nodes));
@@ -900,8 +962,8 @@ row_merge_write_fts_word(
if (error != DB_SUCCESS) {
fprintf(stderr, "InnoDB: failed to write"
" word %s to FTS auxiliary index"
- " table, error (%lu) \n",
- word->text.f_str, error);
+ " table, error (%s) \n",
+ word->text.f_str, ut_strerr(error));
ret = error;
}
@@ -1064,7 +1126,6 @@ row_fts_sel_tree_propagate(
int child_left;
int child_right;
int selected;
- ibool null_eq = FALSE;
/* Find which parent this value will be propagated to */
parent = (propogated - 1) / 2;
@@ -1083,10 +1144,10 @@ row_fts_sel_tree_propagate(
} else if (child_right == -1
|| mrec[child_right] == NULL) {
selected = child_left;
- } else if (row_merge_cmp(mrec[child_left], mrec[child_right],
- offsets[child_left],
- offsets[child_right],
- index, &null_eq) < 0) {
+ } else if (cmp_rec_rec_simple(mrec[child_left], mrec[child_right],
+ offsets[child_left],
+ offsets[child_right],
+ index, NULL) < 0) {
selected = child_left;
} else {
selected = child_right;
@@ -1143,8 +1204,6 @@ row_fts_build_sel_tree_level(
num_item = (1 << level);
for (i = 0; i < num_item; i++) {
- ibool null_eq = FALSE;
-
child_left = sel_tree[(start + i) * 2 + 1];
child_right = sel_tree[(start + i) * 2 + 2];
@@ -1174,14 +1233,12 @@ row_fts_build_sel_tree_level(
}
/* Select the smaller one to set parent pointer */
- if (row_merge_cmp(mrec[child_left], mrec[child_right],
- offsets[child_left],
- offsets[child_right],
- index, &null_eq) < 0) {
- sel_tree[start + i] = child_left;
- } else {
- sel_tree[start + i] = child_right;
- }
+ int cmp = cmp_rec_rec_simple(
+ mrec[child_left], mrec[child_right],
+ offsets[child_left], offsets[child_right],
+ index, NULL);
+
+ sel_tree[start + i] = cmp < 0 ? child_left : child_right;
}
}
@@ -1231,7 +1288,7 @@ Read sorted file containing index data tuples and insert these data
tuples to the index
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
row_fts_merge_insert(
/*=================*/
dict_index_t* index, /*!< in: index */
@@ -1243,7 +1300,7 @@ row_fts_merge_insert(
const byte** b;
mem_heap_t* tuple_heap;
mem_heap_t* heap;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
ulint* foffs;
ulint** offsets;
fts_tokenizer_word_t new_word;
@@ -1317,7 +1374,7 @@ row_fts_merge_insert(
count_diag += (int) psort_info[i].merge_file[id]->n_rec;
}
- if (fts_enable_diag_print) {
+ if (fts_enable_diag_print) {
ut_print_timestamp(stderr);
fprintf(stderr, " InnoDB_FTS: to inserted %lu records\n",
(ulong) count_diag);
@@ -1349,8 +1406,13 @@ row_fts_merge_insert(
/* No Rows to read */
mrec[i] = b[i] = NULL;
} else {
- if (!row_merge_read(fd[i], foffs[i],
- (row_merge_block_t*) block[i])) {
+ /* Read from temp file only if it has been
+ written to. Otherwise, block memory holds
+ all the sorted records */
+ if (psort_info[i].merge_file[id]->offset > 0
+ && (!row_merge_read(
+ fd[i], foffs[i],
+ (row_merge_block_t*) block[i]))) {
error = DB_CORRUPTION;
goto exit;
}
@@ -1386,14 +1448,14 @@ row_fts_merge_insert(
}
for (i = min_rec + 1; i < fts_sort_pll_degree; i++) {
- ibool null_eq = FALSE;
if (!mrec[i]) {
continue;
}
- if (row_merge_cmp(mrec[i], mrec[min_rec],
- offsets[i], offsets[min_rec],
- index, &null_eq) < 0) {
+ if (cmp_rec_rec_simple(
+ mrec[i], mrec[min_rec],
+ offsets[i], offsets[min_rec],
+ index, NULL) < 0) {
min_rec = i;
}
}
diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc
new file mode 100644
index 00000000000..f5eb31191a5
--- /dev/null
+++ b/storage/innobase/row/row0import.cc
@@ -0,0 +1,3806 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file row/row0import.cc
+Import a tablespace to a running instance.
+
+Created 2012-02-08 by Sunny Bains.
+*******************************************************/
+
+#include "row0import.h"
+
+#ifdef UNIV_NONINL
+#include "row0import.ic"
+#endif
+
+#include "btr0pcur.h"
+#include "que0que.h"
+#include "dict0boot.h"
+#include "ibuf0ibuf.h"
+#include "pars0pars.h"
+#include "row0upd.h"
+#include "row0sel.h"
+#include "row0mysql.h"
+#include "srv0start.h"
+#include "row0quiesce.h"
+
+#include <vector>
+
+/** The size of the buffer to use for IO. Note: os_file_read() doesn't expect
+reads to fail. If you set the buffer size to be greater than a multiple of the
+file size then it will assert. TODO: Fix this limitation of the IO functions.
+@param n - page size of the tablespace.
+@retval number of pages */
+#define IO_BUFFER_SIZE(n) ((1024 * 1024) / n)
+
+/** For gathering stats on records during phase I */
+struct row_stats_t {
+ ulint m_n_deleted; /*!< Number of deleted records
+ found in the index */
+
+ ulint m_n_purged; /*!< Number of records purged
+					optimistically */
+
+ ulint m_n_rows; /*!< Number of rows */
+
+ ulint m_n_purge_failed; /*!< Number of deleted rows
+ that could not be purged */
+};
+
+/** Index information required by IMPORT. */
+struct row_index_t {
+ index_id_t m_id; /*!< Index id of the table
+ in the exporting server */
+ byte* m_name; /*!< Index name */
+
+ ulint m_space; /*!< Space where it is placed */
+
+ ulint m_page_no; /*!< Root page number */
+
+ ulint m_type; /*!< Index type */
+
+ ulint m_trx_id_offset; /*!< Relevant only for clustered
+ indexes, offset of transaction
+ id system column */
+
+ ulint m_n_user_defined_cols; /*!< User defined columns */
+
+ ulint m_n_uniq; /*!< Number of columns that can
+ uniquely identify the row */
+
+ ulint m_n_nullable; /*!< Number of nullable
+ columns */
+
+ ulint m_n_fields; /*!< Total number of fields */
+
+ dict_field_t* m_fields; /*!< Index fields */
+
+ const dict_index_t*
+ m_srv_index; /*!< Index instance in the
+ importing server */
+
+ row_stats_t m_stats; /*!< Statistics gathered during
+ the import phase */
+
+};
+
+/** Meta data required by IMPORT. */
+struct row_import {
+ row_import() UNIV_NOTHROW
+ :
+ m_table(),
+ m_version(),
+ m_hostname(),
+ m_table_name(),
+ m_autoinc(),
+ m_page_size(),
+ m_flags(),
+ m_n_cols(),
+ m_cols(),
+ m_col_names(),
+ m_n_indexes(),
+ m_indexes(),
+ m_missing(true) { }
+
+ ~row_import() UNIV_NOTHROW;
+
+ /**
+	Find the index entry in the indexes array.
+ @param name - index name
+ @return instance if found else 0. */
+ row_index_t* get_index(const char* name) const UNIV_NOTHROW;
+
+ /**
+ Get the number of rows in the index.
+ @param name - index name
+ @return number of rows (doesn't include delete marked rows). */
+ ulint get_n_rows(const char* name) const UNIV_NOTHROW;
+
+ /**
+ Find the ordinal value of the column name in the cfg table columns.
+ @param name - of column to look for.
+ @return ULINT_UNDEFINED if not found. */
+ ulint find_col(const char* name) const UNIV_NOTHROW;
+
+ /**
+	Find the index field entry in the cfg indexes fields.
+ @name - of the index to look for
+ @return instance if found else 0. */
+ const dict_field_t* find_field(
+ const row_index_t* cfg_index,
+ const char* name) const UNIV_NOTHROW;
+
+ /**
+ Get the number of rows for which purge failed during the convert phase.
+ @param name - index name
+ @return number of rows for which purge failed. */
+ ulint get_n_purge_failed(const char* name) const UNIV_NOTHROW;
+
+ /**
+ Check if the index is clean. ie. no delete-marked records
+ @param name - index name
+ @return true if index needs to be purged. */
+ bool requires_purge(const char* name) const UNIV_NOTHROW
+ {
+ return(get_n_purge_failed(name) > 0);
+ }
+
+ /**
+ Set the index root <space, pageno> using the index name */
+ void set_root_by_name() UNIV_NOTHROW;
+
+ /**
+ Set the index root <space, pageno> using a heuristic
+ @return DB_SUCCESS or error code */
+ dberr_t set_root_by_heuristic() UNIV_NOTHROW;
+
+ /** Check if the index schema that was read from the .cfg file
+ matches the in memory index definition.
+ Note: It will update row_import_t::m_srv_index to map the meta-data
+ read from the .cfg file to the server index instance.
+ @return DB_SUCCESS or error code. */
+ dberr_t match_index_columns(
+ THD* thd,
+ const dict_index_t* index) UNIV_NOTHROW;
+
+ /**
+ Check if the table schema that was read from the .cfg file matches the
+ in memory table definition.
+ @param thd - MySQL session variable
+ @return DB_SUCCESS or error code. */
+ dberr_t match_table_columns(
+ THD* thd) UNIV_NOTHROW;
+
+ /**
+ Check if the table (and index) schema that was read from the .cfg file
+ matches the in memory table definition.
+ @param thd - MySQL session variable
+ @return DB_SUCCESS or error code. */
+ dberr_t match_schema(
+ THD* thd) UNIV_NOTHROW;
+
+ dict_table_t* m_table; /*!< Table instance */
+
+ ulint m_version; /*!< Version of config file */
+
+ byte* m_hostname; /*!< Hostname where the
+ tablespace was exported */
+ byte* m_table_name; /*!< Exporting instance table
+ name */
+
+ ib_uint64_t m_autoinc; /*!< Next autoinc value */
+
+ ulint m_page_size; /*!< Tablespace page size */
+
+ ulint m_flags; /*!< Table flags */
+
+ ulint m_n_cols; /*!< Number of columns in the
+ meta-data file */
+
+ dict_col_t* m_cols; /*!< Column data */
+
+ byte** m_col_names; /*!< Column names, we store the
+					column names separately because
+ there is no field to store the
+ value in dict_col_t */
+
+ ulint m_n_indexes; /*!< Number of indexes,
+ including clustered index */
+
+ row_index_t* m_indexes; /*!< Index meta data */
+
+ bool m_missing; /*!< true if a .cfg file was
+ found and was readable */
+};
+
+/** Use the page cursor to iterate over records in a block. */
+class RecIterator {
+public:
+ /**
+ Default constructor */
+ RecIterator() UNIV_NOTHROW
+ {
+ memset(&m_cur, 0x0, sizeof(m_cur));
+ }
+
+ /**
+ Position the cursor on the first user record. */
+ void open(buf_block_t* block) UNIV_NOTHROW
+ {
+ page_cur_set_before_first(block, &m_cur);
+
+ if (!end()) {
+ next();
+ }
+ }
+
+ /**
+ Move to the next record. */
+ void next() UNIV_NOTHROW
+ {
+ page_cur_move_to_next(&m_cur);
+ }
+
+ /**
+ @return the current record */
+ rec_t* current() UNIV_NOTHROW
+ {
+ ut_ad(!end());
+ return(page_cur_get_rec(&m_cur));
+ }
+
+ /**
+ @return true if cursor is at the end */
+ bool end() UNIV_NOTHROW
+ {
+ return(page_cur_is_after_last(&m_cur) == TRUE);
+ }
+
+ /** Remove the current record
+ @return true on success */
+ bool remove(
+ const dict_index_t* index,
+ page_zip_des_t* page_zip,
+ ulint* offsets) UNIV_NOTHROW
+ {
+ /* We can't end up with an empty page unless it is root. */
+ if (page_get_n_recs(m_cur.block->frame) <= 1) {
+ return(false);
+ }
+
+ return(page_delete_rec(index, &m_cur, page_zip, offsets));
+ }
+
+private:
+ page_cur_t m_cur;
+};
+
+/** Class that purges delete marked records from indexes, both secondary
+and clustered. It does a pessimistic delete. This should only be done if we
+couldn't purge the delete marked records during Phase I. */
+class IndexPurge {
+public:
+ /** Constructor
+ @param trx - the user transaction covering the import tablespace
+ @param index - to be imported
+ @param space_id - space id of the tablespace */
+ IndexPurge(
+ trx_t* trx,
+ dict_index_t* index) UNIV_NOTHROW
+ :
+ m_trx(trx),
+ m_index(index),
+ m_n_rows(0)
+ {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Phase II - Purge records from index %s",
+ index->name);
+ }
+
+	/** Destructor */
+ ~IndexPurge() UNIV_NOTHROW { }
+
+ /** Purge delete marked records.
+ @return DB_SUCCESS or error code. */
+ dberr_t garbage_collect() UNIV_NOTHROW;
+
+ /** The number of records that are not delete marked.
+ @return total records in the index after purge */
+ ulint get_n_rows() const UNIV_NOTHROW
+ {
+ return(m_n_rows);
+ }
+
+private:
+ /**
+ Begin import, position the cursor on the first record. */
+ void open() UNIV_NOTHROW;
+
+ /**
+	Close the persistent cursor and commit the mini-transaction. */
+ void close() UNIV_NOTHROW;
+
+ /**
+ Position the cursor on the next record.
+ @return DB_SUCCESS or error code */
+ dberr_t next() UNIV_NOTHROW;
+
+ /**
+ Store the persistent cursor position and reopen the
+ B-tree cursor in BTR_MODIFY_TREE mode, because the
+ tree structure may be changed during a pessimistic delete. */
+ void purge_pessimistic_delete() UNIV_NOTHROW;
+
+ /**
+ Purge delete-marked records.
+ @param offsets - current row offsets. */
+ void purge() UNIV_NOTHROW;
+
+protected:
+ // Disable copying
+ IndexPurge();
+ IndexPurge(const IndexPurge&);
+ IndexPurge &operator=(const IndexPurge&);
+
+private:
+ trx_t* m_trx; /*!< User transaction */
+ mtr_t m_mtr; /*!< Mini-transaction */
+ btr_pcur_t m_pcur; /*!< Persistent cursor */
+ dict_index_t* m_index; /*!< Index to be processed */
+ ulint m_n_rows; /*!< Records in index */
+};
+
+/** Functor that is called for each physical page that is read from the
+tablespace file. */
+class AbstractCallback : public PageCallback {
+public:
+ /** Constructor
+ @param trx - covering transaction */
+ AbstractCallback(trx_t* trx)
+ :
+ m_trx(trx),
+ m_space(ULINT_UNDEFINED),
+ m_xdes(),
+ m_xdes_page_no(ULINT_UNDEFINED),
+ m_space_flags(ULINT_UNDEFINED),
+ m_table_flags(ULINT_UNDEFINED) UNIV_NOTHROW { }
+
+ /**
+ Free any extent descriptor instance */
+ virtual ~AbstractCallback()
+ {
+ delete [] m_xdes;
+ }
+
+ /** Determine the page size to use for traversing the tablespace
+ @param file_size - size of the tablespace file in bytes
+ @param block - contents of the first page in the tablespace file.
+ @retval DB_SUCCESS or error code. */
+ virtual dberr_t init(
+ os_offset_t file_size,
+ const buf_block_t* block) UNIV_NOTHROW;
+
+ /** @return true if compressed table. */
+ bool is_compressed_table() const UNIV_NOTHROW
+ {
+ return(get_zip_size() > 0);
+ }
+
+protected:
+ /**
+ Get the data page depending on the table type, compressed or not.
+ @param block - block read from disk
+ @retval the buffer frame */
+ buf_frame_t* get_frame(buf_block_t* block) const UNIV_NOTHROW
+ {
+ if (is_compressed_table()) {
+ return(block->page.zip.data);
+ }
+
+ return(buf_block_get_frame(block));
+ }
+
+ /** Check for session interrupt. If required we could
+ even flush to disk here every N pages.
+ @retval DB_SUCCESS or error code */
+ dberr_t periodic_check() UNIV_NOTHROW
+ {
+ if (trx_is_interrupted(m_trx)) {
+ return(DB_INTERRUPTED);
+ }
+
+ return(DB_SUCCESS);
+ }
+
+ /**
+ Get the physical offset of the extent descriptor within the page.
+ @param page_no - page number of the extent descriptor
+ @param page - contents of the page containing the extent descriptor.
+ @return the start of the xdes array in a page */
+ const xdes_t* xdes(
+ ulint page_no,
+ const page_t* page) const UNIV_NOTHROW
+ {
+ ulint offset;
+
+ offset = xdes_calc_descriptor_index(get_zip_size(), page_no);
+
+ return(page + XDES_ARR_OFFSET + XDES_SIZE * offset);
+ }
+
+ /**
+ Set the current page directory (xdes). If the extent descriptor is
+ marked as free then free the current extent descriptor and set it to
+ 0. This implies that all pages that are covered by this extent
+ descriptor are also freed.
+
+ @param page_no - offset of page within the file
+ @param page - page contents
+ @return DB_SUCCESS or error code. */
+ dberr_t set_current_xdes(
+ ulint page_no,
+ const page_t* page) UNIV_NOTHROW
+ {
+ m_xdes_page_no = page_no;
+
+ delete[] m_xdes;
+
+ m_xdes = 0;
+
+ ulint state;
+ const xdes_t* xdesc = page + XDES_ARR_OFFSET;
+
+ state = mach_read_ulint(xdesc + XDES_STATE, MLOG_4BYTES);
+
+ if (state != XDES_FREE) {
+
+ m_xdes = new(std::nothrow) xdes_t[m_page_size];
+
+ /* Trigger OOM */
+ DBUG_EXECUTE_IF("ib_import_OOM_13",
+ delete [] m_xdes; m_xdes = 0;);
+
+ if (m_xdes == 0) {
+ return(DB_OUT_OF_MEMORY);
+ }
+
+ memcpy(m_xdes, page, m_page_size);
+ }
+
+ return(DB_SUCCESS);
+ }
+
+ /**
+ @return true if it is a root page */
+ bool is_root_page(const page_t* page) const UNIV_NOTHROW
+ {
+ ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
+
+ return(mach_read_from_4(page + FIL_PAGE_NEXT) == FIL_NULL
+ && mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL);
+ }
+
+ /**
+ Check if the page is marked as free in the extent descriptor.
+ @param page_no - page number to check in the extent descriptor.
+ @return true if the page is marked as free */
+ bool is_free(ulint page_no) const UNIV_NOTHROW
+ {
+ ut_a(xdes_calc_descriptor_page(get_zip_size(), page_no)
+ == m_xdes_page_no);
+
+ if (m_xdes != 0) {
+ const xdes_t* xdesc = xdes(page_no, m_xdes);
+ ulint pos = page_no % FSP_EXTENT_SIZE;
+
+ return(xdes_get_bit(xdesc, XDES_FREE_BIT, pos));
+ }
+
+ /* If the current xdes was free, the page must be free. */
+ return(true);
+ }
+
+protected:
+ /** Covering transaction. */
+ trx_t* m_trx;
+
+ /** Space id of the file being iterated over. */
+ ulint m_space;
+
+ /** Minimum page number for which the free list has not been
+ initialized: the pages >= this limit are, by definition, free;
+ note that in a single-table tablespace where size < 64 pages,
+ this number is 64, i.e., we have initialized the space about
+	the first extent, but have not physically allocated those pages
+ to the file. @see FSP_LIMIT. */
+ ulint m_free_limit;
+
+ /** Current size of the space in pages */
+ ulint m_size;
+
+ /** Current extent descriptor page */
+ xdes_t* m_xdes;
+
+ /** Physical page offset in the file of the extent descriptor */
+ ulint m_xdes_page_no;
+
+ /** Flags value read from the header page */
+ ulint m_space_flags;
+
+ /** Derived from m_space_flags and row format type, the row format
+ type is determined from the page header. */
+ ulint m_table_flags;
+};
+
+/** Determine the page size to use for traversing the tablespace
+@param file_size - size of the tablespace file in bytes
+@param block - contents of the first page in the tablespace file.
+@retval DB_SUCCESS or error code. */
+dberr_t
+AbstractCallback::init(
+ os_offset_t file_size,
+ const buf_block_t* block) UNIV_NOTHROW
+{
+ const page_t* page = block->frame;
+
+ m_space_flags = fsp_header_get_flags(page);
+
+ /* Since we don't know whether it is a compressed table
+ or not, the data is always read into the block->frame. */
+
+ dberr_t err = set_zip_size(block->frame);
+
+ if (err != DB_SUCCESS) {
+ return(DB_CORRUPTION);
+ }
+
+ /* Set the page size used to traverse the tablespace. */
+
+ m_page_size = (is_compressed_table())
+ ? get_zip_size() : fsp_flags_get_page_size(m_space_flags);
+
+ if (m_page_size == 0) {
+ ib_logf(IB_LOG_LEVEL_ERROR, "Page size is 0");
+ return(DB_CORRUPTION);
+ } else if (!is_compressed_table() && m_page_size != UNIV_PAGE_SIZE) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Page size %lu of ibd file is not the same "
+ "as the server page size %lu",
+ m_page_size, UNIV_PAGE_SIZE);
+
+ return(DB_CORRUPTION);
+
+ } else if ((file_size % m_page_size)) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "File size " UINT64PF " is not a multiple "
+ "of the page size %lu",
+ (ib_uint64_t) file_size, (ulong) m_page_size);
+
+ return(DB_CORRUPTION);
+ }
+
+ ut_a(m_space == ULINT_UNDEFINED);
+
+ m_size = mach_read_from_4(page + FSP_SIZE);
+ m_free_limit = mach_read_from_4(page + FSP_FREE_LIMIT);
+ m_space = mach_read_from_4(page + FSP_HEADER_OFFSET + FSP_SPACE_ID);
+
+ if ((err = set_current_xdes(0, page)) != DB_SUCCESS) {
+ return(err);
+ }
+
+ return(DB_SUCCESS);
+}
+
+/**
+Try and determine the index root pages by checking if the next/prev
+pointers are both FIL_NULL. We need to ensure that we skip deleted pages. */
+struct FetchIndexRootPages : public AbstractCallback {
+
+ /** Index information gathered from the .ibd file. */
+ struct Index {
+
+ Index(index_id_t id, ulint page_no)
+ :
+ m_id(id),
+ m_page_no(page_no) { }
+
+ index_id_t m_id; /*!< Index id */
+ ulint m_page_no; /*!< Root page number */
+ };
+
+ typedef std::vector<Index> Indexes;
+
+ /** Constructor
+ @param trx - covering (user) transaction
+ @param table - table definition in server .*/
+ FetchIndexRootPages(const dict_table_t* table, trx_t* trx)
+ :
+ AbstractCallback(trx),
+ m_table(table) UNIV_NOTHROW { }
+
+ /** Destructor */
+ virtual ~FetchIndexRootPages() UNIV_NOTHROW { }
+
+ /**
+ @retval the space id of the tablespace being iterated over */
+ virtual ulint get_space_id() const UNIV_NOTHROW
+ {
+ return(m_space);
+ }
+
+ /**
+ Check if the .ibd file row format is the same as the table's.
+ @param ibd_table_flags - determined from space and page.
+ @return DB_SUCCESS or error code. */
+ dberr_t check_row_format(ulint ibd_table_flags) UNIV_NOTHROW
+ {
+ dberr_t err;
+ rec_format_t ibd_rec_format;
+ rec_format_t table_rec_format;
+
+ if (!dict_tf_is_valid(ibd_table_flags)) {
+
+ ib_errf(m_trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLE_SCHEMA_MISMATCH,
+ ".ibd file has invlad table flags: %lx",
+ ibd_table_flags);
+
+ return(DB_CORRUPTION);
+ }
+
+ ibd_rec_format = dict_tf_get_rec_format(ibd_table_flags);
+ table_rec_format = dict_tf_get_rec_format(m_table->flags);
+
+ if (table_rec_format != ibd_rec_format) {
+
+ ib_errf(m_trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLE_SCHEMA_MISMATCH,
+ "Table has %s row format, .ibd "
+ "file has %s row format.",
+ dict_tf_to_row_format_string(m_table->flags),
+ dict_tf_to_row_format_string(ibd_table_flags));
+
+ err = DB_CORRUPTION;
+ } else {
+ err = DB_SUCCESS;
+ }
+
+ return(err);
+ }
+
+ /**
+ Called for each block as it is read from the file.
+ @param offset - physical offset in the file
+ @param block - block to convert, it is not from the buffer pool.
+ @retval DB_SUCCESS or error code. */
+ virtual dberr_t operator() (
+ os_offset_t offset,
+ buf_block_t* block) UNIV_NOTHROW;
+
+ /** Update the import configuration that will be used to import
+ the tablespace. */
+ dberr_t build_row_import(row_import* cfg) const UNIV_NOTHROW;
+
+ /** Table definition in server. */
+ const dict_table_t* m_table;
+
+ /** Index information */
+ Indexes m_indexes;
+};
+
+/**
+Called for each block as it is read from the file. Check index pages to
+determine the exact row format. We can't get that from the tablespace
+header flags alone.
+
+@param offset - physical offset in the file
+@param block - block to convert, it is not from the buffer pool.
+@retval DB_SUCCESS or error code. */
+dberr_t
+FetchIndexRootPages::operator() (
+	os_offset_t	offset,
+	buf_block_t*	block) UNIV_NOTHROW
+{
+	dberr_t		err;
+
+	/* After this check err == DB_SUCCESS; page types that are not
+	handled below therefore fall out with DB_SUCCESS unchanged. */
+	if ((err = periodic_check()) != DB_SUCCESS) {
+		return(err);
+	}
+
+	const page_t*	page = get_frame(block);
+
+	ulint	page_type = fil_page_get_type(page);
+
+	/* Cross-check the page number stored in the page against the
+	physical file offset; a mismatch means the file is corrupt. */
+	if (block->page.offset * m_page_size != offset) {
+		ib_logf(IB_LOG_LEVEL_ERROR,
+			"Page offset doesn't match file offset: "
+			"page offset: %lu, file offset: %lu",
+			(ulint) block->page.offset,
+			(ulint) (offset / m_page_size));
+
+		err = DB_CORRUPTION;
+	} else if (page_type == FIL_PAGE_TYPE_XDES) {
+		err = set_current_xdes(block->page.offset, page);
+	} else if (page_type == FIL_PAGE_INDEX
+		   && !is_free(block->page.offset)
+		   && is_root_page(page)) {
+
+		index_id_t	id = btr_page_get_index_id(page);
+		ulint		page_no = buf_block_get_page_no(block);
+
+		m_indexes.push_back(Index(id, page_no));
+
+		/* The first root page found is assumed to belong to the
+		clustered index; derive the table flags from it. */
+		if (m_indexes.size() == 1) {
+
+			m_table_flags = dict_sys_tables_type_to_tf(
+				m_space_flags,
+				page_is_comp(page) ? DICT_N_COLS_COMPACT : 0);
+
+			err = check_row_format(m_table_flags);
+		}
+	}
+
+	return(err);
+}
+
+/**
+Update the import configuration that will be used to import the tablespace.
+Allocates cfg->m_indexes (owned by row_import, freed in its destructor)
+and fills in a synthesized name, id, space and root page for each index
+root page that was found during the scan.
+@return error code or DB_SUCCESS */
+dberr_t
+FetchIndexRootPages::build_row_import(row_import* cfg) const UNIV_NOTHROW
+{
+	Indexes::const_iterator end = m_indexes.end();
+
+	ut_a(cfg->m_table == m_table);
+	cfg->m_page_size = m_page_size;
+	cfg->m_n_indexes = m_indexes.size();
+
+	if (cfg->m_n_indexes == 0) {
+
+		ib_logf(IB_LOG_LEVEL_ERROR, "No B+Tree found in tablespace");
+
+		return(DB_CORRUPTION);
+	}
+
+	cfg->m_indexes = new(std::nothrow) row_index_t[cfg->m_n_indexes];
+
+	/* Trigger OOM */
+	DBUG_EXECUTE_IF("ib_import_OOM_11",
+			delete [] cfg->m_indexes; cfg->m_indexes = 0;);
+
+	if (cfg->m_indexes == 0) {
+		return(DB_OUT_OF_MEMORY);
+	}
+
+	memset(cfg->m_indexes, 0x0, sizeof(*cfg->m_indexes) * cfg->m_n_indexes);
+
+	row_index_t*	cfg_index = cfg->m_indexes;
+
+	for (Indexes::const_iterator it = m_indexes.begin();
+	     it != end;
+	     ++it, ++cfg_index) {
+
+		/* Without a .cfg file there are no real index names;
+		synthesize one from the index id. */
+		char	name[BUFSIZ];
+
+		ut_snprintf(name, sizeof(name), "index" IB_ID_FMT, it->m_id);
+
+		ulint	len = strlen(name) + 1;
+
+		cfg_index->m_name = new(std::nothrow) byte[len];
+
+		/* Trigger OOM */
+		DBUG_EXECUTE_IF("ib_import_OOM_12",
+				delete [] cfg_index->m_name;
+				cfg_index->m_name = 0;);
+
+		if (cfg_index->m_name == 0) {
+			/* Partially built cfg is cleaned up by the
+			row_import destructor. */
+			return(DB_OUT_OF_MEMORY);
+		}
+
+		memcpy(cfg_index->m_name, name, len);
+
+		cfg_index->m_id = it->m_id;
+
+		cfg_index->m_space = m_space;
+
+		cfg_index->m_page_no = it->m_page_no;
+	}
+
+	return(DB_SUCCESS);
+}
+
+/* Functor that is called for each physical page that is read from the
+tablespace file.
+
+  1. Check each page for corruption.
+
+  2. Update the space id and LSN on every page
+     * For the header page
+       - Validate the flags
+       - Update the LSN
+
+  3. On Btree pages
+     * Set the index id
+     * Update the max trx id
+     * In a cluster index, update the system columns
+     * In a cluster index, update the BLOB ptr, set the space id
+     * Purge delete marked records, but only if they can be easily
+       removed from the page
+     * Keep a counter of number of rows, i.e. non-delete-marked rows
+     * Keep a counter of number of delete marked rows
+     * Keep a counter of number of purge failures
+     * If a page is stamped with an index id that isn't in the .cfg file
+       we assume it is deleted and the page can be ignored.
+
+   4. Set the page state to dirty so that it will be written to disk.
+*/
+class PageConverter : public AbstractCallback {
+public:
+	/** Constructor
+	* @param cfg - config of table being imported.
+	* @param trx - transaction covering the import */
+	PageConverter(row_import* cfg, trx_t* trx) UNIV_NOTHROW;
+
+	/** Destructor; frees the record-offsets heap if one was
+	allocated by rec_get_offsets(). */
+	virtual ~PageConverter() UNIV_NOTHROW
+	{
+		if (m_heap != 0) {
+			mem_heap_free(m_heap);
+		}
+	}
+
+	/**
+	@return the server space id of the tablespace being iterated over
+	(the destination table's space, not the id stored in the file). */
+	virtual ulint get_space_id() const UNIV_NOTHROW
+	{
+		return(m_cfg->m_table->space);
+	}
+
+	/**
+	Called for each block as it is read from the file.
+	@param offset - physical offset in the file
+	@param block - block to convert, it is not from the buffer pool.
+	@retval DB_SUCCESS or error code. */
+	virtual dberr_t operator() (
+		os_offset_t	offset,
+		buf_block_t*	block) UNIV_NOTHROW;
+private:
+
+	/** Status returned by PageConverter::validate() */
+	enum import_page_status_t {
+		IMPORT_PAGE_STATUS_OK,		/*!< Page is OK */
+		IMPORT_PAGE_STATUS_ALL_ZERO,	/*!< Page is all zeros */
+		IMPORT_PAGE_STATUS_CORRUPTED	/*!< Page is corrupted */
+	};
+
+	/**
+	Update the page, set the space id, max trx id and index id.
+	@param block - block read from file
+	@param page_type - type of the page
+	@retval DB_SUCCESS or error code */
+	dberr_t update_page(
+		buf_block_t*	block,
+		ulint&		page_type) UNIV_NOTHROW;
+
+#if defined UNIV_DEBUG
+	/**
+	@return true if the error condition is enabled. */
+	bool trigger_corruption() UNIV_NOTHROW
+	{
+		return(false);
+	}
+	#else
+#define trigger_corruption()	(false)
+#endif /* UNIV_DEBUG */
+
+	/**
+	Update the space, index id, trx id.
+	@param block - block to convert
+	@return DB_SUCCESS or error code */
+	dberr_t	update_index_page(buf_block_t* block) UNIV_NOTHROW;
+
+	/** Update the BLOB references and write UNDO log entries for
+	rows that can't be purged optimistically.
+	@param block - block to update
+	@retval DB_SUCCESS or error code */
+	dberr_t	update_records(buf_block_t* block) UNIV_NOTHROW;
+
+	/**
+	Validate the page, check for corruption.
+	@param offset - physical offset within file.
+	@param page - page read from file.
+	@return one of import_page_status_t: OK, all-zero, or corrupted */
+	import_page_status_t validate(
+		os_offset_t	offset,
+		buf_block_t*	page) UNIV_NOTHROW;
+
+	/**
+	Validate the space flags and update tablespace header page.
+	@param block - block read from file, not from the buffer pool.
+	@retval DB_SUCCESS or error code */
+	dberr_t	update_header(buf_block_t* block) UNIV_NOTHROW;
+
+	/**
+	Adjust the BLOB reference for a single column that is externally stored
+	@param rec - record to update
+	@param offsets - column offsets for the record
+	@param i - column ordinal value
+	@return DB_SUCCESS or error code */
+	dberr_t	adjust_cluster_index_blob_column(
+		rec_t*		rec,
+		const ulint*	offsets,
+		ulint		i) UNIV_NOTHROW;
+
+	/**
+	Adjusts the BLOB reference in the clustered index row for all
+	externally stored columns.
+	@param rec - record to update
+	@param offsets - column offsets for the record
+	@return DB_SUCCESS or error code */
+	dberr_t	adjust_cluster_index_blob_columns(
+		rec_t*		rec,
+		const ulint*	offsets) UNIV_NOTHROW;
+
+	/**
+	In the clustered index, adjust the BLOB pointers as needed.
+	Also update the BLOB reference, write the new space id.
+	@param rec - record to update
+	@param offsets - column offsets for the record
+	@return DB_SUCCESS or error code */
+	dberr_t	adjust_cluster_index_blob_ref(
+		rec_t*		rec,
+		const ulint*	offsets) UNIV_NOTHROW;
+
+	/**
+	Purge delete-marked records, only if it is possible to do
+	so without re-organising the B+tree.
+	@param offsets - current row offsets.
+	@retval true if purged */
+	bool	purge(const ulint* offsets) UNIV_NOTHROW;
+
+	/**
+	Adjust the BLOB references and sys fields for the current record.
+	@param index - the index being converted
+	@param rec - record to update
+	@param offsets - column offsets for the record
+	@param deleted - true if row is delete marked
+	@return DB_SUCCESS or error code. */
+	dberr_t	adjust_cluster_record(
+		const dict_index_t*	index,
+		rec_t*			rec,
+		const ulint*		offsets,
+		bool			deleted) UNIV_NOTHROW;
+
+	/**
+	Find an index with the matching id.
+	@return row_index_t* instance or 0 if the id is unknown */
+	row_index_t* find_index(index_id_t id) UNIV_NOTHROW
+	{
+		row_index_t*	index = &m_cfg->m_indexes[0];
+
+		for (ulint i = 0; i < m_cfg->m_n_indexes; ++i, ++index) {
+			if (id == index->m_id) {
+				return(index);
+			}
+		}
+
+		return(0);
+
+	}
+private:
+	/** Config for table that is being imported. */
+	row_import*		m_cfg;
+
+	/** Current index whose pages are being imported */
+	row_index_t*		m_index;
+
+	/** Current system LSN */
+	lsn_t			m_current_lsn;
+
+	/** Alias for m_page_zip, only set for compressed pages. */
+	page_zip_des_t*		m_page_zip_ptr;
+
+	/** Iterator over records in a block */
+	RecIterator		m_rec_iter;
+
+	/** Record offsets scratch array, used via m_offsets */
+	ulint			m_offsets_[REC_OFFS_NORMAL_SIZE];
+
+	/** Pointer to m_offsets_ */
+	ulint*			m_offsets;
+
+	/** Memory heap for the record offsets */
+	mem_heap_t*		m_heap;
+
+	/** Cluster index instance */
+	dict_index_t*		m_cluster_index;
+};
+
+/**
+row_import destructor. Releases every heap allocation owned by the
+configuration: the per-index name and field arrays, the index array
+itself, the column array, the column names, the table name and the
+hostname. All members are allocated with new[] and may be 0. */
+row_import::~row_import() UNIV_NOTHROW
+{
+	if (m_indexes != 0) {
+		for (ulint i = 0; i < m_n_indexes; ++i) {
+			row_index_t*	cfg_index = &m_indexes[i];
+
+			delete [] cfg_index->m_name;
+
+			if (cfg_index->m_fields == 0) {
+				continue;
+			}
+
+			for (ulint j = 0; j < cfg_index->m_n_fields; ++j) {
+				delete [] cfg_index->m_fields[j].name;
+			}
+
+			delete [] cfg_index->m_fields;
+		}
+	}
+
+	if (m_col_names != 0) {
+		for (ulint i = 0; i < m_n_cols; ++i) {
+			delete [] m_col_names[i];
+		}
+	}
+
+	delete [] m_cols;
+	delete [] m_indexes;
+	delete [] m_col_names;
+	delete [] m_table_name;
+	delete [] m_hostname;
+}
+
+/**
+Find the index entry in the indexes array by name.
+@param name - index name
+@return instance if found else 0. */
+row_index_t*
+row_import::get_index(
+	const char*	name) const UNIV_NOTHROW
+{
+	row_index_t*	cfg_index = m_indexes;
+
+	for (ulint i = 0; i < m_n_indexes; ++i, ++cfg_index) {
+
+		const char*	cfg_name = reinterpret_cast<const char*>(
+			cfg_index->m_name);
+
+		if (strcmp(name, cfg_name) == 0) {
+			return(cfg_index);
+		}
+	}
+
+	/* Not found. */
+	return(0);
+}
+
+/**
+Get the number of rows in the index.
+@param name - index name; must name an index present in the meta-data
+@return number of rows (doesn't include delete marked rows). */
+ulint
+row_import::get_n_rows(
+	const char*	name) const UNIV_NOTHROW
+{
+	const row_index_t*	index = get_index(name);
+
+	/* Assert on the looked-up index, not the caller's name pointer:
+	get_index() returns 0 for an unknown name and we dereference the
+	result below. (The old assertion checked the wrong variable.) */
+	ut_a(index != 0);
+
+	return(index->m_stats.m_n_rows);
+}
+
+/**
+Get the number of rows for which purge failed during the convert phase.
+@param name - index name; must name an index present in the meta-data
+@return number of rows for which purge failed. */
+ulint
+row_import::get_n_purge_failed(
+	const char*	name) const UNIV_NOTHROW
+{
+	const row_index_t*	index = get_index(name);
+
+	/* Assert on the looked-up index, not the caller's name pointer:
+	get_index() returns 0 for an unknown name and we dereference the
+	result below. (The old assertion checked the wrong variable.) */
+	ut_a(index != 0);
+
+	return(index->m_stats.m_n_purge_failed);
+}
+
+/**
+Find the ordinal value of the column name in the cfg table columns.
+@param name - of column to look for.
+@return ordinal position, or ULINT_UNDEFINED if not found. */
+ulint
+row_import::find_col(
+	const char*	name) const UNIV_NOTHROW
+{
+	ulint	i = 0;
+
+	while (i < m_n_cols) {
+
+		const char*	col_name = reinterpret_cast<const char*>(
+			m_col_names[i]);
+
+		if (strcmp(name, col_name) == 0) {
+			return(i);
+		}
+
+		++i;
+	}
+
+	return(ULINT_UNDEFINED);
+}
+
+/**
+Find the index field entry in the cfg index fields.
+@param cfg_index - meta-data index whose fields are searched
+@param name - name of the field to look for
+@return instance if found else 0. */
+const dict_field_t*
+row_import::find_field(
+	const row_index_t*	cfg_index,
+	const char*		name) const UNIV_NOTHROW
+{
+	for (ulint i = 0; i < cfg_index->m_n_fields; ++i) {
+
+		const dict_field_t*	field = &cfg_index->m_fields[i];
+
+		const char*	field_name = reinterpret_cast<const char*>(
+			field->name);
+
+		if (strcmp(name, field_name) == 0) {
+			return(field);
+		}
+	}
+
+	/* Not found. */
+	return(0);
+}
+
+/**
+Check if the index schema that was read from the .cfg file matches the
+in memory index definition. All mismatches are reported before returning,
+so the user sees every problem in one pass.
+@param thd - MySQL session, used for error reporting
+@param index - server-side index definition to compare against
+@return DB_SUCCESS or error code. */
+dberr_t
+row_import::match_index_columns(
+	THD*			thd,
+	const dict_index_t*	index) UNIV_NOTHROW
+{
+	row_index_t*	cfg_index;
+	dberr_t		err = DB_SUCCESS;
+
+	cfg_index = get_index(index->name);
+
+	if (cfg_index == 0) {
+		ib_errf(thd, IB_LOG_LEVEL_ERROR,
+			 ER_TABLE_SCHEMA_MISMATCH,
+			 "Index %s not found in tablespace meta-data file.",
+			 index->name);
+
+		return(DB_ERROR);
+	}
+
+	/* Remember the server index so later phases can map meta-data
+	entries back to the dictionary object. */
+	cfg_index->m_srv_index = index;
+
+	const dict_field_t*	field = index->fields;
+
+	for (ulint i = 0; i < index->n_fields; ++i, ++field) {
+
+		const dict_field_t*	cfg_field;
+
+		cfg_field = find_field(cfg_index, field->name);
+
+		if (cfg_field == 0) {
+			ib_errf(thd, IB_LOG_LEVEL_ERROR,
+				 ER_TABLE_SCHEMA_MISMATCH,
+				 "Index %s field %s not found in tablespace "
+				 "meta-data file.",
+				 index->name, field->name);
+
+			err = DB_ERROR;
+		} else {
+
+			if (cfg_field->prefix_len != field->prefix_len) {
+				ib_errf(thd, IB_LOG_LEVEL_ERROR,
+					 ER_TABLE_SCHEMA_MISMATCH,
+					 "Index %s field %s prefix len %lu "
+					 "doesn't match meta-data file value "
+					 "%lu",
+					 index->name, field->name,
+					 (ulong) field->prefix_len,
+					 (ulong) cfg_field->prefix_len);
+
+				err = DB_ERROR;
+			}
+
+			if (cfg_field->fixed_len != field->fixed_len) {
+				ib_errf(thd, IB_LOG_LEVEL_ERROR,
+					 ER_TABLE_SCHEMA_MISMATCH,
+					 "Index %s field %s fixed len %lu "
+					 "doesn't match meta-data file value "
+					 "%lu",
+					 index->name, field->name,
+					 (ulong) field->fixed_len,
+					 (ulong) cfg_field->fixed_len);
+
+				err = DB_ERROR;
+			}
+		}
+	}
+
+	return(err);
+}
+
+/**
+Check if the table schema that was read from the .cfg file matches the
+in memory table definition. Every column attribute mismatch is reported
+individually; the function keeps scanning so all errors are surfaced.
+@param thd - MySQL session variable
+@return DB_SUCCESS or error code. */
+dberr_t
+row_import::match_table_columns(
+	THD*			thd) UNIV_NOTHROW
+{
+	dberr_t			err = DB_SUCCESS;
+	const dict_col_t*	col = m_table->cols;
+
+	for (ulint i = 0; i < m_table->n_cols; ++i, ++col) {
+
+		const char*	col_name;
+		ulint		cfg_col_index;
+
+		col_name = dict_table_get_col_name(
+			m_table, dict_col_get_no(col));
+
+		cfg_col_index = find_col(col_name);
+
+		if (cfg_col_index == ULINT_UNDEFINED) {
+
+			ib_errf(thd, IB_LOG_LEVEL_ERROR,
+				 ER_TABLE_SCHEMA_MISMATCH,
+				 "Column %s not found in tablespace.",
+				 col_name);
+
+			err = DB_ERROR;
+		} else if (cfg_col_index != col->ind) {
+
+			/* Same column name but at a different ordinal
+			position: the physical row layout differs. */
+			ib_errf(thd, IB_LOG_LEVEL_ERROR,
+				 ER_TABLE_SCHEMA_MISMATCH,
+				 "Column %s ordinal value mismatch, it's at "
+				 "%lu in the table and %lu in the tablespace "
+				 "meta-data file",
+				 col_name,
+				 (ulong) col->ind, (ulong) cfg_col_index);
+
+			err = DB_ERROR;
+		} else {
+			const dict_col_t*	cfg_col;
+
+			cfg_col = &m_cols[cfg_col_index];
+			ut_a(cfg_col->ind == cfg_col_index);
+
+			if (cfg_col->prtype != col->prtype) {
+				ib_errf(thd,
+					 IB_LOG_LEVEL_ERROR,
+					 ER_TABLE_SCHEMA_MISMATCH,
+					 "Column %s precise type mismatch.",
+					 col_name);
+				err = DB_ERROR;
+			}
+
+			if (cfg_col->mtype != col->mtype) {
+				ib_errf(thd,
+					 IB_LOG_LEVEL_ERROR,
+					 ER_TABLE_SCHEMA_MISMATCH,
+					 "Column %s main type mismatch.",
+					 col_name);
+				err = DB_ERROR;
+			}
+
+			if (cfg_col->len != col->len) {
+				ib_errf(thd,
+					 IB_LOG_LEVEL_ERROR,
+					 ER_TABLE_SCHEMA_MISMATCH,
+					 "Column %s length mismatch.",
+					 col_name);
+				err = DB_ERROR;
+			}
+
+			if (cfg_col->mbminmaxlen != col->mbminmaxlen) {
+				ib_errf(thd,
+					 IB_LOG_LEVEL_ERROR,
+					 ER_TABLE_SCHEMA_MISMATCH,
+					 "Column %s multi-byte len mismatch.",
+					 col_name);
+				err = DB_ERROR;
+			}
+
+			/* Defensive re-check; unreachable unless the
+			meta-data is internally inconsistent, hence no
+			user message. */
+			if (cfg_col->ind != col->ind) {
+				err = DB_ERROR;
+			}
+
+			if (cfg_col->ord_part != col->ord_part) {
+				ib_errf(thd,
+					 IB_LOG_LEVEL_ERROR,
+					 ER_TABLE_SCHEMA_MISMATCH,
+					 "Column %s ordering mismatch.",
+					 col_name);
+				err = DB_ERROR;
+			}
+
+			if (cfg_col->max_prefix != col->max_prefix) {
+				ib_errf(thd,
+					 IB_LOG_LEVEL_ERROR,
+					 ER_TABLE_SCHEMA_MISMATCH,
+					 "Column %s max prefix mismatch.",
+					 col_name);
+				err = DB_ERROR;
+			}
+		}
+	}
+
+	return(err);
+}
+
+/**
+Check if the table (and index) schema that was read from the .cfg file
+matches the in memory table definition.
+@param thd - MySQL session variable
+@return DB_SUCCESS or error code. */
+dberr_t
+row_import::match_schema(
+	THD*		thd) UNIV_NOTHROW
+{
+	/* Do some simple checks. */
+
+	if (m_flags != m_table->flags) {
+		/* BUG FIX: the message printed m_table->n_cols where it
+		claims to print the server table flags. */
+		ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH,
+			 "Table flags don't match, server table has 0x%lx "
+			 "and the meta-data file has 0x%lx",
+			 (ulong) m_table->flags, (ulong) m_flags);
+
+		return(DB_ERROR);
+	} else if (m_table->n_cols != m_n_cols) {
+		ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH,
+			 "Number of columns don't match, table has %lu "
+			 "columns but the tablespace meta-data file has "
+			 "%lu columns",
+			 (ulong) m_table->n_cols, (ulong) m_n_cols);
+
+		return(DB_ERROR);
+	} else if (UT_LIST_GET_LEN(m_table->indexes) != m_n_indexes) {
+
+		/* If the number of indexes don't match then it is better
+		to abort the IMPORT. It is easy for the user to create a
+		table matching the IMPORT definition. */
+
+		ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH,
+			 "Number of indexes don't match, table has %lu "
+			 "indexes but the tablespace meta-data file has "
+			 "%lu indexes",
+			 (ulong) UT_LIST_GET_LEN(m_table->indexes),
+			 (ulong) m_n_indexes);
+
+		return(DB_ERROR);
+	}
+
+	dberr_t	err = match_table_columns(thd);
+
+	if (err != DB_SUCCESS) {
+		return(err);
+	}
+
+	/* Check if the index definitions match. Keep going after a
+	mismatch so every bad index is reported. */
+
+	const dict_index_t* index;
+
+	for (index = UT_LIST_GET_FIRST(m_table->indexes);
+	     index != 0;
+	     index = UT_LIST_GET_NEXT(indexes, index)) {
+
+		dberr_t	index_err;
+
+		index_err = match_index_columns(thd, index);
+
+		if (index_err != DB_SUCCESS) {
+			err = index_err;
+		}
+	}
+
+	return(err);
+}
+
+/**
+Set the index root <space, pageno>, using index name. Requires that
+every meta-data index name has already been validated against the
+dictionary (see match_index_columns()). */
+void
+row_import::set_root_by_name() UNIV_NOTHROW
+{
+	for (ulint i = 0; i < m_n_indexes; ++i) {
+
+		row_index_t*	cfg_index = &m_indexes[i];
+
+		const char*	index_name = reinterpret_cast<const char*>(
+			cfg_index->m_name);
+
+		dict_index_t*	index = dict_table_get_index_on_name(
+			m_table, index_name);
+
+		/* We've already checked that it exists. */
+		ut_a(index != 0);
+
+		/* Set the root page number and space id. */
+		index->space = m_table->space;
+		index->page = cfg_index->m_page_no;
+	}
+}
+
+/**
+Set the index root <space, pageno>, using a heuristic. Used when no .cfg
+file is available: dictionary indexes are paired with the root pages that
+were discovered during the tablespace scan, in order. FTS indexes are
+skipped and flagged corrupt since they cannot be matched this way.
+@return DB_SUCCESS or error code */
+dberr_t
+row_import::set_root_by_heuristic() UNIV_NOTHROW
+{
+	row_index_t*	cfg_index = m_indexes;
+
+	ut_a(m_n_indexes > 0);
+
+	// TODO: For now use brute force, based on ordinality
+
+	if (UT_LIST_GET_LEN(m_table->indexes) != m_n_indexes) {
+
+		char	table_name[MAX_FULL_NAME_LEN + 1];
+
+		innobase_format_name(
+			table_name, sizeof(table_name), m_table->name, FALSE);
+
+		/* Only a warning: the pairing below still proceeds for
+		as many indexes as both sides have. */
+		ib_logf(IB_LOG_LEVEL_WARN,
+			"Table %s should have %lu indexes but the tablespace "
+			"has %lu indexes",
+			table_name,
+			UT_LIST_GET_LEN(m_table->indexes),
+			m_n_indexes);
+	}
+
+	/* Protect the dictionary objects while we update them. */
+	dict_mutex_enter_for_mysql();
+
+	ulint	i = 0;
+	dberr_t	err = DB_SUCCESS;
+
+	for (dict_index_t* index = UT_LIST_GET_FIRST(m_table->indexes);
+	     index != 0;
+	     index = UT_LIST_GET_NEXT(indexes, index)) {
+
+		if (index->type & DICT_FTS) {
+			index->type |= DICT_CORRUPT;
+			ib_logf(IB_LOG_LEVEL_WARN,
+				"Skipping FTS index: %s", index->name);
+		} else if (i < m_n_indexes) {
+
+			/* Replace the synthesized meta-data name with the
+			real dictionary index name. */
+			delete [] cfg_index[i].m_name;
+
+			ulint	len = strlen(index->name) + 1;
+
+			cfg_index[i].m_name = new(std::nothrow) byte[len];
+
+			/* Trigger OOM */
+			DBUG_EXECUTE_IF("ib_import_OOM_14",
+					delete[] cfg_index[i].m_name;
+					cfg_index[i].m_name = 0;);
+
+			if (cfg_index[i].m_name == 0) {
+				err = DB_OUT_OF_MEMORY;
+				break;
+			}
+
+			memcpy(cfg_index[i].m_name, index->name, len);
+
+			cfg_index[i].m_srv_index = index;
+
+			index->space = m_table->space;
+			index->page = cfg_index[i].m_page_no;
+
+			++i;
+		}
+	}
+
+	dict_mutex_exit_for_mysql();
+
+	return(err);
+}
+
+/**
+Purge delete marked records. Walks the whole index with a persistent
+cursor, counting live rows and purging delete-marked ones.
+@return DB_SUCCESS or error code. */
+dberr_t
+IndexPurge::garbage_collect() UNIV_NOTHROW
+{
+	dberr_t	err;
+	ibool	comp = dict_table_is_comp(m_index->table);
+
+	/* Open the persistent cursor and start the mini-transaction. */
+
+	open();
+
+	while ((err = next()) == DB_SUCCESS) {
+
+		rec_t*	rec = btr_pcur_get_rec(&m_pcur);
+		ibool	deleted = rec_get_deleted_flag(rec, comp);
+
+		if (!deleted) {
+			++m_n_rows;
+		} else {
+			purge();
+		}
+	}
+
+	/* Close the persistent cursor and commit the mini-transaction. */
+
+	close();
+
+	/* DB_END_OF_INDEX is the normal termination condition. */
+	return(err == DB_END_OF_INDEX ? DB_SUCCESS : err);
+}
+
+/**
+Begin import, position the cursor on the first record. Redo logging is
+disabled: the whole tablespace is flushed/discarded on failure anyway. */
+void
+IndexPurge::open() UNIV_NOTHROW
+{
+	mtr_start(&m_mtr);
+
+	mtr_set_log_mode(&m_mtr, MTR_LOG_NO_REDO);
+
+	btr_pcur_open_at_index_side(
+		true, m_index, BTR_MODIFY_LEAF, &m_pcur, true, 0, &m_mtr);
+}
+
+/**
+Close the persistent cursor and commit the mini-transaction. */
+void
+IndexPurge::close() UNIV_NOTHROW
+{
+	btr_pcur_close(&m_pcur);
+	mtr_commit(&m_mtr);
+}
+
+/**
+Position the cursor on the next record. When crossing a page boundary
+the mini-transaction is committed and restarted so the latch on the old
+page is released; the cursor position is saved and restored around that.
+@return DB_SUCCESS, DB_END_OF_INDEX or DB_INTERRUPTED */
+dberr_t
+IndexPurge::next() UNIV_NOTHROW
+{
+	btr_pcur_move_to_next_on_page(&m_pcur);
+
+	/* When switching pages, commit the mini-transaction
+	in order to release the latch on the old page. */
+
+	if (!btr_pcur_is_after_last_on_page(&m_pcur)) {
+		return(DB_SUCCESS);
+	} else if (trx_is_interrupted(m_trx)) {
+		/* Check after every page because the check
+		is expensive. */
+		return(DB_INTERRUPTED);
+	}
+
+	btr_pcur_store_position(&m_pcur, &m_mtr);
+
+	mtr_commit(&m_mtr);
+
+	mtr_start(&m_mtr);
+
+	mtr_set_log_mode(&m_mtr, MTR_LOG_NO_REDO);
+
+	btr_pcur_restore_position(BTR_MODIFY_LEAF, &m_pcur, &m_mtr);
+
+	if (!btr_pcur_move_to_next_user_rec(&m_pcur, &m_mtr)) {
+
+		return(DB_END_OF_INDEX);
+	}
+
+	return(DB_SUCCESS);
+}
+
+/**
+Store the persistent cursor position and reopen the
+B-tree cursor in BTR_MODIFY_TREE mode, because the
+tree structure may be changed during a pessimistic delete.
+The caller is expected to have stored the cursor position. */
+void
+IndexPurge::purge_pessimistic_delete() UNIV_NOTHROW
+{
+	dberr_t	err;
+
+	btr_pcur_restore_position(BTR_MODIFY_TREE, &m_pcur, &m_mtr);
+
+	/* Only delete-marked records may be purged. */
+	ut_ad(rec_get_deleted_flag(
+			btr_pcur_get_rec(&m_pcur),
+			dict_table_is_comp(m_index->table)));
+
+	btr_cur_pessimistic_delete(
+		&err, FALSE, btr_pcur_get_btr_cur(&m_pcur), 0, RB_NONE, &m_mtr);
+
+	ut_a(err == DB_SUCCESS);
+
+	/* Reopen the B-tree cursor in BTR_MODIFY_LEAF mode */
+	mtr_commit(&m_mtr);
+}
+
+/**
+Purge delete-marked records. Saves the cursor position, performs the
+pessimistic delete (which commits the mtr), then restarts the mtr and
+restores the cursor in leaf mode to continue the scan. */
+void
+IndexPurge::purge() UNIV_NOTHROW
+{
+	btr_pcur_store_position(&m_pcur, &m_mtr);
+
+	purge_pessimistic_delete();
+
+	mtr_start(&m_mtr);
+
+	mtr_set_log_mode(&m_mtr, MTR_LOG_NO_REDO);
+
+	btr_pcur_restore_position(BTR_MODIFY_LEAF, &m_pcur, &m_mtr);
+}
+
+/**
+Constructor
+* @param cfg - config of table being imported.
+* @param trx - transaction covering the import */
+PageConverter::PageConverter(
+	row_import*	cfg,
+	trx_t*		trx)
+	:
+	AbstractCallback(trx),
+	m_cfg(cfg),
+	m_page_zip_ptr(0),
+	m_heap(0) UNIV_NOTHROW
+{
+	/* Start with the first meta-data index; updated on the fly as
+	pages with other index ids are encountered. */
+	m_index = m_cfg->m_indexes;
+
+	/* Snapshot the current LSN; stamped into every page header. */
+	m_current_lsn = log_get_lsn();
+	ut_a(m_current_lsn > 0);
+
+	m_offsets = m_offsets_;
+	rec_offs_init(m_offsets_);
+
+	/* The first index of the table is the clustered index. */
+	m_cluster_index = dict_table_get_first_index(m_cfg->m_table);
+}
+
+/**
+Adjust the BLOB reference for a single column that is externally stored:
+overwrite the space id stored in the external field reference with the
+destination tablespace's space id.
+@param rec - record to update
+@param offsets - column offsets for the record
+@param i - column ordinal value
+@return DB_SUCCESS or DB_CORRUPTION if the reference is too short */
+dberr_t
+PageConverter::adjust_cluster_index_blob_column(
+	rec_t*		rec,
+	const ulint*	offsets,
+	ulint		i) UNIV_NOTHROW
+{
+	ulint	len;
+	byte*	field;
+
+	field = rec_get_nth_field(rec, offsets, i, &len);
+
+	DBUG_EXECUTE_IF("ib_import_trigger_corruption_2",
+			len = BTR_EXTERN_FIELD_REF_SIZE - 1;);
+
+	/* An externally stored column must end in a full-size
+	external field reference. */
+	if (len < BTR_EXTERN_FIELD_REF_SIZE) {
+
+		char index_name[MAX_FULL_NAME_LEN + 1];
+
+		innobase_format_name(
+			index_name, sizeof(index_name),
+			m_cluster_index->name, TRUE);
+
+		ib_errf(m_trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+			ER_INNODB_INDEX_CORRUPT,
+			"Externally stored column(%lu) has a reference "
+			"length of %lu in the cluster index %s",
+			(ulong) i, (ulong) len, index_name);
+
+		return(DB_CORRUPTION);
+	}
+
+	/* Point at the BTR_EXTERN_SPACE_ID slot of the field
+	reference, which sits at the end of the stored column. */
+	field += BTR_EXTERN_SPACE_ID - BTR_EXTERN_FIELD_REF_SIZE + len;
+
+	if (is_compressed_table()) {
+		mach_write_to_4(field, get_space_id());
+
+		page_zip_write_blob_ptr(
+			m_page_zip_ptr, rec, m_cluster_index, offsets, i, 0);
+	} else {
+		/* mtr handle is 0 because redo logging is skipped
+		during import. */
+		mlog_write_ulint(field, get_space_id(), MLOG_4BYTES, 0);
+	}
+
+	return(DB_SUCCESS);
+}
+
+/**
+Adjusts the BLOB reference in the clustered index row for all externally
+stored columns, rewriting the space_id in each BLOB pointer.
+@param rec - record to update
+@param offsets - column offsets for the record
+@return DB_SUCCESS or error code */
+dberr_t
+PageConverter::adjust_cluster_index_blob_columns(
+	rec_t*		rec,
+	const ulint*	offsets) UNIV_NOTHROW
+{
+	ut_ad(rec_offs_any_extern(offsets));
+
+	ulint	n_fields = rec_offs_n_fields(offsets);
+
+	/* Adjust the space_id in the BLOB pointers. */
+
+	for (ulint i = 0; i < n_fields; ++i) {
+
+		/* Only columns stored "externally" carry a BLOB ref. */
+
+		if (!rec_offs_nth_extern(offsets, i)) {
+			continue;
+		}
+
+		dberr_t	err = adjust_cluster_index_blob_column(
+			rec, offsets, i);
+
+		if (err != DB_SUCCESS) {
+			return(err);
+		}
+	}
+
+	return(DB_SUCCESS);
+}
+
+/**
+In the clustered index, adjust BLOB pointers as needed. Also update the
+BLOB reference, write the new space id. A no-op for records without
+externally stored columns.
+@param rec - record to update
+@param offsets - column offsets for the record
+@return DB_SUCCESS or error code */
+dberr_t
+PageConverter::adjust_cluster_index_blob_ref(
+	rec_t*		rec,
+	const ulint*	offsets) UNIV_NOTHROW
+{
+	if (!rec_offs_any_extern(offsets)) {
+		/* No externally stored columns: nothing to fix up. */
+		return(DB_SUCCESS);
+	}
+
+	return(adjust_cluster_index_blob_columns(rec, offsets));
+}
+
+/**
+Purge delete-marked records, only if it is possible to do so without
+re-organising the B+tree.
+@param offsets - current row offsets (unused; m_offsets is used instead).
+@return true if purge succeeded */
+bool
+PageConverter::purge(const ulint* offsets) UNIV_NOTHROW
+{
+	const dict_index_t*	index = m_index->m_srv_index;
+
+	/* We can't have a page that is empty and not root. */
+	if (!m_rec_iter.remove(index, m_page_zip_ptr, m_offsets)) {
+
+		++m_index->m_stats.m_n_purge_failed;
+
+		return(false);
+	}
+
+	++m_index->m_stats.m_n_purged;
+
+	return(true);
+}
+
+/**
+Adjust the BLOB references and sys fields for the current record.
+@param index - the index being converted (currently unused here)
+@param rec - record to update
+@param offsets - column offsets for the record
+@param deleted - true if row is delete marked (currently unused here)
+@return DB_SUCCESS or error code. */
+dberr_t
+PageConverter::adjust_cluster_record(
+	const dict_index_t*	index,
+	rec_t*			rec,
+	const ulint*		offsets,
+	bool			deleted) UNIV_NOTHROW
+{
+	dberr_t	err;
+
+	if ((err = adjust_cluster_index_blob_ref(rec, offsets)) == DB_SUCCESS) {
+
+		/* Reset DB_TRX_ID and DB_ROLL_PTR.  Normally, these fields
+		are only written in conjunction with other changes to the
+		record. */
+
+		row_upd_rec_sys_fields(
+			rec, m_page_zip_ptr, m_cluster_index, m_offsets,
+			m_trx, 0);
+	}
+
+	return(err);
+}
+
+/**
+Update the BLOB references and write UNDO log entries for
+rows that can't be purged optimistically. Iterates over the user
+records on the page, adjusting clustered-index records and purging
+delete-marked rows where possible.
+@param block - block to update
+@retval DB_SUCCESS or error code */
+dberr_t
+PageConverter::update_records(
+	buf_block_t*	block) UNIV_NOTHROW
+{
+	ibool	comp = dict_table_is_comp(m_cfg->m_table);
+	bool	clust_index = m_index->m_srv_index == m_cluster_index;
+
+	/* This will also position the cursor on the first user record. */
+
+	m_rec_iter.open(block);
+
+	while (!m_rec_iter.end()) {
+
+		rec_t*	rec = m_rec_iter.current();
+
+		/* FIXME: Move out of the loop */
+
+		/* Node pointer pages have no user data to convert. */
+		if (rec_get_status(rec) == REC_STATUS_NODE_PTR) {
+			break;
+		}
+
+		ibool	deleted = rec_get_deleted_flag(rec, comp);
+
+		/* For the clustered index we have to adjust the BLOB
+		reference and the system fields irrespective of the
+		delete marked flag. The adjustment of delete marked
+		cluster records is required for purge to work later. */
+
+		if (deleted || clust_index) {
+			m_offsets = rec_get_offsets(
+				rec, m_index->m_srv_index, m_offsets,
+				ULINT_UNDEFINED, &m_heap);
+		}
+
+		if (clust_index) {
+
+			dberr_t	err = adjust_cluster_record(
+				m_index->m_srv_index, rec, m_offsets,
+				deleted);
+
+			if (err != DB_SUCCESS) {
+				return(err);
+			}
+		}
+
+		/* If it is a delete marked record then try an
+		optimistic delete. */
+
+		if (deleted) {
+			/* A successful purge will move the cursor to the
+			next record. */
+
+			if (!purge(m_offsets)) {
+				m_rec_iter.next();
+			}
+
+			++m_index->m_stats.m_n_deleted;
+		} else {
+			++m_index->m_stats.m_n_rows;
+			m_rec_iter.next();
+		}
+	}
+
+	return(DB_SUCCESS);
+}
+
+/**
+Update the space, index id, trx id. Switches m_index when the page
+belongs to a different index than the previous one.
+@param block - block to convert
+@return DB_SUCCESS or error code */
+dberr_t
+PageConverter::update_index_page(
+	buf_block_t*	block) UNIV_NOTHROW
+{
+	index_id_t	id;
+	buf_frame_t*	page = block->frame;
+
+	if (is_free(buf_block_get_page_no(block))) {
+		/* Page is on the free list: nothing to convert. */
+		return(DB_SUCCESS);
+	} else if ((id = btr_page_get_index_id(page)) != m_index->m_id) {
+
+		row_index_t*	index = find_index(id);
+
+		if (index == 0) {
+			m_index = 0;
+			return(DB_CORRUPTION);
+		}
+
+		/* Update current index */
+		m_index = index;
+	}
+
+	/* If the .cfg file is missing and there is an index mismatch
+	then ignore the error. */
+	if (m_cfg->m_missing && (m_index == 0 || m_index->m_srv_index == 0)) {
+		return(DB_SUCCESS);
+	}
+
+#ifdef UNIV_ZIP_DEBUG
+	ut_a(!is_compressed_table()
+	     || page_zip_validate(m_page_zip_ptr, page, m_index->m_srv_index));
+#endif /* UNIV_ZIP_DEBUG */
+
+	/* This has to be written to uncompressed index header. Set it to
+	the current index id. */
+	btr_page_set_index_id(
+		page, m_page_zip_ptr, m_index->m_srv_index->id, 0);
+
+	page_set_max_trx_id(block, m_page_zip_ptr, m_trx->id, 0);
+
+	if (page_get_n_recs(block->frame) == 0) {
+
+		/* Only a root page can be empty. */
+		if (!is_root_page(block->frame)) {
+			// TODO: We should relax this and skip secondary
+			// indexes. Mark them as corrupt because they can
+			// always be rebuilt.
+			return(DB_CORRUPTION);
+		}
+
+		return(DB_SUCCESS);
+	}
+
+	return(update_records(block));
+}
+
+/**
+Validate the space flags and update tablespace header page: stamp the
+flush LSN and rewrite the space id in the FSP header and page header.
+@param block - block read from file, not from the buffer pool.
+@retval DB_SUCCESS or error code */
+dberr_t
+PageConverter::update_header(
+	buf_block_t*	block) UNIV_NOTHROW
+{
+	/* Check for valid header */
+	switch(fsp_header_get_space_id(get_frame(block))) {
+	case 0:
+		return(DB_CORRUPTION);
+	case ULINT_UNDEFINED:
+		/* Deliberately non-fatal; we overwrite the space id
+		below anyway. */
+		ib_logf(IB_LOG_LEVEL_WARN,
+			"Space id check in the header failed "
+			"- ignored");
+	}
+
+	ulint	space_flags = fsp_header_get_flags(get_frame(block));
+
+	if (!fsp_flags_is_valid(space_flags)) {
+
+		ib_logf(IB_LOG_LEVEL_ERROR,
+			"Unsupported tablespace format %lu",
+			(ulong) space_flags);
+
+		return(DB_UNSUPPORTED);
+	}
+
+	mach_write_to_8(
+		get_frame(block) + FIL_PAGE_FILE_FLUSH_LSN, m_current_lsn);
+
+	/* Write space_id to the tablespace header, page 0. */
+	mach_write_to_4(
+		get_frame(block) + FSP_HEADER_OFFSET + FSP_SPACE_ID,
+		get_space_id());
+
+	/* This is on every page in the tablespace. */
+	mach_write_to_4(
+		get_frame(block) + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
+		get_space_id());
+
+	return(DB_SUCCESS);
+}
+
+/**
+Update the page, set the space id, max trx id and index id.
+Dispatches on the page type; unknown types are treated as corruption.
+@param block - block read from file
+@param page_type - out: type of the page, as read from the frame
+@retval DB_SUCCESS or error code */
+dberr_t
+PageConverter::update_page(
+	buf_block_t*	block,
+	ulint&		page_type) UNIV_NOTHROW
+{
+	dberr_t		err = DB_SUCCESS;
+
+	switch (page_type = fil_page_get_type(get_frame(block))) {
+	case FIL_PAGE_TYPE_FSP_HDR:
+		/* Work directly on the uncompressed page headers. */
+		ut_a(buf_block_get_page_no(block) == 0);
+		return(update_header(block));
+
+	case FIL_PAGE_INDEX:
+		/* We need to decompress the contents into block->frame
+		before we can do any thing with Btree pages. */
+
+		if (is_compressed_table() && !buf_zip_decompress(block, TRUE)) {
+			return(DB_CORRUPTION);
+		}
+
+		/* This is on every page in the tablespace. */
+		mach_write_to_4(
+			get_frame(block)
+			+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, get_space_id());
+
+		/* Only update the Btree nodes. */
+		return(update_index_page(block));
+
+	case FIL_PAGE_TYPE_SYS:
+		/* This is page 0 in the system tablespace. */
+		return(DB_CORRUPTION);
+
+	case FIL_PAGE_TYPE_XDES:
+		err = set_current_xdes(
+			buf_block_get_page_no(block), get_frame(block));
+		/* Fall through: XDES pages also get the space id below. */
+	case FIL_PAGE_INODE:
+	case FIL_PAGE_TYPE_TRX_SYS:
+	case FIL_PAGE_IBUF_FREE_LIST:
+	case FIL_PAGE_TYPE_ALLOCATED:
+	case FIL_PAGE_IBUF_BITMAP:
+	case FIL_PAGE_TYPE_BLOB:
+	case FIL_PAGE_TYPE_ZBLOB:
+	case FIL_PAGE_TYPE_ZBLOB2:
+
+		/* Work directly on the uncompressed page headers. */
+		/* This is on every page in the tablespace. */
+		mach_write_to_4(
+			get_frame(block)
+			+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, get_space_id());
+
+		return(err);
+	}
+
+	ib_logf(IB_LOG_LEVEL_WARN, "Unknown page type (%lu)", page_type);
+
+	return(DB_CORRUPTION);
+}
+
+/**
+Validate the page
+@param offset - physical offset within file.
+@param page - page read from file.
+@return status */
+PageConverter::import_page_status_t
+PageConverter::validate(
+ os_offset_t offset,
+ buf_block_t* block) UNIV_NOTHROW
+{
+ buf_frame_t* page = get_frame(block);
+
+ /* Check that the page number corresponds to the offset in
+ the file. Flag as corrupt if it doesn't. Disable the check
+ for LSN in buf_page_is_corrupted() */
+
+ if (buf_page_is_corrupted(false, page, get_zip_size())
+ || (page_get_page_no(page) != offset / m_page_size
+ && page_get_page_no(page) != 0)) {
+
+ return(IMPORT_PAGE_STATUS_CORRUPTED);
+
+ } else if (offset > 0 && page_get_page_no(page) == 0) {
+ const byte* b = page;
+ const byte* e = b + m_page_size;
+
+ /* If the page number is zero and offset > 0 then
+ the entire page MUST consist of zeroes. If not then
+ we flag it as corrupt. */
+
+ while (b != e) {
+
+ if (*b++ && !trigger_corruption()) {
+ return(IMPORT_PAGE_STATUS_CORRUPTED);
+ }
+ }
+
+ /* The page is all zero: do nothing. */
+ return(IMPORT_PAGE_STATUS_ALL_ZERO);
+ }
+
+ return(IMPORT_PAGE_STATUS_OK);
+}
+
/**
Called for every page in the tablespace. If the page was not
updated then its state must be set to BUF_PAGE_NOT_USED.
@param offset - physical offset within the file
@param block - block read from file, note it is not from the buffer pool
@retval DB_SUCCESS or error code. */
dberr_t
PageConverter::operator() (
	os_offset_t	offset,
	buf_block_t*	block) UNIV_NOTHROW
{
	ulint		page_type;
	dberr_t		err = DB_SUCCESS;

	/* Allows a long-running import to be interrupted. */
	if ((err = periodic_check()) != DB_SUCCESS) {
		return(err);
	}

	if (is_compressed_table()) {
		m_page_zip_ptr = &block->page.zip;
	} else {
		ut_ad(m_page_zip_ptr == 0);
	}

	switch(validate(offset, block)) {
	case IMPORT_PAGE_STATUS_OK:

		/* We have to decompress the compressed pages before
		we can work on them */

		/* page_type is set by update_page() before it is read
		below. */
		if ((err = update_page(block, page_type)) != DB_SUCCESS) {
			return(err);
		}

		/* Note: For compressed pages this function will write to the
		zip descriptor and for uncompressed pages it will write to
		page (ie. the block->frame). Therefore the caller should write
		out the descriptor contents and not block->frame for compressed
		pages. */

		if (!is_compressed_table() || page_type == FIL_PAGE_INDEX) {

			buf_flush_init_for_writing(
				!is_compressed_table()
				? block->frame : block->page.zip.data,
				!is_compressed_table() ? 0 : m_page_zip_ptr,
				m_current_lsn);
		} else {
			/* Calculate and update the checksum of non-btree
			pages for compressed tables explicitly here. */

			buf_flush_update_zip_checksum(
				get_frame(block), get_zip_size(),
				m_current_lsn);
		}

		break;

	case IMPORT_PAGE_STATUS_ALL_ZERO:
		/* The page is all zero: leave it as is. */
		break;

	case IMPORT_PAGE_STATUS_CORRUPTED:

		ib_logf(IB_LOG_LEVEL_WARN,
			"%s: Page %lu at offset " UINT64PF " looks corrupted.",
			m_filepath, (ulong) (offset / m_page_size), offset);

		return(DB_CORRUPTION);
	}

	return(err);
}
+
/*****************************************************************//**
Clean up after import tablespace failure, this function will acquire
the dictionary latches on behalf of the transaction if the transaction
hasn't already acquired them. */
static	__attribute__((nonnull))
void
row_import_discard_changes(
/*=======================*/
	row_prebuilt_t*	prebuilt,	/*!< in/out: prebuilt from handler */
	trx_t*		trx,		/*!< in/out: transaction for import */
	dberr_t		err)		/*!< in: error code */
{
	dict_table_t*	table = prebuilt->table;

	/* Only called on failure paths. */
	ut_a(err != DB_SUCCESS);

	prebuilt->trx->error_info = NULL;

	char	table_name[MAX_FULL_NAME_LEN + 1];

	innobase_format_name(
		table_name, sizeof(table_name),
		prebuilt->table->name, FALSE);

	ib_logf(IB_LOG_LEVEL_INFO,
		"Discarding tablespace of table %s: %s",
		table_name, ut_strerr(err));

	/* Take the dictionary X-latch on behalf of the transaction
	if it does not hold it yet; it must hold it exclusively below. */
	if (trx->dict_operation_lock_mode != RW_X_LATCH) {
		ut_a(trx->dict_operation_lock_mode == 0);
		row_mysql_lock_data_dictionary(trx);
	}

	ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);

	/* Since we update the index root page numbers on disk after
	we've done a successful import. The table will not be loadable.
	However, we need to ensure that the in memory root page numbers
	are reset to "NULL". */

	for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
		index != 0;
		index = UT_LIST_GET_NEXT(indexes, index)) {

		index->page = FIL_NULL;
		index->space = FIL_NULL;
	}

	table->ibd_file_missing = TRUE;

	fil_close_tablespace(trx, table->space);
}
+
/*****************************************************************//**
Clean up after import tablespace: discard changes on failure, then
commit the import transaction, release the dictionary latch and
force a log checkpoint.
@return the error code passed in */
static	__attribute__((nonnull, warn_unused_result))
dberr_t
row_import_cleanup(
/*===============*/
	row_prebuilt_t*	prebuilt,	/*!< in/out: prebuilt from handler */
	trx_t*		trx,		/*!< in/out: transaction for import */
	dberr_t		err)		/*!< in: error code */
{
	/* The import transaction is separate from the session's. */
	ut_a(prebuilt->trx != trx);

	if (err != DB_SUCCESS) {
		/* Roll back the in-memory state of the import;
		also acquires the dictionary X-latch if needed. */
		row_import_discard_changes(prebuilt, trx, err);
	}

	/* Either the caller or row_import_discard_changes() must
	have X-latched the data dictionary by now. */
	ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);

	DBUG_EXECUTE_IF("ib_import_before_commit_crash", DBUG_SUICIDE(););

	trx_commit_for_mysql(trx);

	row_mysql_unlock_data_dictionary(trx);

	trx_free_for_mysql(trx);

	prebuilt->trx->op_info = "";

	DBUG_EXECUTE_IF("ib_import_before_checkpoint_crash", DBUG_SUICIDE(););

	log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);

	return(err);
}
+
+/*****************************************************************//**
+Report error during tablespace import. */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_import_error(
+/*=============*/
+ row_prebuilt_t* prebuilt, /*!< in/out: prebuilt from handler */
+ trx_t* trx, /*!< in/out: transaction for import */
+ dberr_t err) /*!< in: error code */
+{
+ if (!trx_is_interrupted(trx)) {
+ char table_name[MAX_FULL_NAME_LEN + 1];
+
+ innobase_format_name(
+ table_name, sizeof(table_name),
+ prebuilt->table->name, FALSE);
+
+ ib_senderrf(
+ trx->mysql_thd, IB_LOG_LEVEL_WARN,
+ ER_INNODB_IMPORT_ERROR,
+ table_name, (ulong) err, ut_strerr(err));
+ }
+
+ return(row_import_cleanup(prebuilt, trx, err));
+}
+
/*****************************************************************//**
Adjust the root page index node and leaf node segment headers, update
with the new space id. For all the table's secondary indexes.
@return error code */
static	__attribute__((nonnull, warn_unused_result))
dberr_t
row_import_adjust_root_pages_of_secondary_indexes(
/*==============================================*/
	row_prebuilt_t*	prebuilt,	/*!< in/out: prebuilt from
					handler */
	trx_t*		trx,		/*!< in: transaction used for
					the import */
	dict_table_t*	table,		/*!< in: table the indexes
					belong to */
	const row_import&	cfg)	/*!< Import context */
{
	dict_index_t*	index;
	ulint		n_rows_in_table;
	dberr_t		err = DB_SUCCESS;

	/* Skip the clustered index. */
	index = dict_table_get_first_index(table);

	/* The clustered index row count is the reference count that
	every secondary index must match after purge. */
	n_rows_in_table = cfg.get_n_rows(index->name);

	DBUG_EXECUTE_IF("ib_import_sec_rec_count_mismatch_failure",
			n_rows_in_table++;);

	/* Adjust the root pages of the secondary indexes only. */
	while ((index = dict_table_get_next_index(index)) != NULL) {
		char	index_name[MAX_FULL_NAME_LEN + 1];

		innobase_format_name(
			index_name, sizeof(index_name), index->name, TRUE);

		ut_a(!dict_index_is_clust(index));

		if (!(index->type & DICT_CORRUPT)
		    && index->space != FIL_NULL
		    && index->page != FIL_NULL) {

			/* Update the Btree segment headers for index node and
			leaf nodes in the root page. Set the new space id. */

			err = btr_root_adjust_on_import(index);
		} else {
			ib_logf(IB_LOG_LEVEL_WARN,
				"Skip adjustment of root pages for "
				"index %s.", index->name);

			err = DB_CORRUPTION;
		}

		if (err != DB_SUCCESS) {

			/* NOTE(review): this branch looks unreachable,
			since the loop asserts !dict_index_is_clust()
			above — confirm before removing. */
			if (index->type & DICT_CLUSTERED) {
				break;
			}

			ib_errf(trx->mysql_thd,
				IB_LOG_LEVEL_WARN,
				ER_INNODB_INDEX_CORRUPT,
				"Index '%s' not found or corrupt, "
				"you should recreate this index.",
				index_name);

			/* Do not bail out, so that the data
			can be recovered. */

			err = DB_SUCCESS;
			index->type |= DICT_CORRUPT;
			continue;
		}

		/* If we failed to purge any records in the index then
		do it the hard way.

		TODO: We can do this in the first pass by generating UNDO log
		records for the failed rows. */

		if (!cfg.requires_purge(index->name)) {
			continue;
		}

		IndexPurge   purge(trx, index);

		trx->op_info = "secondary: purge delete marked records";

		err = purge.garbage_collect();

		trx->op_info = "";

		if (err != DB_SUCCESS) {
			break;
		} else if (purge.get_n_rows() != n_rows_in_table) {

			/* Row count mismatch with the clustered index:
			mark the index corrupt but keep importing. */
			ib_errf(trx->mysql_thd,
				IB_LOG_LEVEL_WARN,
				ER_INNODB_INDEX_CORRUPT,
				"Index '%s' contains %lu entries, "
				"should be %lu, you should recreate "
				"this index.", index_name,
				(ulong) purge.get_n_rows(),
				(ulong) n_rows_in_table);

			index->type |= DICT_CORRUPT;

			/* Do not bail out, so that the data
			can be recovered. */

			err = DB_SUCCESS;
		}
	}

	return(err);
}
+
/*****************************************************************//**
Ensure that dict_sys->row_id exceeds SELECT MAX(DB_ROW_ID). Reads the
last record of the clustered index (without redo logging) and bumps
the global row id counter if the imported table contains a larger one.
@return error code */
static	__attribute__((nonnull, warn_unused_result))
dberr_t
row_import_set_sys_max_row_id(
/*==========================*/
	row_prebuilt_t*	prebuilt,	/*!< in/out: prebuilt from
					handler */
	const dict_table_t*	table)	/*!< in: table to import */
{
	dberr_t		err;
	const rec_t*	rec;
	mtr_t		mtr;
	btr_pcur_t	pcur;
	row_id_t	row_id	= 0;
	dict_index_t*	index;

	index = dict_table_get_first_index(table);
	ut_a(dict_index_is_clust(index));

	mtr_start(&mtr);

	/* Read-only scan: no redo needed. */
	mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);

	/* Position the cursor after the last user record. */
	btr_pcur_open_at_index_side(
		false,		// High end
		index,
		BTR_SEARCH_LEAF,
		&pcur,
		true,		// Init cursor
		0,		// Leaf level
		&mtr);

	btr_pcur_move_to_prev_on_page(&pcur);
	rec = btr_pcur_get_rec(&pcur);

	/* Check for empty table. */
	if (!page_rec_is_infimum(rec)) {
		ulint		len;
		const byte*	field;
		mem_heap_t*	heap = NULL;
		ulint		offsets_[1 + REC_OFFS_HEADER_SIZE];
		ulint*		offsets;

		rec_offs_init(offsets_);

		offsets = rec_get_offsets(
			rec, index, offsets_, ULINT_UNDEFINED, &heap);

		/* Extract the DB_ROW_ID system column of the last record. */
		field = rec_get_nth_field(
			rec, offsets,
			dict_index_get_sys_col_pos(index, DATA_ROW_ID),
			&len);

		if (len == DATA_ROW_ID_LEN) {
			row_id = mach_read_from_6(field);
			err = DB_SUCCESS;
		} else {
			err = DB_CORRUPTION;
		}

		if (heap != NULL) {
			mem_heap_free(heap);
		}
	} else {
		/* The table is empty. */
		err = DB_SUCCESS;
	}

	btr_pcur_close(&pcur);
	mtr_commit(&mtr);

	DBUG_EXECUTE_IF("ib_import_set_max_rowid_failure",
			err = DB_CORRUPTION;);

	if (err != DB_SUCCESS) {
		char		index_name[MAX_FULL_NAME_LEN + 1];

		innobase_format_name(
			index_name, sizeof(index_name), index->name, TRUE);

		ib_errf(prebuilt->trx->mysql_thd,
			IB_LOG_LEVEL_WARN,
			ER_INNODB_INDEX_CORRUPT,
			"Index '%s' corruption detected, invalid DB_ROW_ID "
			"in index.", index_name);

		return(err);

	} else if (row_id > 0) {

		/* Update the system row id if the imported index row id is
		greater than the max system row id. */

		mutex_enter(&dict_sys->mutex);

		if (row_id >= dict_sys->row_id) {
			dict_sys->row_id = row_id + 1;
			dict_hdr_flush_row_id();
		}

		mutex_exit(&dict_sys->mutex);
	}

	return(DB_SUCCESS);
}
+
+/*****************************************************************//**
+Read the a string from the meta data file.
+@return DB_SUCCESS or error code. */
+static
+dberr_t
+row_import_cfg_read_string(
+/*=======================*/
+ FILE* file, /*!< in/out: File to read from */
+ byte* ptr, /*!< out: string to read */
+ ulint max_len) /*!< in: maximum length of the output
+ buffer in bytes */
+{
+ DBUG_EXECUTE_IF("ib_import_string_read_error",
+ errno = EINVAL; return(DB_IO_ERROR););
+
+ ulint len = 0;
+
+ while (!feof(file)) {
+ int ch = fgetc(file);
+
+ if (ch == EOF) {
+ break;
+ } else if (ch != 0) {
+ if (len < max_len) {
+ ptr[len++] = ch;
+ } else {
+ break;
+ }
+ /* max_len includes the NUL byte */
+ } else if (len != max_len - 1) {
+ break;
+ } else {
+ ptr[len] = 0;
+ return(DB_SUCCESS);
+ }
+ }
+
+ errno = EINVAL;
+
+ return(DB_IO_ERROR);
+}
+
/*********************************************************************//**
Read the meta data (index user fields) config file. Each field row is
[prefix_len, fixed_len, name_len] followed by the NUL-terminated name.
@return DB_SUCCESS or error code. */
static	__attribute__((nonnull, warn_unused_result))
dberr_t
row_import_cfg_read_index_fields(
/*=============================*/
	FILE*		file,	/*!< in: file to read from */
	THD*		thd,	/*!< in/out: session */
	row_index_t*	index,	/*!< Index being read in */
	row_import*	cfg)	/*!< in/out: meta-data read */
{
	byte			row[sizeof(ib_uint32_t) * 3];
	ulint			n_fields = index->m_n_fields;

	index->m_fields = new(std::nothrow) dict_field_t[n_fields];

	/* Trigger OOM */
	DBUG_EXECUTE_IF("ib_import_OOM_4",
			delete [] index->m_fields; index->m_fields = 0;);

	if (index->m_fields == 0) {
		return(DB_OUT_OF_MEMORY);
	}

	dict_field_t*	field = index->m_fields;

	memset(field, 0x0, sizeof(*field) * n_fields);

	for (ulint i = 0; i < n_fields; ++i, ++field) {
		byte*		ptr = row;

		/* Trigger EOF */
		DBUG_EXECUTE_IF("ib_import_io_read_error_1",
				(void) fseek(file, 0L, SEEK_END););

		if (fread(row, 1, sizeof(row), file) != sizeof(row)) {

			ib_senderrf(
				thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
				errno, strerror(errno),
				"while reading index fields.");

			return(DB_IO_ERROR);
		}

		field->prefix_len = mach_read_from_4(ptr);
		ptr += sizeof(ib_uint32_t);

		field->fixed_len = mach_read_from_4(ptr);
		ptr += sizeof(ib_uint32_t);

		/* Include the NUL byte in the length. */
		ulint	len = mach_read_from_4(ptr);

		/* NOTE(review): len comes from the file and is not
		range-checked here, unlike the index-name length in
		row_import_read_index_data() — confirm the writer
		bounds it. */
		byte*	name = new(std::nothrow) byte[len];

		/* Trigger OOM */
		DBUG_EXECUTE_IF("ib_import_OOM_5", delete [] name; name = 0;);

		if (name == 0) {
			return(DB_OUT_OF_MEMORY);
		}

		/* Ownership of the buffer passes to the field;
		freed with the row_import meta-data. */
		field->name = reinterpret_cast<const char*>(name);

		dberr_t	err = row_import_cfg_read_string(file, name, len);

		if (err != DB_SUCCESS) {

			ib_senderrf(
				thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
				errno, strerror(errno),
				"while parsing table name.");

			return(err);
		}
	}

	return(DB_SUCCESS);
}
+
/*****************************************************************//**
Read the index names and root page numbers of the indexes and set the values.
Row format [root_page_no, len of str, str ... ]
@return DB_SUCCESS or error code. */
static	__attribute__((nonnull, warn_unused_result))
dberr_t
row_import_read_index_data(
/*=======================*/
	FILE*		file,	/*!< in: File to read from */
	THD*		thd,	/*!< in: session */
	row_import*	cfg)	/*!< in/out: meta-data read */
{
	byte*		ptr;
	row_index_t*	cfg_index;
	/* One serialised index record: 64-bit id + nine 32-bit fields. */
	byte		row[sizeof(index_id_t) + sizeof(ib_uint32_t) * 9];

	/* FIXME: What is the max value? */
	ut_a(cfg->m_n_indexes > 0);
	ut_a(cfg->m_n_indexes < 1024);

	cfg->m_indexes = new(std::nothrow) row_index_t[cfg->m_n_indexes];

	/* Trigger OOM */
	DBUG_EXECUTE_IF("ib_import_OOM_6",
			delete [] cfg->m_indexes; cfg->m_indexes = 0;);

	if (cfg->m_indexes == 0) {
		return(DB_OUT_OF_MEMORY);
	}

	memset(cfg->m_indexes, 0x0, sizeof(*cfg->m_indexes) * cfg->m_n_indexes);

	cfg_index = cfg->m_indexes;

	for (ulint i = 0; i < cfg->m_n_indexes; ++i, ++cfg_index) {
		/* Trigger EOF */
		DBUG_EXECUTE_IF("ib_import_io_read_error_2",
				(void) fseek(file, 0L, SEEK_END););

		/* Read the index data. */
		size_t	n_bytes = fread(row, 1, sizeof(row), file);

		/* Trigger EOF */
		DBUG_EXECUTE_IF("ib_import_io_read_error",
				(void) fseek(file, 0L, SEEK_END););

		if (n_bytes != sizeof(row)) {
			char	msg[BUFSIZ];

			ut_snprintf(msg, sizeof(msg),
				    "while reading index meta-data, expected "
				    "to read %lu bytes but read only %lu "
				    "bytes",
				    (ulong) sizeof(row), (ulong) n_bytes);

			ib_senderrf(
				thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
				errno, strerror(errno), msg);

			ib_logf(IB_LOG_LEVEL_ERROR, "IO Error: %s", msg);

			return(DB_IO_ERROR);
		}

		ptr = row;

		cfg_index->m_id = mach_read_from_8(ptr);
		ptr += sizeof(index_id_t);

		cfg_index->m_space = mach_read_from_4(ptr);
		ptr += sizeof(ib_uint32_t);

		cfg_index->m_page_no = mach_read_from_4(ptr);
		ptr += sizeof(ib_uint32_t);

		cfg_index->m_type = mach_read_from_4(ptr);
		ptr += sizeof(ib_uint32_t);

		cfg_index->m_trx_id_offset = mach_read_from_4(ptr);
		/* Detect narrowing: if m_trx_id_offset cannot hold the
		32-bit value read, the re-read comparison fails. */
		if (cfg_index->m_trx_id_offset != mach_read_from_4(ptr)) {
			ut_ad(0);
			/* Overflow. Pretend that the clustered index
			has a variable-length PRIMARY KEY. */
			cfg_index->m_trx_id_offset = 0;
		}
		ptr += sizeof(ib_uint32_t);

		cfg_index->m_n_user_defined_cols = mach_read_from_4(ptr);
		ptr += sizeof(ib_uint32_t);

		cfg_index->m_n_uniq = mach_read_from_4(ptr);
		ptr += sizeof(ib_uint32_t);

		cfg_index->m_n_nullable = mach_read_from_4(ptr);
		ptr += sizeof(ib_uint32_t);

		cfg_index->m_n_fields = mach_read_from_4(ptr);
		ptr += sizeof(ib_uint32_t);

		/* The NUL byte is included in the name length. */
		ulint	len = mach_read_from_4(ptr);

		if (len > OS_FILE_MAX_PATH) {
			ib_errf(thd, IB_LOG_LEVEL_ERROR,
				ER_INNODB_INDEX_CORRUPT,
				"Index name length (%lu) is too long, "
				"the meta-data is corrupt", len);

			return(DB_CORRUPTION);
		}

		cfg_index->m_name = new(std::nothrow) byte[len];

		/* Trigger OOM */
		DBUG_EXECUTE_IF("ib_import_OOM_7",
				delete [] cfg_index->m_name;
				cfg_index->m_name = 0;);

		if (cfg_index->m_name == 0) {
			return(DB_OUT_OF_MEMORY);
		}

		dberr_t	err;

		err = row_import_cfg_read_string(file, cfg_index->m_name, len);

		if (err != DB_SUCCESS) {

			ib_senderrf(
				thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
				errno, strerror(errno),
				"while parsing index name.");

			return(err);
		}

		/* Read the per-field meta-data for this index. */
		err = row_import_cfg_read_index_fields(
			file, thd, cfg_index, cfg);

		if (err != DB_SUCCESS) {
			return(err);
		}

	}

	return(DB_SUCCESS);
}
+
+/*****************************************************************//**
+Set the index root page number for v1 format.
+@return DB_SUCCESS or error code. */
+static
+dberr_t
+row_import_read_indexes(
+/*====================*/
+ FILE* file, /*!< in: File to read from */
+ THD* thd, /*!< in: session */
+ row_import* cfg) /*!< in/out: meta-data read */
+{
+ byte row[sizeof(ib_uint32_t)];
+
+ /* Trigger EOF */
+ DBUG_EXECUTE_IF("ib_import_io_read_error_3",
+ (void) fseek(file, 0L, SEEK_END););
+
+ /* Read the number of indexes. */
+ if (fread(row, 1, sizeof(row), file) != sizeof(row)) {
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ errno, strerror(errno),
+ "while reading number of indexes.");
+
+ return(DB_IO_ERROR);
+ }
+
+ cfg->m_n_indexes = mach_read_from_4(row);
+
+ if (cfg->m_n_indexes == 0) {
+ ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ "Number of indexes in meta-data file is 0");
+
+ return(DB_CORRUPTION);
+
+ } else if (cfg->m_n_indexes > 1024) {
+ // FIXME: What is the upper limit? */
+ ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ "Number of indexes in meta-data file is too high: %lu",
+ (ulong) cfg->m_n_indexes);
+ cfg->m_n_indexes = 0;
+
+ return(DB_CORRUPTION);
+ }
+
+ return(row_import_read_index_data(file, thd, cfg));
+}
+
+/*********************************************************************//**
+Read the meta data (table columns) config file. Deserialise the contents of
+dict_col_t structure, along with the column name. */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_import_read_columns(
+/*====================*/
+ FILE* file, /*!< in: file to write to */
+ THD* thd, /*!< in/out: session */
+ row_import* cfg) /*!< in/out: meta-data read */
+{
+ dict_col_t* col;
+ byte row[sizeof(ib_uint32_t) * 8];
+
+ /* FIXME: What should the upper limit be? */
+ ut_a(cfg->m_n_cols > 0);
+ ut_a(cfg->m_n_cols < 1024);
+
+ cfg->m_cols = new(std::nothrow) dict_col_t[cfg->m_n_cols];
+
+ /* Trigger OOM */
+ DBUG_EXECUTE_IF("ib_import_OOM_8",
+ delete [] cfg->m_cols; cfg->m_cols = 0;);
+
+ if (cfg->m_cols == 0) {
+ return(DB_OUT_OF_MEMORY);
+ }
+
+ cfg->m_col_names = new(std::nothrow) byte* [cfg->m_n_cols];
+
+ /* Trigger OOM */
+ DBUG_EXECUTE_IF("ib_import_OOM_9",
+ delete [] cfg->m_col_names; cfg->m_col_names = 0;);
+
+ if (cfg->m_col_names == 0) {
+ return(DB_OUT_OF_MEMORY);
+ }
+
+ memset(cfg->m_cols, 0x0, sizeof(cfg->m_cols) * cfg->m_n_cols);
+ memset(cfg->m_col_names, 0x0, sizeof(cfg->m_col_names) * cfg->m_n_cols);
+
+ col = cfg->m_cols;
+
+ for (ulint i = 0; i < cfg->m_n_cols; ++i, ++col) {
+ byte* ptr = row;
+
+ /* Trigger EOF */
+ DBUG_EXECUTE_IF("ib_import_io_read_error_4",
+ (void) fseek(file, 0L, SEEK_END););
+
+ if (fread(row, 1, sizeof(row), file) != sizeof(row)) {
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ errno, strerror(errno),
+ "while reading table column meta-data.");
+
+ return(DB_IO_ERROR);
+ }
+
+ col->prtype = mach_read_from_4(ptr);
+ ptr += sizeof(ib_uint32_t);
+
+ col->mtype = mach_read_from_4(ptr);
+ ptr += sizeof(ib_uint32_t);
+
+ col->len = mach_read_from_4(ptr);
+ ptr += sizeof(ib_uint32_t);
+
+ col->mbminmaxlen = mach_read_from_4(ptr);
+ ptr += sizeof(ib_uint32_t);
+
+ col->ind = mach_read_from_4(ptr);
+ ptr += sizeof(ib_uint32_t);
+
+ col->ord_part = mach_read_from_4(ptr);
+ ptr += sizeof(ib_uint32_t);
+
+ col->max_prefix = mach_read_from_4(ptr);
+ ptr += sizeof(ib_uint32_t);
+
+ /* Read in the column name as [len, byte array]. The len
+ includes the NUL byte. */
+
+ ulint len = mach_read_from_4(ptr);
+
+ /* FIXME: What is the maximum column name length? */
+ if (len == 0 || len > 128) {
+ ib_errf(thd, IB_LOG_LEVEL_ERROR,
+ ER_IO_READ_ERROR,
+ "Column name length %lu, is invalid",
+ (ulong) len);
+
+ return(DB_CORRUPTION);
+ }
+
+ cfg->m_col_names[i] = new(std::nothrow) byte[len];
+
+ /* Trigger OOM */
+ DBUG_EXECUTE_IF("ib_import_OOM_10",
+ delete [] cfg->m_col_names[i];
+ cfg->m_col_names[i] = 0;);
+
+ if (cfg->m_col_names[i] == 0) {
+ return(DB_OUT_OF_MEMORY);
+ }
+
+ dberr_t err;
+
+ err = row_import_cfg_read_string(
+ file, cfg->m_col_names[i], len);
+
+ if (err != DB_SUCCESS) {
+
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ errno, strerror(errno),
+ "while parsing table column name.");
+
+ return(err);
+ }
+ }
+
+ return(DB_SUCCESS);
+}
+
+/*****************************************************************//**
+Read the contents of the <tablespace>.cfg file.
+@return DB_SUCCESS or error code. */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_import_read_v1(
+/*===============*/
+ FILE* file, /*!< in: File to read from */
+ THD* thd, /*!< in: session */
+ row_import* cfg) /*!< out: meta data */
+{
+ byte value[sizeof(ib_uint32_t)];
+
+ /* Trigger EOF */
+ DBUG_EXECUTE_IF("ib_import_io_read_error_5",
+ (void) fseek(file, 0L, SEEK_END););
+
+ /* Read the hostname where the tablespace was exported. */
+ if (fread(value, 1, sizeof(value), file) != sizeof(value)) {
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ errno, strerror(errno),
+ "while reading meta-data export hostname length.");
+
+ return(DB_IO_ERROR);
+ }
+
+ ulint len = mach_read_from_4(value);
+
+ /* NUL byte is part of name length. */
+ cfg->m_hostname = new(std::nothrow) byte[len];
+
+ /* Trigger OOM */
+ DBUG_EXECUTE_IF("ib_import_OOM_1",
+ delete [] cfg->m_hostname; cfg->m_hostname = 0;);
+
+ if (cfg->m_hostname == 0) {
+ return(DB_OUT_OF_MEMORY);
+ }
+
+ dberr_t err = row_import_cfg_read_string(file, cfg->m_hostname, len);
+
+ if (err != DB_SUCCESS) {
+
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ errno, strerror(errno),
+ "while parsing export hostname.");
+
+ return(err);
+ }
+
+ /* Trigger EOF */
+ DBUG_EXECUTE_IF("ib_import_io_read_error_6",
+ (void) fseek(file, 0L, SEEK_END););
+
+ /* Read the table name of tablespace that was exported. */
+ if (fread(value, 1, sizeof(value), file) != sizeof(value)) {
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ errno, strerror(errno),
+ "while reading meta-data table name length.");
+
+ return(DB_IO_ERROR);
+ }
+
+ len = mach_read_from_4(value);
+
+ /* NUL byte is part of name length. */
+ cfg->m_table_name = new(std::nothrow) byte[len];
+
+ /* Trigger OOM */
+ DBUG_EXECUTE_IF("ib_import_OOM_2",
+ delete [] cfg->m_table_name; cfg->m_table_name = 0;);
+
+ if (cfg->m_table_name == 0) {
+ return(DB_OUT_OF_MEMORY);
+ }
+
+ err = row_import_cfg_read_string(file, cfg->m_table_name, len);
+
+ if (err != DB_SUCCESS) {
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ errno, strerror(errno),
+ "while parsing table name.");
+
+ return(err);
+ }
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Importing tablespace for table '%s' that was exported "
+ "from host '%s'", cfg->m_table_name, cfg->m_hostname);
+
+ byte row[sizeof(ib_uint32_t) * 3];
+
+ /* Trigger EOF */
+ DBUG_EXECUTE_IF("ib_import_io_read_error_7",
+ (void) fseek(file, 0L, SEEK_END););
+
+ /* Read the autoinc value. */
+ if (fread(row, 1, sizeof(ib_uint64_t), file) != sizeof(ib_uint64_t)) {
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ errno, strerror(errno),
+ "while reading autoinc value.");
+
+ return(DB_IO_ERROR);
+ }
+
+ cfg->m_autoinc = mach_read_from_8(row);
+
+ /* Trigger EOF */
+ DBUG_EXECUTE_IF("ib_import_io_read_error_8",
+ (void) fseek(file, 0L, SEEK_END););
+
+ /* Read the tablespace page size. */
+ if (fread(row, 1, sizeof(row), file) != sizeof(row)) {
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ errno, strerror(errno),
+ "while reading meta-data header.");
+
+ return(DB_IO_ERROR);
+ }
+
+ byte* ptr = row;
+
+ cfg->m_page_size = mach_read_from_4(ptr);
+ ptr += sizeof(ib_uint32_t);
+
+ if (cfg->m_page_size != UNIV_PAGE_SIZE) {
+
+ ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH,
+ "Tablespace to be imported has a different "
+ "page size than this server. Server page size "
+ "is %lu, whereas tablespace page size is %lu",
+ UNIV_PAGE_SIZE, (ulong) cfg->m_page_size);
+
+ return(DB_ERROR);
+ }
+
+ cfg->m_flags = mach_read_from_4(ptr);
+ ptr += sizeof(ib_uint32_t);
+
+ cfg->m_n_cols = mach_read_from_4(ptr);
+
+ if (!dict_tf_is_valid(cfg->m_flags)) {
+
+ return(DB_CORRUPTION);
+
+ } else if ((err = row_import_read_columns(file, thd, cfg))
+ != DB_SUCCESS) {
+
+ return(err);
+
+ } else if ((err = row_import_read_indexes(file, thd, cfg))
+ != DB_SUCCESS) {
+
+ return(err);
+ }
+
+ ut_a(err == DB_SUCCESS);
+ return(err);
+}
+
+/**
+Read the contents of the <tablespace>.cfg file.
+@return DB_SUCCESS or error code. */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_import_read_meta_data(
+/*======================*/
+ dict_table_t* table, /*!< in: table */
+ FILE* file, /*!< in: File to read from */
+ THD* thd, /*!< in: session */
+ row_import& cfg) /*!< out: contents of the .cfg file */
+{
+ byte row[sizeof(ib_uint32_t)];
+
+ /* Trigger EOF */
+ DBUG_EXECUTE_IF("ib_import_io_read_error_9",
+ (void) fseek(file, 0L, SEEK_END););
+
+ if (fread(&row, 1, sizeof(row), file) != sizeof(row)) {
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ errno, strerror(errno),
+ "while reading meta-data version.");
+
+ return(DB_IO_ERROR);
+ }
+
+ cfg.m_version = mach_read_from_4(row);
+
+ /* Check the version number. */
+ switch (cfg.m_version) {
+ case IB_EXPORT_CFG_VERSION_V1:
+
+ return(row_import_read_v1(file, thd, &cfg));
+ default:
+ ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
+ "Unsupported meta-data version number (%lu), "
+ "file ignored", (ulong) cfg.m_version);
+ }
+
+ return(DB_ERROR);
+}
+
+/**
+Read the contents of the <tablename>.cfg file.
+@return DB_SUCCESS or error code. */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_import_read_cfg(
+/*================*/
+ dict_table_t* table, /*!< in: table */
+ THD* thd, /*!< in: session */
+ row_import& cfg) /*!< out: contents of the .cfg file */
+{
+ dberr_t err;
+ char name[OS_FILE_MAX_PATH];
+
+ cfg.m_table = table;
+
+ srv_get_meta_data_filename(table, name, sizeof(name));
+
+ FILE* file = fopen(name, "rb");
+
+ if (file == NULL) {
+ char msg[BUFSIZ];
+
+ ut_snprintf(msg, sizeof(msg),
+ "Error opening '%s', will attempt to import "
+ "without schema verification", name);
+
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_WARN, ER_IO_READ_ERROR,
+ errno, strerror(errno), msg);
+
+ cfg.m_missing = true;
+
+ err = DB_FAIL;
+ } else {
+
+ cfg.m_missing = false;
+
+ err = row_import_read_meta_data(table, file, thd, cfg);
+ fclose(file);
+ }
+
+ return(err);
+}
+
/*****************************************************************//**
Update the &lt;space, root page&gt; of a table's indexes from the values
in the data dictionary.
@return DB_SUCCESS or error code */
UNIV_INTERN
dberr_t
row_import_update_index_root(
/*=========================*/
	trx_t*			trx,		/*!< in/out: transaction that
						covers the update */
	const dict_table_t*	table,		/*!< in: Table for which we want
						to set the root page_no */
	bool			reset,		/*!< in: if true then set to
						FIL_NULL */
	bool			dict_locked)	/*!< in: Set to true if the
						caller already owns the
						dict_sys_t:: mutex. */

{
	const dict_index_t*	index;
	que_t*			graph = 0;
	dberr_t			err = DB_SUCCESS;

	/* The parsed graph is created once and re-used (with re-bound
	literals) for every index of the table. */
	static const char	sql[] = {
		"PROCEDURE UPDATE_INDEX_ROOT() IS\n"
		"BEGIN\n"
		"UPDATE SYS_INDEXES\n"
		"SET SPACE = :space,\n"
		"    PAGE_NO = :page,\n"
		"    TYPE = :type\n"
		"WHERE TABLE_ID = :table_id AND ID = :index_id;\n"
		"END;\n"};

	if (!dict_locked) {
		mutex_enter(&dict_sys->mutex);
	}

	/* NOTE(review): if the loop body never runs (a table with no
	indexes), graph stays 0 when passed to que_graph_free() below —
	presumably every table has at least a clustered index; confirm. */
	for (index = dict_table_get_first_index(table);
	     index != 0;
	     index = dict_table_get_next_index(index)) {

		pars_info_t*	info;
		ib_uint32_t	page;
		ib_uint32_t	space;
		ib_uint32_t	type;
		index_id_t	index_id;
		table_id_t	table_id;

		info = (graph != 0) ? graph->info : pars_info_create();

		/* Serialise the literal values in InnoDB on-disk
		(big-endian) format, as the parser binding expects. */
		mach_write_to_4(
			reinterpret_cast<byte*>(&type),
			index->type);

		mach_write_to_4(
			reinterpret_cast<byte*>(&page),
			reset ? FIL_NULL : index->page);

		mach_write_to_4(
			reinterpret_cast<byte*>(&space),
			reset ? FIL_NULL : index->space);

		mach_write_to_8(
			reinterpret_cast<byte*>(&index_id),
			index->id);

		mach_write_to_8(
			reinterpret_cast<byte*>(&table_id),
			table->id);

		/* If we set the corrupt bit during the IMPORT phase then
		we need to update the system tables. */
		pars_info_bind_int4_literal(info, "type", &type);
		pars_info_bind_int4_literal(info, "space", &space);
		pars_info_bind_int4_literal(info, "page", &page);
		pars_info_bind_ull_literal(info, "index_id", &index_id);
		pars_info_bind_ull_literal(info, "table_id", &table_id);

		if (graph == 0) {
			graph = pars_sql(info, sql);
			ut_a(graph);
			graph->trx = trx;
		}

		que_thr_t*	thr;

		graph->fork_type = QUE_FORK_MYSQL_INTERFACE;

		ut_a(thr = que_fork_start_command(graph));

		que_run_threads(thr);

		DBUG_EXECUTE_IF("ib_import_internal_error",
				trx->error_state = DB_ERROR;);

		err = trx->error_state;

		if (err != DB_SUCCESS) {
			char	index_name[MAX_FULL_NAME_LEN + 1];

			innobase_format_name(
				index_name, sizeof(index_name),
				index->name, TRUE);

			ib_errf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
				ER_INTERNAL_ERROR,
				"While updating the <space, root page "
				"number> of index %s - %s",
				index_name, ut_strerr(err));

			break;
		}
	}

	que_graph_free(graph);

	if (!dict_locked) {
		mutex_exit(&dict_sys->mutex);
	}

	return(err);
}
+
/** Callback arg for row_import_set_discarded: carries the new state in
and the updated MIX_LEN value (serialised with mach_write_to_4) out. */
struct discard_t {
	ib_uint32_t	flags2;			/*!< Value read from column */
	bool		state;			/*!< New state of the flag */
	ulint		n_recs;			/*!< Number of recs processed */
};
+
+/******************************************************************//**
+Fetch callback that sets or unsets the DISCARDED tablespace flag in
+SYS_TABLES. The flags is stored in MIX_LEN column. Invoked by the InnoDB
+internal SQL interpreter for each row fetched by the cursor in
+row_import_update_discarded_flag(); the new value is stored back into
+discard_t::flags2 for the caller's subsequent UPDATE.
+@return FALSE if all OK */
+static
+ibool
+row_import_set_discarded(
+/*=====================*/
+ void* row, /*!< in: sel_node_t* */
+ void* user_arg) /*!< in: bool set/unset flag */
+{
+ sel_node_t* node = static_cast<sel_node_t*>(row);
+ discard_t* discard = static_cast<discard_t*>(user_arg);
+ /* The SELECT list contains a single column: MIX_LEN. */
+ dfield_t* dfield = que_node_get_val(node->select_list);
+ dtype_t* type = dfield_get_type(dfield);
+ ulint len = dfield_get_len(dfield);
+
+ /* MIX_LEN must be a 4-byte INT column; anything else would
+ indicate data dictionary corruption. */
+ ut_a(dtype_get_mtype(type) == DATA_INT);
+ ut_a(len == sizeof(ib_uint32_t));
+
+ ulint flags2 = mach_read_from_4(
+ static_cast<byte*>(dfield_get_data(dfield)));
+
+ if (discard->state) {
+ flags2 |= DICT_TF2_DISCARDED;
+ } else {
+ flags2 &= ~DICT_TF2_DISCARDED;
+ }
+
+ /* Write the new value through mach_write_to_4() so that flags2 is
+ stored in InnoDB byte order, as expected when the caller binds it
+ via pars_info_bind_int4_literal(). */
+ mach_write_to_4(reinterpret_cast<byte*>(&discard->flags2), flags2);
+
+ ++discard->n_recs;
+
+ /* There should be at most one matching record. */
+ ut_a(discard->n_recs == 1);
+
+ return(FALSE);
+}
+
+/*****************************************************************//**
+Update the DICT_TF2_DISCARDED flag in SYS_TABLES. Reads the current
+MIX_LEN value via a cursor (row_import_set_discarded() flips the bit),
+then writes the new value back with an UPDATE in the same procedure.
+@return DB_SUCCESS or error code. */
+UNIV_INTERN
+dberr_t
+row_import_update_discarded_flag(
+/*=============================*/
+ trx_t* trx, /*!< in/out: transaction that
+ covers the update */
+ table_id_t table_id, /*!< in: Table for which we want
+ to set the root table->flags2 */
+ bool discarded, /*!< in: set MIX_LEN column bit
+ to discarded, if true */
+ bool dict_locked) /*!< in: set to true if the
+ caller already owns the
+ dict_sys_t:: mutex. */
+
+{
+ pars_info_t* info;
+ discard_t discard;
+
+ static const char sql[] =
+ "PROCEDURE UPDATE_DISCARDED_FLAG() IS\n"
+ "DECLARE FUNCTION my_func;\n"
+ "DECLARE CURSOR c IS\n"
+ " SELECT MIX_LEN "
+ " FROM SYS_TABLES "
+ " WHERE ID = :table_id FOR UPDATE;"
+ "\n"
+ "BEGIN\n"
+ "OPEN c;\n"
+ "WHILE 1 = 1 LOOP\n"
+ " FETCH c INTO my_func();\n"
+ " IF c % NOTFOUND THEN\n"
+ " EXIT;\n"
+ " END IF;\n"
+ "END LOOP;\n"
+ "UPDATE SYS_TABLES"
+ " SET MIX_LEN = :flags2"
+ " WHERE ID = :table_id;\n"
+ "CLOSE c;\n"
+ "END;\n";
+
+ /* ULINT32_UNDEFINED marks flags2 as "not yet fetched"; the
+ callback must overwrite it for the assertion below to hold. */
+ discard.n_recs = 0;
+ discard.state = discarded;
+ discard.flags2 = ULINT32_UNDEFINED;
+
+ info = pars_info_create();
+
+ pars_info_add_ull_literal(info, "table_id", table_id);
+ /* Bound by address: the FETCH callback updates discard.flags2
+ before the UPDATE statement reads :flags2. */
+ pars_info_bind_int4_literal(info, "flags2", &discard.flags2);
+
+ pars_info_bind_function(
+ info, "my_func", row_import_set_discarded, &discard);
+
+ dberr_t err = que_eval_sql(info, sql, !dict_locked, trx);
+
+ /* NOTE(review): these assertions run even when que_eval_sql()
+ returned an error, in which case the callback may never have
+ fired (n_recs == 0, flags2 still ULINT32_UNDEFINED) and the
+ server would abort here instead of propagating err. Confirm
+ that every failure path is impossible at this point, otherwise
+ check err before asserting. */
+ ut_a(discard.n_recs == 1);
+ ut_a(discard.flags2 != ULINT32_UNDEFINED);
+
+ return(err);
+}
+
+/*****************************************************************//**
+Imports a tablespace. The space id in the .ibd file must match the space id
+of the table in the data dictionary. Driver for ALTER TABLE ... IMPORT
+TABLESPACE: reads the .cfg meta-data (or discovers root pages from the
+.ibd when the .cfg is missing), converts all pages, opens the tablespace,
+purges leftover delete-marked records, and finally updates SYS_INDEXES /
+SYS_TABLES to make the import durable.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+row_import_for_mysql(
+/*=================*/
+ dict_table_t* table, /*!< in/out: table */
+ row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL */
+{
+ dberr_t err;
+ trx_t* trx;
+ ib_uint64_t autoinc = 0;
+ char table_name[MAX_FULL_NAME_LEN + 1];
+ char* filepath = NULL;
+
+ ut_ad(!srv_read_only_mode);
+
+ innobase_format_name(
+ table_name, sizeof(table_name), table->name, FALSE);
+
+ /* IMPORT is only legal on a table whose tablespace was
+ previously discarded (ibd_file_missing set). */
+ ut_a(table->space);
+ ut_ad(prebuilt->trx);
+ ut_a(table->ibd_file_missing);
+
+ trx_start_if_not_started(prebuilt->trx);
+
+ /* A dedicated transaction is used for the import so that its
+ dict-operation state and undo log are independent of the user
+ transaction. */
+ trx = trx_allocate_for_mysql();
+
+ /* So that the table is not DROPped during recovery. */
+ trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
+
+ trx_start_if_not_started(trx);
+
+ /* So that we can send error messages to the user. */
+ trx->mysql_thd = prebuilt->trx->mysql_thd;
+
+ /* Ensure that the table will be dropped by trx_rollback_active()
+ in case of a crash. */
+
+ trx->table_id = table->id;
+
+ /* Assign an undo segment for the transaction, so that the
+ transaction will be recovered after a crash. */
+
+ mutex_enter(&trx->undo_mutex);
+
+ err = trx_undo_assign_undo(trx, TRX_UNDO_UPDATE);
+
+ mutex_exit(&trx->undo_mutex);
+
+ DBUG_EXECUTE_IF("ib_import_undo_assign_failure",
+ err = DB_TOO_MANY_CONCURRENT_TRXS;);
+
+ if (err != DB_SUCCESS) {
+
+ return(row_import_cleanup(prebuilt, trx, err));
+
+ } else if (trx->update_undo == 0) {
+
+ /* trx_undo_assign_undo() can report success yet leave no
+ update-undo segment when none is available; treat that the
+ same as running out of transactions. */
+ err = DB_TOO_MANY_CONCURRENT_TRXS;
+ return(row_import_cleanup(prebuilt, trx, err));
+ }
+
+ prebuilt->trx->op_info = "read meta-data file";
+
+ /* Prevent DDL operations while we are checking. */
+
+ rw_lock_s_lock_func(&dict_operation_lock, 0, __FILE__, __LINE__);
+
+ row_import cfg;
+
+ memset(&cfg, 0x0, sizeof(cfg));
+
+ err = row_import_read_cfg(table, trx->mysql_thd, cfg);
+
+ /* Check if the table column definitions match the contents
+ of the config file. */
+
+ if (err == DB_SUCCESS) {
+
+ /* We have a schema file, try and match it with the our
+ data dictionary. */
+
+ err = cfg.match_schema(trx->mysql_thd);
+
+ /* Update index->page and SYS_INDEXES.PAGE_NO to match the
+ B-tree root page numbers in the tablespace. Use the index
+ name from the .cfg file to find match. */
+
+ if (err == DB_SUCCESS) {
+ cfg.set_root_by_name();
+ autoinc = cfg.m_autoinc;
+ }
+
+ rw_lock_s_unlock_gen(&dict_operation_lock, 0);
+
+ DBUG_EXECUTE_IF("ib_import_set_index_root_failure",
+ err = DB_TOO_MANY_CONCURRENT_TRXS;);
+
+ } else if (cfg.m_missing) {
+
+ rw_lock_s_unlock_gen(&dict_operation_lock, 0);
+
+ /* We don't have a schema file, we will have to discover
+ the index root pages from the .ibd file and skip the schema
+ matching step. */
+
+ ut_a(err == DB_FAIL);
+
+ cfg.m_page_size = UNIV_PAGE_SIZE;
+
+ FetchIndexRootPages fetchIndexRootPages(table, trx);
+
+ err = fil_tablespace_iterate(
+ table, IO_BUFFER_SIZE(cfg.m_page_size),
+ fetchIndexRootPages);
+
+ if (err == DB_SUCCESS) {
+
+ err = fetchIndexRootPages.build_row_import(&cfg);
+
+ /* Update index->page and SYS_INDEXES.PAGE_NO
+ to match the B-tree root page numbers in the
+ tablespace. */
+
+ if (err == DB_SUCCESS) {
+ err = cfg.set_root_by_heuristic();
+ }
+ }
+
+ } else {
+ /* .cfg exists but could not be read/parsed; err from
+ row_import_read_cfg() is reported below. */
+ rw_lock_s_unlock_gen(&dict_operation_lock, 0);
+ }
+
+ if (err != DB_SUCCESS) {
+ return(row_import_error(prebuilt, trx, err));
+ }
+
+ prebuilt->trx->op_info = "importing tablespace";
+
+ ib_logf(IB_LOG_LEVEL_INFO, "Phase I - Update all pages");
+
+ /* Iterate over all the pages and do the sanity checking and
+ the conversion required to import the tablespace. */
+
+ PageConverter converter(&cfg, trx);
+
+ /* Set the IO buffer size in pages. */
+
+ err = fil_tablespace_iterate(
+ table, IO_BUFFER_SIZE(cfg.m_page_size), converter);
+
+ DBUG_EXECUTE_IF("ib_import_reset_space_and_lsn_failure",
+ err = DB_TOO_MANY_CONCURRENT_TRXS;);
+
+ if (err != DB_SUCCESS) {
+ char table_name[MAX_FULL_NAME_LEN + 1];
+
+ innobase_format_name(
+ table_name, sizeof(table_name), table->name, FALSE);
+
+ ib_errf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_INTERNAL_ERROR,
+ "Cannot reset LSNs in table '%s' : %s",
+ table_name, ut_strerr(err));
+
+ return(row_import_cleanup(prebuilt, trx, err));
+ }
+
+ row_mysql_lock_data_dictionary(trx);
+
+ /* If the table is stored in a remote tablespace, we need to
+ determine that filepath from the link file and system tables.
+ Find the space ID in SYS_TABLES since this is an ALTER TABLE. */
+ if (DICT_TF_HAS_DATA_DIR(table->flags)) {
+ dict_get_and_save_data_dir_path(table, true);
+ ut_a(table->data_dir_path);
+
+ filepath = os_file_make_remote_pathname(
+ table->data_dir_path, table->name, "ibd");
+ } else {
+ filepath = fil_make_ibd_name(table->name, false);
+ }
+ ut_a(filepath);
+
+ /* Open the tablespace so that we can access via the buffer pool.
+ We set the 2nd param (fix_dict = true) here because we already
+ have an x-lock on dict_operation_lock and dict_sys->mutex. */
+
+ err = fil_open_single_table_tablespace(
+ true, true, table->space,
+ dict_tf_to_fsp_flags(table->flags),
+ table->name, filepath);
+
+ DBUG_EXECUTE_IF("ib_import_open_tablespace_failure",
+ err = DB_TABLESPACE_NOT_FOUND;);
+
+ if (err != DB_SUCCESS) {
+ row_mysql_unlock_data_dictionary(trx);
+
+ ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_FILE_NOT_FOUND,
+ filepath, err, ut_strerr(err));
+
+ mem_free(filepath);
+
+ return(row_import_cleanup(prebuilt, trx, err));
+ }
+
+ row_mysql_unlock_data_dictionary(trx);
+
+ mem_free(filepath);
+
+ /* Check that the change-buffer bitmap pages in the imported
+ file are consistent. */
+ err = ibuf_check_bitmap_on_import(trx, table->space);
+
+ DBUG_EXECUTE_IF("ib_import_check_bitmap_failure", err = DB_CORRUPTION;);
+
+ if (err != DB_SUCCESS) {
+ return(row_import_cleanup(prebuilt, trx, err));
+ }
+
+ /* The first index must always be the clustered index. */
+
+ dict_index_t* index = dict_table_get_first_index(table);
+
+ if (!dict_index_is_clust(index)) {
+ return(row_import_error(prebuilt, trx, DB_CORRUPTION));
+ }
+
+ /* Update the Btree segment headers for index node and
+ leaf nodes in the root page. Set the new space id. */
+
+ err = btr_root_adjust_on_import(index);
+
+ DBUG_EXECUTE_IF("ib_import_cluster_root_adjust_failure",
+ err = DB_CORRUPTION;);
+
+ if (err != DB_SUCCESS) {
+ return(row_import_error(prebuilt, trx, err));
+ }
+
+ /* NOTE(review): this second err check is unreachable -- the
+ identical check above has already returned on any error, so
+ only the else-if branch below can ever execute here. */
+ if (err != DB_SUCCESS) {
+ return(row_import_error(prebuilt, trx, err));
+ } else if (cfg.requires_purge(index->name)) {
+
+ /* Purge any delete-marked records that couldn't be
+ purged during the page conversion phase from the
+ cluster index. */
+
+ IndexPurge purge(trx, index);
+
+ trx->op_info = "cluster: purging delete marked records";
+
+ err = purge.garbage_collect();
+
+ trx->op_info = "";
+ }
+
+ DBUG_EXECUTE_IF("ib_import_cluster_failure", err = DB_CORRUPTION;);
+
+ if (err != DB_SUCCESS) {
+ return(row_import_error(prebuilt, trx, err));
+ }
+
+ /* For secondary indexes, purge any records that couldn't be purged
+ during the page conversion phase. */
+
+ err = row_import_adjust_root_pages_of_secondary_indexes(
+ prebuilt, trx, table, cfg);
+
+ DBUG_EXECUTE_IF("ib_import_sec_root_adjust_failure",
+ err = DB_CORRUPTION;);
+
+ if (err != DB_SUCCESS) {
+ return(row_import_error(prebuilt, trx, err));
+ }
+
+ /* Ensure that the next available DB_ROW_ID is not smaller than
+ any DB_ROW_ID stored in the table. */
+
+ if (prebuilt->clust_index_was_generated) {
+
+ err = row_import_set_sys_max_row_id(prebuilt, table);
+
+ if (err != DB_SUCCESS) {
+ return(row_import_error(prebuilt, trx, err));
+ }
+ }
+
+ ib_logf(IB_LOG_LEVEL_INFO, "Phase III - Flush changes to disk");
+
+ /* Ensure that all pages dirtied during the IMPORT make it to disk.
+ The only dirty pages generated should be from the pessimistic purge
+ of delete marked records that couldn't be purged in Phase I. */
+
+ buf_LRU_flush_or_remove_pages(
+ prebuilt->table->space, BUF_REMOVE_FLUSH_WRITE, trx);
+
+ if (trx_is_interrupted(trx)) {
+ ib_logf(IB_LOG_LEVEL_INFO, "Phase III - Flush interrupted");
+ return(row_import_error(prebuilt, trx, DB_INTERRUPTED));
+ } else {
+ ib_logf(IB_LOG_LEVEL_INFO, "Phase IV - Flush complete");
+ }
+
+ /* The dictionary latches will be released in in row_import_cleanup()
+ after the transaction commit, for both success and error. */
+
+ row_mysql_lock_data_dictionary(trx);
+
+ /* Update the root pages of the table's indexes. */
+ err = row_import_update_index_root(trx, table, false, true);
+
+ if (err != DB_SUCCESS) {
+ return(row_import_error(prebuilt, trx, err));
+ }
+
+ /* Update the table's discarded flag, unset it. */
+ err = row_import_update_discarded_flag(trx, table->id, false, true);
+
+ if (err != DB_SUCCESS) {
+ return(row_import_error(prebuilt, trx, err));
+ }
+
+ /* The tablespace is now fully attached; clear the in-memory
+ discarded state to match the dictionary update above. */
+ table->ibd_file_missing = false;
+ table->flags2 &= ~DICT_TF2_DISCARDED;
+
+ if (autoinc != 0) {
+ char table_name[MAX_FULL_NAME_LEN + 1];
+
+ innobase_format_name(
+ table_name, sizeof(table_name), table->name, FALSE);
+
+ ib_logf(IB_LOG_LEVEL_INFO, "%s autoinc value set to " IB_ID_FMT,
+ table_name, autoinc);
+
+ /* Restore the auto-increment counter recorded in the
+ .cfg file (autoinc stays 0 when no .cfg was present). */
+ dict_table_autoinc_lock(table);
+ dict_table_autoinc_initialize(table, autoinc);
+ dict_table_autoinc_unlock(table);
+ }
+
+ ut_a(err == DB_SUCCESS);
+
+ return(row_import_cleanup(prebuilt, trx, err));
+}
+
diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc
index e79518e24de..c1c27152831 100644
--- a/storage/innobase/row/row0ins.cc
+++ b/storage/innobase/row/row0ins.cc
@@ -23,11 +23,8 @@ Insert into a table
Created 4/20/1996 Heikki Tuuri
*******************************************************/
-#include "m_string.h" /* for my_sys.h */
#include "row0ins.h"
-#define DEBUG_SYNC_C_IF_THD(A,B) DEBUG_SYNC(A,B)
-
#ifdef UNIV_NONINL
#include "row0ins.ic"
#endif
@@ -35,6 +32,7 @@ Created 4/20/1996 Heikki Tuuri
#include "ha_prototypes.h"
#include "dict0dict.h"
#include "dict0boot.h"
+#include "trx0rec.h"
#include "trx0undo.h"
#include "btr0btr.h"
#include "btr0cur.h"
@@ -43,6 +41,7 @@ Created 4/20/1996 Heikki Tuuri
#include "row0upd.h"
#include "row0sel.h"
#include "row0row.h"
+#include "row0log.h"
#include "rem0cmp.h"
#include "lock0lock.h"
#include "log0log.h"
@@ -52,6 +51,7 @@ Created 4/20/1996 Heikki Tuuri
#include "buf0lru.h"
#include "fts0fts.h"
#include "fts0types.h"
+#include "m_string.h"
/*************************************************************************
IMPORTANT NOTE: Any operation that generates redo MUST check that there
@@ -101,7 +101,7 @@ ins_node_create(
/***********************************************************//**
Creates an entry template for each index of a table. */
-UNIV_INTERN
+static
void
ins_node_create_entry_list(
/*=======================*/
@@ -222,68 +222,92 @@ Does an insert operation by updating a delete-marked existing record
in the index. This situation can occur if the delete-marked record is
kept in the index for consistent reads.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_ins_sec_index_entry_by_modify(
/*==============================*/
+ ulint flags, /*!< in: undo logging and locking flags */
ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
depending on whether mtr holds just a leaf
latch or also a tree latch */
btr_cur_t* cursor, /*!< in: B-tree cursor */
+ ulint** offsets,/*!< in/out: offsets on cursor->page_cur.rec */
+ mem_heap_t* offsets_heap,
+ /*!< in/out: memory heap that can be emptied */
+ mem_heap_t* heap, /*!< in/out: memory heap */
const dtuple_t* entry, /*!< in: index entry to insert */
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr) /*!< in: mtr; must be committed before
latching any further pages */
{
big_rec_t* dummy_big_rec;
- mem_heap_t* heap;
upd_t* update;
rec_t* rec;
- ulint err;
+ dberr_t err;
rec = btr_cur_get_rec(cursor);
ut_ad(!dict_index_is_clust(cursor->index));
- ut_ad(rec_get_deleted_flag(rec,
- dict_table_is_comp(cursor->index->table)));
+ ut_ad(rec_offs_validate(rec, cursor->index, *offsets));
+ ut_ad(!entry->info_bits);
/* We know that in the alphabetical ordering, entry and rec are
identified. But in their binary form there may be differences if
there are char fields in them. Therefore we have to calculate the
difference. */
- heap = mem_heap_create(1024);
-
update = row_upd_build_sec_rec_difference_binary(
- cursor->index, entry, rec, thr_get_trx(thr), heap);
+ rec, cursor->index, *offsets, entry, heap);
+
+ if (!rec_get_deleted_flag(rec, rec_offs_comp(*offsets))) {
+ /* We should never insert in place of a record that
+ has not been delete-marked. The only exception is when
+ online CREATE INDEX copied the changes that we already
+ made to the clustered index, and completed the
+ secondary index creation before we got here. In this
+ case, the change would already be there. The CREATE
+ INDEX should be waiting for a MySQL meta-data lock
+ upgrade at least until this INSERT or UPDATE
+ returns. After that point, the TEMP_INDEX_PREFIX
+ would be dropped from the index name in
+ commit_inplace_alter_table(). */
+ ut_a(update->n_fields == 0);
+ ut_a(*cursor->index->name == TEMP_INDEX_PREFIX);
+ ut_ad(!dict_index_is_online_ddl(cursor->index));
+ return(DB_SUCCESS);
+ }
+
if (mode == BTR_MODIFY_LEAF) {
/* Try an optimistic updating of the record, keeping changes
within the page */
- err = btr_cur_optimistic_update(BTR_KEEP_SYS_FLAG, cursor,
- update, 0, thr, mtr);
+ /* TODO: pass only *offsets */
+ err = btr_cur_optimistic_update(
+ flags | BTR_KEEP_SYS_FLAG, cursor,
+ offsets, &offsets_heap, update, 0, thr,
+ thr_get_trx(thr)->id, mtr);
switch (err) {
case DB_OVERFLOW:
case DB_UNDERFLOW:
case DB_ZIP_OVERFLOW:
err = DB_FAIL;
+ default:
+ break;
}
} else {
ut_a(mode == BTR_MODIFY_TREE);
if (buf_LRU_buf_pool_running_out()) {
- err = DB_LOCK_TABLE_FULL;
-
- goto func_exit;
+ return(DB_LOCK_TABLE_FULL);
}
- err = btr_cur_pessimistic_update(BTR_KEEP_SYS_FLAG, cursor,
- &heap, &dummy_big_rec, update,
- 0, thr, mtr);
+ err = btr_cur_pessimistic_update(
+ flags | BTR_KEEP_SYS_FLAG, cursor,
+ offsets, &offsets_heap,
+ heap, &dummy_big_rec, update, 0,
+ thr, thr_get_trx(thr)->id, mtr);
ut_ad(!dummy_big_rec);
}
-func_exit:
- mem_heap_free(heap);
return(err);
}
@@ -293,15 +317,20 @@ Does an insert operation by delete unmarking and updating a delete marked
existing record in the index. This situation can occur if the delete marked
record is kept in the index for consistent reads.
@return DB_SUCCESS, DB_FAIL, or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_ins_clust_index_entry_by_modify(
/*================================*/
+ ulint flags, /*!< in: undo logging and locking flags */
ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
depending on whether mtr holds just a leaf
latch or also a tree latch */
btr_cur_t* cursor, /*!< in: B-tree cursor */
- mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
+ ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */
+ mem_heap_t** offsets_heap,
+ /*!< in/out: pointer to memory heap that can
+ be emptied, or NULL */
+ mem_heap_t* heap, /*!< in/out: memory heap */
big_rec_t** big_rec,/*!< out: possible big rec vector of fields
which have to be stored externally by the
caller */
@@ -310,9 +339,9 @@ row_ins_clust_index_entry_by_modify(
mtr_t* mtr) /*!< in: mtr; must be committed before
latching any further pages */
{
- rec_t* rec;
- upd_t* update;
- ulint err;
+ const rec_t* rec;
+ const upd_t* update;
+ dberr_t err;
ut_ad(dict_index_is_clust(cursor->index));
@@ -323,38 +352,40 @@ row_ins_clust_index_entry_by_modify(
ut_ad(rec_get_deleted_flag(rec,
dict_table_is_comp(cursor->index->table)));
- if (!*heap) {
- *heap = mem_heap_create(1024);
- }
-
/* Build an update vector containing all the fields to be modified;
NOTE that this vector may NOT contain system columns trx_id or
roll_ptr */
- update = row_upd_build_difference_binary(cursor->index, entry, rec,
- thr_get_trx(thr), *heap);
- if (mode == BTR_MODIFY_LEAF) {
+ update = row_upd_build_difference_binary(
+ cursor->index, entry, rec, NULL, true,
+ thr_get_trx(thr), heap);
+ if (mode != BTR_MODIFY_TREE) {
+ ut_ad((mode & ~BTR_ALREADY_S_LATCHED) == BTR_MODIFY_LEAF);
+
/* Try optimistic updating of the record, keeping changes
within the page */
- err = btr_cur_optimistic_update(0, cursor, update, 0, thr,
- mtr);
+ err = btr_cur_optimistic_update(
+ flags, cursor, offsets, offsets_heap, update, 0, thr,
+ thr_get_trx(thr)->id, mtr);
switch (err) {
case DB_OVERFLOW:
case DB_UNDERFLOW:
case DB_ZIP_OVERFLOW:
err = DB_FAIL;
+ default:
+ break;
}
} else {
- ut_a(mode == BTR_MODIFY_TREE);
if (buf_LRU_buf_pool_running_out()) {
return(DB_LOCK_TABLE_FULL);
}
err = btr_cur_pessimistic_update(
- BTR_KEEP_POS_FLAG, cursor, heap, big_rec, update,
- 0, thr, mtr);
+ flags | BTR_KEEP_POS_FLAG,
+ cursor, offsets, offsets_heap, heap,
+ big_rec, update, 0, thr, thr_get_trx(thr)->id, mtr);
}
return(err);
@@ -394,7 +425,7 @@ row_ins_cascade_ancestor_updates_table(
Returns the number of ancestor UPDATE or DELETE nodes of a
cascaded update/delete node.
@return number of ancestors */
-static
+static __attribute__((nonnull, warn_unused_result))
ulint
row_ins_cascade_n_ancestors(
/*========================*/
@@ -420,7 +451,7 @@ a cascaded update.
can also be 0 if no foreign key fields changed; the returned value is
ULINT_UNDEFINED if the column type in the child table is too short to
fit the new value in the parent table: that means the update fails */
-static
+static __attribute__((nonnull, warn_unused_result))
ulint
row_ins_cascade_calc_update_vec(
/*============================*/
@@ -691,6 +722,8 @@ row_ins_set_detailed(
trx_t* trx, /*!< in: transaction */
dict_foreign_t* foreign) /*!< in: foreign key constraint */
{
+ ut_ad(!srv_read_only_mode);
+
mutex_enter(&srv_misc_tmpfile_mutex);
rewind(srv_misc_tmpfile);
@@ -717,13 +750,17 @@ row_ins_foreign_trx_print(
/*======================*/
trx_t* trx) /*!< in: transaction */
{
- ulint n_lock_rec;
- ulint n_lock_struct;
+ ulint n_rec_locks;
+ ulint n_trx_locks;
ulint heap_size;
+ if (srv_read_only_mode) {
+ return;
+ }
+
lock_mutex_enter();
- n_lock_rec = lock_number_of_rows_locked(&trx->lock);
- n_lock_struct = UT_LIST_GET_LEN(trx->lock.trx_locks);
+ n_rec_locks = lock_number_of_rows_locked(&trx->lock);
+ n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks);
heap_size = mem_heap_get_size(trx->lock.lock_heap);
lock_mutex_exit();
@@ -735,7 +772,7 @@ row_ins_foreign_trx_print(
fputs(" Transaction:\n", dict_foreign_err_file);
trx_print_low(dict_foreign_err_file, trx, 600,
- n_lock_rec, n_lock_struct, heap_size);
+ n_rec_locks, n_trx_locks, heap_size);
mutex_exit(&trx_sys->mutex);
@@ -759,6 +796,10 @@ row_ins_foreign_report_err(
const dtuple_t* entry) /*!< in: index entry in the parent
table */
{
+ if (srv_read_only_mode) {
+ return;
+ }
+
FILE* ef = dict_foreign_err_file;
trx_t* trx = thr_get_trx(thr);
@@ -810,6 +851,10 @@ row_ins_foreign_report_add_err(
const dtuple_t* entry) /*!< in: index entry to insert in the
child table */
{
+ if (srv_read_only_mode) {
+ return;
+ }
+
FILE* ef = dict_foreign_err_file;
row_ins_set_detailed(trx, foreign);
@@ -879,8 +924,8 @@ Perform referential actions or checks when a parent row is deleted or updated
and the constraint had an ON DELETE or ON UPDATE condition which was not
RESTRICT.
@return DB_SUCCESS, DB_LOCK_WAIT, or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_ins_foreign_check_on_constraint(
/*================================*/
que_thr_t* thr, /*!< in: query thread whose run_node
@@ -906,7 +951,7 @@ row_ins_foreign_check_on_constraint(
const buf_block_t* clust_block;
upd_t* update;
ulint n_to_update;
- ulint err;
+ dberr_t err;
ulint i;
trx_t* trx;
mem_heap_t* tmp_heap = NULL;
@@ -1242,6 +1287,9 @@ row_ins_foreign_check_on_constraint(
release the latch. */
row_mysql_unfreeze_data_dictionary(thr_get_trx(thr));
+
+ DEBUG_SYNC_C("innodb_dml_cascade_dict_unfreeze");
+
row_mysql_freeze_data_dictionary(thr_get_trx(thr));
mtr_start(mtr);
@@ -1284,7 +1332,7 @@ Sets a shared lock on a record. Used in locking possible duplicate key
records and also in checking foreign key constraints.
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */
static
-enum db_err
+dberr_t
row_ins_set_shared_rec_lock(
/*========================*/
ulint type, /*!< in: LOCK_ORDINARY, LOCK_GAP, or
@@ -1295,7 +1343,7 @@ row_ins_set_shared_rec_lock(
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
que_thr_t* thr) /*!< in: query thread */
{
- enum db_err err;
+ dberr_t err;
ut_ad(rec_offs_validate(rec, index, offsets));
@@ -1315,7 +1363,7 @@ Sets a exclusive lock on a record. Used in locking possible duplicate key
records
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */
static
-enum db_err
+dberr_t
row_ins_set_exclusive_rec_lock(
/*===========================*/
ulint type, /*!< in: LOCK_ORDINARY, LOCK_GAP, or
@@ -1326,7 +1374,7 @@ row_ins_set_exclusive_rec_lock(
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
que_thr_t* thr) /*!< in: query thread */
{
- enum db_err err;
+ dberr_t err;
ut_ad(rec_offs_validate(rec, index, offsets));
@@ -1347,7 +1395,7 @@ which lock either the success or the failure of the constraint. NOTE that
the caller must have a shared latch on dict_operation_lock.
@return DB_SUCCESS, DB_NO_REFERENCED_ROW, or DB_ROW_IS_REFERENCED */
UNIV_INTERN
-ulint
+dberr_t
row_ins_check_foreign_constraint(
/*=============================*/
ibool check_ref,/*!< in: TRUE if we want to check that
@@ -1361,7 +1409,7 @@ row_ins_check_foreign_constraint(
dtuple_t* entry, /*!< in: index entry for index */
que_thr_t* thr) /*!< in: query thread */
{
- ulint err;
+ dberr_t err;
upd_node_t* upd_node;
dict_table_t* check_table;
dict_index_t* check_index;
@@ -1433,9 +1481,11 @@ run_again:
check_index = foreign->foreign_index;
}
- if (check_table == NULL || check_table->ibd_file_missing
+ if (check_table == NULL
+ || check_table->ibd_file_missing
|| check_index == NULL) {
- if (check_ref) {
+
+ if (!srv_read_only_mode && check_ref) {
FILE* ef = dict_foreign_err_file;
row_ins_set_detailed(trx, foreign);
@@ -1611,6 +1661,8 @@ run_again:
} else {
err = DB_SUCCESS;
}
+ default:
+ break;
}
goto end_scan;
@@ -1635,18 +1687,43 @@ end_scan:
do_possible_lock_wait:
if (err == DB_LOCK_WAIT) {
- trx->error_state = static_cast<enum db_err>(err);
+ bool verified = false;
+
+ trx->error_state = err;
que_thr_stop_for_mysql(thr);
lock_wait_suspend_thread(thr);
- if (trx->error_state == DB_SUCCESS) {
+ if (check_table->to_be_dropped) {
+ /* The table is being dropped. We shall timeout
+ this operation */
+ err = DB_LOCK_WAIT_TIMEOUT;
+ goto exit_func;
+ }
- goto run_again;
+ /* We had temporarily released dict_operation_lock in
+ above lock sleep wait, now we have the lock again, and
+ we will need to re-check whether the foreign key has been
+ dropped */
+ for (const dict_foreign_t* check_foreign = UT_LIST_GET_FIRST(
+ table->referenced_list);
+ check_foreign;
+ check_foreign = UT_LIST_GET_NEXT(
+ referenced_list, check_foreign)) {
+ if (check_foreign == foreign) {
+ verified = true;
+ break;
+ }
}
- err = trx->error_state;
+ if (!verified) {
+ err = DB_DICT_CHANGED;
+ } else if (trx->error_state == DB_SUCCESS) {
+ goto run_again;
+ } else {
+ err = trx->error_state;
+ }
}
exit_func:
@@ -1663,8 +1740,8 @@ Otherwise does searches to the indexes of referenced tables and
sets shared locks which lock either the success or the failure of
a constraint.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_ins_check_foreign_constraints(
/*==============================*/
dict_table_t* table, /*!< in: table */
@@ -1673,7 +1750,7 @@ row_ins_check_foreign_constraints(
que_thr_t* thr) /*!< in: query thread */
{
dict_foreign_t* foreign;
- ulint err;
+ dberr_t err;
trx_t* trx;
ibool got_s_lock = FALSE;
@@ -1681,14 +1758,21 @@ row_ins_check_foreign_constraints(
foreign = UT_LIST_GET_FIRST(table->foreign_list);
+ DEBUG_SYNC_C_IF_THD(thr_get_trx(thr)->mysql_thd,
+ "foreign_constraint_check_for_ins");
+
while (foreign) {
if (foreign->foreign_index == index) {
dict_table_t* ref_table = NULL;
+ dict_table_t* foreign_table = foreign->foreign_table;
+ dict_table_t* referenced_table
+ = foreign->referenced_table;
- if (foreign->referenced_table == NULL) {
+ if (referenced_table == NULL) {
ref_table = dict_table_open_on_name(
- foreign->referenced_table_name_lookup, FALSE);
+ foreign->referenced_table_name_lookup,
+ FALSE, FALSE, DICT_ERR_IGNORE_NONE);
}
if (0 == trx->dict_operation_lock_mode) {
@@ -1697,9 +1781,9 @@ row_ins_check_foreign_constraints(
row_mysql_freeze_data_dictionary(trx);
}
- if (foreign->referenced_table) {
+ if (referenced_table) {
os_inc_counter(dict_sys->mutex,
- foreign->foreign_table
+ foreign_table
->n_foreign_key_checks_running);
}
@@ -1711,9 +1795,12 @@ row_ins_check_foreign_constraints(
err = row_ins_check_foreign_constraint(
TRUE, foreign, table, entry, thr);
- if (foreign->referenced_table) {
+ DBUG_EXECUTE_IF("row_ins_dict_change_err",
+ err = DB_DICT_CHANGED;);
+
+ if (referenced_table) {
os_dec_counter(dict_sys->mutex,
- foreign->foreign_table
+ foreign_table
->n_foreign_key_checks_running);
}
@@ -1722,7 +1809,7 @@ row_ins_check_foreign_constraints(
}
if (ref_table != NULL) {
- dict_table_close(ref_table, FALSE);
+ dict_table_close(ref_table, FALSE, FALSE);
}
if (err != DB_SUCCESS) {
@@ -1778,8 +1865,7 @@ row_ins_dupl_error_with_rec(
if (!dict_index_is_clust(index)) {
for (i = 0; i < n_unique; i++) {
- if (UNIV_SQL_NULL == dfield_get_len(
- dtuple_get_nth_field(entry, i))) {
+ if (dfield_is_null(dtuple_get_nth_field(entry, i))) {
return(FALSE);
}
@@ -1794,26 +1880,30 @@ Scans a unique non-clustered index at a given index entry to determine
whether a uniqueness violation has occurred for the key value of the entry.
Set shared locks on possible duplicate records.
@return DB_SUCCESS, DB_DUPLICATE_KEY, or DB_LOCK_WAIT */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_ins_scan_sec_index_for_duplicate(
/*=================================*/
+ ulint flags, /*!< in: undo logging and locking flags */
dict_index_t* index, /*!< in: non-clustered unique index */
dtuple_t* entry, /*!< in: index entry */
- que_thr_t* thr) /*!< in: query thread */
+ que_thr_t* thr, /*!< in: query thread */
+ bool s_latch,/*!< in: whether index->lock is being held */
+ mtr_t* mtr, /*!< in/out: mini-transaction */
+ mem_heap_t* offsets_heap)
+ /*!< in/out: memory heap that can be emptied */
{
ulint n_unique;
- ulint i;
int cmp;
ulint n_fields_cmp;
btr_pcur_t pcur;
- ulint err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
ulint allow_duplicates;
- mtr_t mtr;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
+ ulint* offsets = NULL;
+
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(s_latch == rw_lock_own(&index->lock, RW_LOCK_SHARED));
+#endif /* UNIV_SYNC_DEBUG */
n_unique = dict_index_get_n_unique(index);
@@ -1821,7 +1911,7 @@ row_ins_scan_sec_index_for_duplicate(
n_unique first fields is NULL, a unique key violation cannot occur,
since we define NULL != NULL in this case */
- for (i = 0; i < n_unique; i++) {
+ for (ulint i = 0; i < n_unique; i++) {
if (UNIV_SQL_NULL == dfield_get_len(
dtuple_get_nth_field(entry, i))) {
@@ -1829,15 +1919,17 @@ row_ins_scan_sec_index_for_duplicate(
}
}
- mtr_start(&mtr);
-
/* Store old value on n_fields_cmp */
n_fields_cmp = dtuple_get_n_fields_cmp(entry);
- dtuple_set_n_fields_cmp(entry, dict_index_get_n_unique(index));
+ dtuple_set_n_fields_cmp(entry, n_unique);
- btr_pcur_open(index, entry, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr);
+ btr_pcur_open(index, entry, PAGE_CUR_GE,
+ s_latch
+ ? BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED
+ : BTR_SEARCH_LEAF,
+ &pcur, mtr);
allow_duplicates = thr_get_trx(thr)->duplicates;
@@ -1853,9 +1945,12 @@ row_ins_scan_sec_index_for_duplicate(
}
offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
+ ULINT_UNDEFINED, &offsets_heap);
- if (allow_duplicates) {
+ if (flags & BTR_NO_LOCKING_FLAG) {
+ /* Set no locks when applying log
+ in online table rebuild. */
+ } else if (allow_duplicates) {
/* If the SQL-query will update or replace
duplicate key we will take X-lock for
@@ -1901,37 +1996,115 @@ row_ins_scan_sec_index_for_duplicate(
ut_a(cmp < 0);
goto end_scan;
}
- } while (btr_pcur_move_to_next(&pcur, &mtr));
+ } while (btr_pcur_move_to_next(&pcur, mtr));
end_scan:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- mtr_commit(&mtr);
-
/* Restore old value */
dtuple_set_n_fields_cmp(entry, n_fields_cmp);
return(err);
}
+/** Checks for a duplicate when the table is being rebuilt online.
+@retval DB_SUCCESS when no duplicate is detected
+@retval DB_SUCCESS_LOCKED_REC when rec is an exact match of entry or
+a newer version of entry (the entry should not be inserted)
+@retval DB_DUPLICATE_KEY when entry is a duplicate of rec */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_ins_duplicate_online(
+/*=====================*/
+ ulint n_uniq, /*!< in: offset of DB_TRX_ID */
+ const dtuple_t* entry, /*!< in: entry that is being inserted */
+ const rec_t* rec, /*!< in: clustered index record */
+ ulint* offsets)/*!< in/out: rec_get_offsets(rec) */
+{
+ ulint fields = 0;
+ ulint bytes = 0;
+
+ /* During rebuild, there should not be any delete-marked rows
+ in the new table. */
+ ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
+ ut_ad(dtuple_get_n_fields_cmp(entry) == n_uniq);
+
+ /* Compare the PRIMARY KEY fields and the
+ DB_TRX_ID, DB_ROLL_PTR. */
+ cmp_dtuple_rec_with_match_low(
+ entry, rec, offsets, n_uniq + 2, &fields, &bytes);
+
+ if (fields < n_uniq) {
+ /* Not a duplicate. */
+ return(DB_SUCCESS);
+ }
+
+ if (fields == n_uniq + 2) {
+ /* rec is an exact match of entry. */
+ ut_ad(bytes == 0);
+ return(DB_SUCCESS_LOCKED_REC);
+ }
+
+ return(DB_DUPLICATE_KEY);
+}
+
+/** Checks for a duplicate at the insert position when the table is being rebuilt online.
+@retval DB_SUCCESS when no duplicate is detected
+@retval DB_SUCCESS_LOCKED_REC when rec is an exact match of entry or
+a newer version of entry (the entry should not be inserted)
+@retval DB_DUPLICATE_KEY when entry is a duplicate of rec */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_ins_duplicate_error_in_clust_online(
+/*====================================*/
+ ulint n_uniq, /*!< in: offset of DB_TRX_ID */
+ const dtuple_t* entry, /*!< in: entry that is being inserted */
+ const btr_cur_t*cursor, /*!< in: cursor on insert position */
+ ulint** offsets,/*!< in/out: rec_get_offsets(rec) */
+ mem_heap_t** heap) /*!< in/out: heap for offsets */
+{
+ dberr_t err = DB_SUCCESS;
+ const rec_t* rec = btr_cur_get_rec(cursor);
+
+ if (cursor->low_match >= n_uniq && !page_rec_is_infimum(rec)) {
+ *offsets = rec_get_offsets(rec, cursor->index, *offsets,
+ ULINT_UNDEFINED, heap);
+ err = row_ins_duplicate_online(n_uniq, entry, rec, *offsets);
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+ }
+
+ rec = page_rec_get_next_const(btr_cur_get_rec(cursor));
+
+ if (cursor->up_match >= n_uniq && !page_rec_is_supremum(rec)) {
+ *offsets = rec_get_offsets(rec, cursor->index, *offsets,
+ ULINT_UNDEFINED, heap);
+ err = row_ins_duplicate_online(n_uniq, entry, rec, *offsets);
+ }
+
+ return(err);
+}
+
/***************************************************************//**
Checks if a unique key violation error would occur at an index entry
insert. Sets shared locks on possible duplicate records. Works only
for a clustered index!
-@return DB_SUCCESS if no error, DB_DUPLICATE_KEY if error,
-DB_LOCK_WAIT if we have to wait for a lock on a possible duplicate
-record */
-static
-ulint
+@retval DB_SUCCESS if no error
+@retval DB_DUPLICATE_KEY if a duplicate was found
+@retval DB_LOCK_WAIT if we have to wait for a lock on a possible duplicate
+record
+@retval DB_SUCCESS_LOCKED_REC if an exact match of the record was found
+in online table rebuild (flags & (BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG)) */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_ins_duplicate_error_in_clust(
/*=============================*/
+ ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: B-tree cursor */
const dtuple_t* entry, /*!< in: entry to insert */
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr) /*!< in: mtr */
{
- ulint err;
+ dberr_t err;
rec_t* rec;
ulint n_unique;
trx_t* trx = thr_get_trx(thr);
@@ -1942,8 +2115,7 @@ row_ins_duplicate_error_in_clust(
UT_NOT_USED(mtr);
- ut_a(dict_index_is_clust(cursor->index));
- ut_ad(dict_index_is_unique(cursor->index));
+ ut_ad(dict_index_is_clust(cursor->index));
/* NOTE: For unique non-clustered indexes there may be any number
of delete marked records with the same value for the non-clustered
@@ -2002,6 +2174,7 @@ row_ins_duplicate_error_in_clust(
if (row_ins_dupl_error_with_rec(
rec, entry, cursor->index, offsets)) {
+duplicate:
trx->error_info = cursor->index;
err = DB_DUPLICATE_KEY;
goto func_exit;
@@ -2046,14 +2219,12 @@ row_ins_duplicate_error_in_clust(
if (row_ins_dupl_error_with_rec(
rec, entry, cursor->index, offsets)) {
- trx->error_info = cursor->index;
- err = DB_DUPLICATE_KEY;
- goto func_exit;
+ goto duplicate;
}
}
- ut_a(!dict_index_is_clust(cursor->index));
/* This should never happen */
+ ut_error;
}
err = DB_SUCCESS;
@@ -2081,12 +2252,12 @@ row_ins_must_modify_rec(
/*====================*/
const btr_cur_t* cursor) /*!< in: B-tree cursor */
{
- /* NOTE: (compare to the note in row_ins_duplicate_error) Because node
- pointers on upper levels of the B-tree may match more to entry than
- to actual user records on the leaf level, we have to check if the
- candidate record is actually a user record. In a clustered index
- node pointers contain index->n_unique first fields, and in the case
- of a secondary index, all fields of the index. */
+ /* NOTE: (compare to the note in row_ins_duplicate_error_in_clust)
+ Because node pointers on upper levels of the B-tree may match more
+ to entry than to actual user records on the leaf level, we
+ have to check if the candidate record is actually a user record.
+ A clustered index node pointer contains index->n_unique first fields,
+ and a secondary index node pointer contains all index fields. */
return(cursor->low_match
>= dict_index_get_n_unique_in_tree(cursor->index)
@@ -2094,56 +2265,359 @@ row_ins_must_modify_rec(
}
/***************************************************************//**
-Tries to insert an index entry to an index. If the index is clustered
-and a record with the same unique key is found, the other record is
-necessarily marked deleted by a committed transaction, or a unique key
-violation error occurs. The delete marked record is then updated to an
-existing record, and we must write an undo log record on the delete
-marked record. If the index is secondary, and a record with exactly the
-same fields is found, the other record is necessarily marked deleted.
-It is then unmarked. Otherwise, the entry is just inserted to the index.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_FAIL if pessimistic retry needed,
-or error code */
-static
-ulint
-row_ins_index_entry_low(
-/*====================*/
+Tries to insert an entry into a clustered index, ignoring foreign key
+constraints. If a record with the same unique key is found, the other
+record is necessarily marked deleted by a committed transaction, or a
+unique key violation error occurs. The delete marked record is then
+updated to an existing record, and we must write an undo log record on
+the delete marked record.
+@retval DB_SUCCESS on success
+@retval DB_LOCK_WAIT on lock wait when !(flags & BTR_NO_LOCKING_FLAG)
+@retval DB_FAIL if retry with BTR_MODIFY_TREE is needed
+@return error code */
+UNIV_INTERN
+dberr_t
+row_ins_clust_index_entry_low(
+/*==========================*/
+ ulint flags, /*!< in: undo logging and locking flags */
ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
depending on whether we wish optimistic or
pessimistic descent down the index tree */
- dict_index_t* index, /*!< in: index */
+ dict_index_t* index, /*!< in: clustered index */
+ ulint n_uniq, /*!< in: 0 or index->n_uniq */
dtuple_t* entry, /*!< in/out: index entry to insert */
ulint n_ext, /*!< in: number of externally stored columns */
que_thr_t* thr) /*!< in: query thread */
{
btr_cur_t cursor;
- ulint search_mode;
- ibool modify = FALSE;
- rec_t* insert_rec;
- rec_t* rec;
- ulint* offsets;
- ulint err;
- ulint n_unique;
- big_rec_t* big_rec = NULL;
+ ulint* offsets = NULL;
+ dberr_t err;
+ big_rec_t* big_rec = NULL;
mtr_t mtr;
- mem_heap_t* heap = NULL;
+ mem_heap_t* offsets_heap = NULL;
- log_free_check();
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(!dict_index_is_unique(index)
+ || n_uniq == dict_index_get_n_unique(index));
+ ut_ad(!n_uniq || n_uniq == dict_index_get_n_unique(index));
mtr_start(&mtr);
+ if (mode == BTR_MODIFY_LEAF && dict_index_is_online_ddl(index)) {
+ mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
+ }
+
cursor.thr = thr;
/* Note that we use PAGE_CUR_LE as the search mode, because then
the function will return in both low_match and up_match of the
cursor sensible values */
- if (dict_index_is_clust(index)) {
- search_mode = mode;
- } else if (!(thr_get_trx(thr)->check_unique_secondary)) {
- search_mode = mode | BTR_INSERT | BTR_IGNORE_SEC_UNIQUE;
+ btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE, mode,
+ &cursor, 0, __FILE__, __LINE__, &mtr);
+
+#ifdef UNIV_DEBUG
+ {
+ page_t* page = btr_cur_get_page(&cursor);
+ rec_t* first_rec = page_rec_get_next(
+ page_get_infimum_rec(page));
+
+ ut_ad(page_rec_is_supremum(first_rec)
+ || rec_get_n_fields(first_rec, index)
+ == dtuple_get_n_fields(entry));
+ }
+#endif
+
+ if (n_uniq && (cursor.up_match >= n_uniq
+ || cursor.low_match >= n_uniq)) {
+
+ if (flags
+ == (BTR_CREATE_FLAG | BTR_NO_LOCKING_FLAG
+ | BTR_NO_UNDO_LOG_FLAG | BTR_KEEP_SYS_FLAG)) {
+ /* Set no locks when applying log
+ in online table rebuild. Only check for duplicates. */
+ err = row_ins_duplicate_error_in_clust_online(
+ n_uniq, entry, &cursor,
+ &offsets, &offsets_heap);
+
+ switch (err) {
+ case DB_SUCCESS:
+ break;
+ default:
+ ut_ad(0);
+ /* fall through */
+ case DB_SUCCESS_LOCKED_REC:
+ case DB_DUPLICATE_KEY:
+ thr_get_trx(thr)->error_info = cursor.index;
+ }
+ } else {
+ /* Note that the following may return also
+ DB_LOCK_WAIT */
+
+ err = row_ins_duplicate_error_in_clust(
+ flags, &cursor, entry, thr, &mtr);
+ }
+
+ if (err != DB_SUCCESS) {
+err_exit:
+ mtr_commit(&mtr);
+ goto func_exit;
+ }
+ }
+
+ if (row_ins_must_modify_rec(&cursor)) {
+ /* There is already an index entry with a long enough common
+ prefix, we must convert the insert into a modify of an
+ existing record */
+ mem_heap_t* entry_heap = mem_heap_create(1024);
+
+ err = row_ins_clust_index_entry_by_modify(
+ flags, mode, &cursor, &offsets, &offsets_heap,
+ entry_heap, &big_rec, entry, thr, &mtr);
+
+ rec_t* rec = btr_cur_get_rec(&cursor);
+
+ if (big_rec) {
+ ut_a(err == DB_SUCCESS);
+ /* Write out the externally stored
+ columns while still x-latching
+ index->lock and block->lock. Allocate
+ pages for big_rec in the mtr that
+ modified the B-tree, but be sure to skip
+ any pages that were freed in mtr. We will
+ write out the big_rec pages before
+ committing the B-tree mini-transaction. If
+ the system crashes so that crash recovery
+ will not replay the mtr_commit(&mtr), the
+ big_rec pages will be left orphaned until
+ the pages are allocated for something else.
+
+ TODO: If the allocation extends the
+ tablespace, it will not be redo
+ logged, in either mini-transaction.
+ Tablespace extension should be
+ redo-logged in the big_rec
+ mini-transaction, so that recovery
+ will not fail when the big_rec was
+ written to the extended portion of the
+ file, in case the file was somehow
+ truncated in the crash. */
+
+ DEBUG_SYNC_C_IF_THD(
+ thr_get_trx(thr)->mysql_thd,
+ "before_row_ins_upd_extern");
+ err = btr_store_big_rec_extern_fields(
+ index, btr_cur_get_block(&cursor),
+ rec, offsets, big_rec, &mtr,
+ BTR_STORE_INSERT_UPDATE);
+ DEBUG_SYNC_C_IF_THD(
+ thr_get_trx(thr)->mysql_thd,
+ "after_row_ins_upd_extern");
+ /* If writing big_rec fails (for
+ example, because of DB_OUT_OF_FILE_SPACE),
+ the record will be corrupted. Even if
+ we did not update any externally
+ stored columns, our update could cause
+ the record to grow so that a
+ non-updated column was selected for
+ external storage. This non-update
+ would not have been written to the
+ undo log, and thus the record cannot
+ be rolled back.
+
+ However, because we have not executed
+ mtr_commit(mtr) yet, the update will
+ not be replayed in crash recovery, and
+ the following assertion failure will
+ effectively "roll back" the operation. */
+ ut_a(err == DB_SUCCESS);
+ dtuple_big_rec_free(big_rec);
+ }
+
+ if (err == DB_SUCCESS && dict_index_is_online_ddl(index)) {
+ row_log_table_insert(rec, index, offsets);
+ }
+
+ mtr_commit(&mtr);
+ mem_heap_free(entry_heap);
} else {
- search_mode = mode | BTR_INSERT;
+ rec_t* insert_rec;
+
+ if (mode != BTR_MODIFY_TREE) {
+ ut_ad((mode & ~BTR_ALREADY_S_LATCHED)
+ == BTR_MODIFY_LEAF);
+ err = btr_cur_optimistic_insert(
+ flags, &cursor, &offsets, &offsets_heap,
+ entry, &insert_rec, &big_rec,
+ n_ext, thr, &mtr);
+ } else {
+ if (buf_LRU_buf_pool_running_out()) {
+
+ err = DB_LOCK_TABLE_FULL;
+ goto err_exit;
+ }
+
+ err = btr_cur_optimistic_insert(
+ flags, &cursor,
+ &offsets, &offsets_heap,
+ entry, &insert_rec, &big_rec,
+ n_ext, thr, &mtr);
+
+ if (err == DB_FAIL) {
+ err = btr_cur_pessimistic_insert(
+ flags, &cursor,
+ &offsets, &offsets_heap,
+ entry, &insert_rec, &big_rec,
+ n_ext, thr, &mtr);
+ }
+ }
+
+ if (UNIV_LIKELY_NULL(big_rec)) {
+ mtr_commit(&mtr);
+
+ /* Online table rebuild could read (and
+ ignore) the incomplete record at this point.
+ If online rebuild is in progress, the
+ row_ins_index_entry_big_rec() will write log. */
+
+ DBUG_EXECUTE_IF(
+ "row_ins_extern_checkpoint",
+ log_make_checkpoint_at(
+ IB_ULONGLONG_MAX, TRUE););
+ err = row_ins_index_entry_big_rec(
+ entry, big_rec, offsets, &offsets_heap, index,
+ thr_get_trx(thr)->mysql_thd,
+ __FILE__, __LINE__);
+ dtuple_convert_back_big_rec(index, entry, big_rec);
+ } else {
+ if (err == DB_SUCCESS
+ && dict_index_is_online_ddl(index)) {
+ row_log_table_insert(
+ insert_rec, index, offsets);
+ }
+
+ mtr_commit(&mtr);
+ }
+ }
+
+func_exit:
+ if (offsets_heap) {
+ mem_heap_free(offsets_heap);
+ }
+
+ return(err);
+}
+
+/***************************************************************//**
+Starts a mini-transaction and checks if the index will be dropped.
+@return true if the index is to be dropped */
+static __attribute__((nonnull, warn_unused_result))
+bool
+row_ins_sec_mtr_start_and_check_if_aborted(
+/*=======================================*/
+ mtr_t* mtr, /*!< out: mini-transaction */
+ dict_index_t* index, /*!< in/out: secondary index */
+ bool check, /*!< in: whether to check */
+ ulint search_mode)
+			/*!< in: search mode flags (checked for BTR_ALREADY_S_LATCHED) */
+{
+ ut_ad(!dict_index_is_clust(index));
+
+ mtr_start(mtr);
+
+ if (!check) {
+ return(false);
+ }
+
+ if (search_mode & BTR_ALREADY_S_LATCHED) {
+ mtr_s_lock(dict_index_get_lock(index), mtr);
+ } else {
+ mtr_x_lock(dict_index_get_lock(index), mtr);
+ }
+
+ switch (index->online_status) {
+ case ONLINE_INDEX_ABORTED:
+ case ONLINE_INDEX_ABORTED_DROPPED:
+ ut_ad(*index->name == TEMP_INDEX_PREFIX);
+ return(true);
+ case ONLINE_INDEX_COMPLETE:
+ return(false);
+ case ONLINE_INDEX_CREATION:
+ break;
+ }
+
+ ut_error;
+ return(true);
+}
+
+/***************************************************************//**
+Tries to insert an entry into a secondary index. If a record with exactly the
+same fields is found, the other record is necessarily marked deleted.
+It is then unmarked. Otherwise, the entry is just inserted to the index.
+@retval DB_SUCCESS on success
+@retval DB_LOCK_WAIT on lock wait when !(flags & BTR_NO_LOCKING_FLAG)
+@retval DB_FAIL if retry with BTR_MODIFY_TREE is needed
+@return error code */
+UNIV_INTERN
+dberr_t
+row_ins_sec_index_entry_low(
+/*========================*/
+ ulint flags, /*!< in: undo logging and locking flags */
+ ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
+ depending on whether we wish optimistic or
+ pessimistic descent down the index tree */
+ dict_index_t* index, /*!< in: secondary index */
+ mem_heap_t* offsets_heap,
+ /*!< in/out: memory heap that can be emptied */
+ mem_heap_t* heap, /*!< in/out: memory heap */
+ dtuple_t* entry, /*!< in/out: index entry to insert */
+ trx_id_t trx_id, /*!< in: PAGE_MAX_TRX_ID during
+ row_log_table_apply(), or 0 */
+ que_thr_t* thr) /*!< in: query thread */
+{
+ btr_cur_t cursor;
+ ulint search_mode = mode | BTR_INSERT;
+ dberr_t err = DB_SUCCESS;
+ ulint n_unique;
+ mtr_t mtr;
+ ulint* offsets = NULL;
+
+ ut_ad(!dict_index_is_clust(index));
+ ut_ad(mode == BTR_MODIFY_LEAF || mode == BTR_MODIFY_TREE);
+
+ cursor.thr = thr;
+ ut_ad(thr_get_trx(thr)->id);
+ mtr_start(&mtr);
+
+ /* Ensure that we acquire index->lock when inserting into an
+ index with index->online_status == ONLINE_INDEX_COMPLETE, but
+ could still be subject to rollback_inplace_alter_table().
+ This prevents a concurrent change of index->online_status.
+ The memory object cannot be freed as long as we have an open
+ reference to the table, or index->table->n_ref_count > 0. */
+ const bool check = *index->name == TEMP_INDEX_PREFIX;
+ if (check) {
+ DEBUG_SYNC_C("row_ins_sec_index_enter");
+ if (mode == BTR_MODIFY_LEAF) {
+ search_mode |= BTR_ALREADY_S_LATCHED;
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
+ } else {
+ mtr_x_lock(dict_index_get_lock(index), &mtr);
+ }
+
+ if (row_log_online_op_try(
+ index, entry, thr_get_trx(thr)->id)) {
+ goto func_exit;
+ }
+ }
+
+ /* Note that we use PAGE_CUR_LE as the search mode, because then
+ the function will return in both low_match and up_match of the
+ cursor sensible values */
+
+ if (!thr_get_trx(thr)->check_unique_secondary) {
+ search_mode |= BTR_IGNORE_SEC_UNIQUE;
}
btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
@@ -2151,13 +2625,8 @@ row_ins_index_entry_low(
&cursor, 0, __FILE__, __LINE__, &mtr);
if (cursor.flag == BTR_CUR_INSERT_TO_IBUF) {
- /* The insertion was made to the insert buffer already during
- the search: we are done */
-
- ut_ad(search_mode & BTR_INSERT);
- err = DB_SUCCESS;
-
- goto function_exit;
+ /* The insert was buffered during the search: we are done */
+ goto func_exit;
}
#ifdef UNIV_DEBUG
@@ -2174,213 +2643,250 @@ row_ins_index_entry_low(
n_unique = dict_index_get_n_unique(index);
- if (dict_index_is_unique(index) && (cursor.up_match >= n_unique
- || cursor.low_match >= n_unique)) {
+ if (dict_index_is_unique(index)
+ && (cursor.low_match >= n_unique || cursor.up_match >= n_unique)) {
+ mtr_commit(&mtr);
- if (dict_index_is_clust(index)) {
- /* Note that the following may return also
- DB_LOCK_WAIT */
+ DEBUG_SYNC_C("row_ins_sec_index_unique");
- err = row_ins_duplicate_error_in_clust(
- &cursor, entry, thr, &mtr);
- if (err != DB_SUCCESS) {
+ if (row_ins_sec_mtr_start_and_check_if_aborted(
+ &mtr, index, check, search_mode)) {
+ goto func_exit;
+ }
- goto function_exit;
- }
- } else {
- mtr_commit(&mtr);
- err = row_ins_scan_sec_index_for_duplicate(
- index, entry, thr);
- mtr_start(&mtr);
+ err = row_ins_scan_sec_index_for_duplicate(
+ flags, index, entry, thr, check, &mtr, offsets_heap);
- if (err != DB_SUCCESS) {
- goto function_exit;
+ mtr_commit(&mtr);
+
+ switch (err) {
+ case DB_SUCCESS:
+ break;
+ case DB_DUPLICATE_KEY:
+ if (*index->name == TEMP_INDEX_PREFIX) {
+ ut_ad(!thr_get_trx(thr)
+ ->dict_operation_lock_mode);
+ mutex_enter(&dict_sys->mutex);
+ dict_set_corrupted_index_cache_only(
+ index, index->table);
+ mutex_exit(&dict_sys->mutex);
+ /* Do not return any error to the
+ caller. The duplicate will be reported
+ by ALTER TABLE or CREATE UNIQUE INDEX.
+ Unfortunately we cannot report the
+ duplicate key value to the DDL thread,
+ because the altered_table object is
+ private to its call stack. */
+ err = DB_SUCCESS;
}
+ /* fall through */
+ default:
+ return(err);
+ }
- /* We did not find a duplicate and we have now
- locked with s-locks the necessary records to
- prevent any insertion of a duplicate by another
- transaction. Let us now reposition the cursor and
- continue the insertion. */
-
- btr_cur_search_to_nth_level(index, 0, entry,
- PAGE_CUR_LE,
- mode | BTR_INSERT,
- &cursor, 0,
- __FILE__, __LINE__, &mtr);
+ if (row_ins_sec_mtr_start_and_check_if_aborted(
+ &mtr, index, check, search_mode)) {
+ goto func_exit;
}
- }
- modify = row_ins_must_modify_rec(&cursor);
+ /* We did not find a duplicate and we have now
+ locked with s-locks the necessary records to
+ prevent any insertion of a duplicate by another
+ transaction. Let us now reposition the cursor and
+ continue the insertion. */
+
+ btr_cur_search_to_nth_level(
+ index, 0, entry, PAGE_CUR_LE,
+ search_mode & ~(BTR_INSERT | BTR_IGNORE_SEC_UNIQUE),
+ &cursor, 0, __FILE__, __LINE__, &mtr);
+ }
- if (modify) {
+ if (row_ins_must_modify_rec(&cursor)) {
/* There is already an index entry with a long enough common
prefix, we must convert the insert into a modify of an
existing record */
+ offsets = rec_get_offsets(
+ btr_cur_get_rec(&cursor), index, offsets,
+ ULINT_UNDEFINED, &offsets_heap);
- if (dict_index_is_clust(index)) {
- err = row_ins_clust_index_entry_by_modify(
- mode, &cursor, &heap, &big_rec, entry,
- thr, &mtr);
-
- if (big_rec) {
- ut_a(err == DB_SUCCESS);
- /* Write out the externally stored
- columns while still x-latching
- index->lock and block->lock. Allocate
- pages for big_rec in the mtr that
- modified the B-tree, but be sure to skip
- any pages that were freed in mtr. We will
- write out the big_rec pages before
- committing the B-tree mini-transaction. If
- the system crashes so that crash recovery
- will not replay the mtr_commit(&mtr), the
- big_rec pages will be left orphaned until
- the pages are allocated for something else.
-
- TODO: If the allocation extends the
- tablespace, it will not be redo
- logged, in either mini-transaction.
- Tablespace extension should be
- redo-logged in the big_rec
- mini-transaction, so that recovery
- will not fail when the big_rec was
- written to the extended portion of the
- file, in case the file was somehow
- truncated in the crash. */
-
- rec = btr_cur_get_rec(&cursor);
- offsets = rec_get_offsets(
- rec, index, NULL,
- ULINT_UNDEFINED, &heap);
-
- DEBUG_SYNC_C_IF_THD((THD*)
- thr_get_trx(thr)->mysql_thd,
- "before_row_ins_upd_extern");
- err = btr_store_big_rec_extern_fields(
- index, btr_cur_get_block(&cursor),
- rec, offsets, big_rec, &mtr,
- BTR_STORE_INSERT_UPDATE);
- DEBUG_SYNC_C_IF_THD((THD*)
- thr_get_trx(thr)->mysql_thd,
- "after_row_ins_upd_extern");
- /* If writing big_rec fails (for
- example, because of DB_OUT_OF_FILE_SPACE),
- the record will be corrupted. Even if
- we did not update any externally
- stored columns, our update could cause
- the record to grow so that a
- non-updated column was selected for
- external storage. This non-update
- would not have been written to the
- undo log, and thus the record cannot
- be rolled back.
-
- However, because we have not executed
- mtr_commit(mtr) yet, the update will
- not be replayed in crash recovery, and
- the following assertion failure will
- effectively "roll back" the operation. */
- ut_a(err == DB_SUCCESS);
- goto stored_big_rec;
- }
- } else {
- ut_ad(!n_ext);
- err = row_ins_sec_index_entry_by_modify(
- mode, &cursor, entry, thr, &mtr);
- }
+ err = row_ins_sec_index_entry_by_modify(
+ flags, mode, &cursor, &offsets,
+ offsets_heap, heap, entry, thr, &mtr);
} else {
+ rec_t* insert_rec;
+ big_rec_t* big_rec;
+
if (mode == BTR_MODIFY_LEAF) {
err = btr_cur_optimistic_insert(
- 0, &cursor, entry, &insert_rec, &big_rec,
- n_ext, thr, &mtr);
+ flags, &cursor, &offsets, &offsets_heap,
+ entry, &insert_rec,
+ &big_rec, 0, thr, &mtr);
} else {
- ut_a(mode == BTR_MODIFY_TREE);
+ ut_ad(mode == BTR_MODIFY_TREE);
if (buf_LRU_buf_pool_running_out()) {
err = DB_LOCK_TABLE_FULL;
-
- goto function_exit;
+ goto func_exit;
}
err = btr_cur_optimistic_insert(
- 0, &cursor, entry, &insert_rec, &big_rec,
- n_ext, thr, &mtr);
-
+ flags, &cursor,
+ &offsets, &offsets_heap,
+ entry, &insert_rec,
+ &big_rec, 0, thr, &mtr);
if (err == DB_FAIL) {
err = btr_cur_pessimistic_insert(
- 0, &cursor, entry, &insert_rec,
- &big_rec, n_ext, thr, &mtr);
+ flags, &cursor,
+ &offsets, &offsets_heap,
+ entry, &insert_rec,
+ &big_rec, 0, thr, &mtr);
}
}
+
+ if (err == DB_SUCCESS && trx_id) {
+ page_update_max_trx_id(
+ btr_cur_get_block(&cursor),
+ btr_cur_get_page_zip(&cursor),
+ trx_id, &mtr);
+ }
+
+ ut_ad(!big_rec);
}
-function_exit:
+func_exit:
mtr_commit(&mtr);
+ return(err);
+}
- if (UNIV_LIKELY_NULL(big_rec)) {
- DBUG_EXECUTE_IF(
- "row_ins_extern_checkpoint",
- log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE););
-
- mtr_start(&mtr);
-
- DEBUG_SYNC_C_IF_THD((THD*)
- thr_get_trx(thr)->mysql_thd,
- "before_row_ins_extern_latch");
- btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
- BTR_MODIFY_TREE, &cursor, 0,
- __FILE__, __LINE__, &mtr);
- rec = btr_cur_get_rec(&cursor);
- offsets = rec_get_offsets(rec, index, NULL,
- ULINT_UNDEFINED, &heap);
-
- DEBUG_SYNC_C_IF_THD((THD*)
- thr_get_trx(thr)->mysql_thd,
- "before_row_ins_extern");
- err = btr_store_big_rec_extern_fields(
- index, btr_cur_get_block(&cursor),
- rec, offsets, big_rec, &mtr, BTR_STORE_INSERT);
- DEBUG_SYNC_C_IF_THD((THD*)
- thr_get_trx(thr)->mysql_thd,
- "after_row_ins_extern");
-
-stored_big_rec:
- if (modify) {
- dtuple_big_rec_free(big_rec);
- } else {
- dtuple_convert_back_big_rec(index, entry, big_rec);
+/***************************************************************//**
+Tries to insert the externally stored fields (off-page columns)
+of a clustered index entry.
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+UNIV_INTERN
+dberr_t
+row_ins_index_entry_big_rec_func(
+/*=============================*/
+ const dtuple_t* entry, /*!< in/out: index entry to insert */
+ const big_rec_t* big_rec,/*!< in: externally stored fields */
+ ulint* offsets,/*!< in/out: rec offsets */
+ mem_heap_t** heap, /*!< in/out: memory heap */
+ dict_index_t* index, /*!< in: index */
+ const char* file, /*!< in: file name of caller */
+#ifndef DBUG_OFF
+ const void* thd, /*!< in: connection, or NULL */
+#endif /* DBUG_OFF */
+ ulint line) /*!< in: line number of caller */
+{
+ mtr_t mtr;
+ btr_cur_t cursor;
+ rec_t* rec;
+ dberr_t error;
+
+ ut_ad(dict_index_is_clust(index));
+
+ DEBUG_SYNC_C_IF_THD(thd, "before_row_ins_extern_latch");
+
+ mtr_start(&mtr);
+ btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
+ BTR_MODIFY_TREE, &cursor, 0,
+ file, line, &mtr);
+ rec = btr_cur_get_rec(&cursor);
+ offsets = rec_get_offsets(rec, index, offsets,
+ ULINT_UNDEFINED, heap);
+
+ DEBUG_SYNC_C_IF_THD(thd, "before_row_ins_extern");
+ error = btr_store_big_rec_extern_fields(
+ index, btr_cur_get_block(&cursor),
+ rec, offsets, big_rec, &mtr, BTR_STORE_INSERT);
+ DEBUG_SYNC_C_IF_THD(thd, "after_row_ins_extern");
+
+ if (error == DB_SUCCESS
+ && dict_index_is_online_ddl(index)) {
+ row_log_table_insert(rec, index, offsets);
+ }
+
+ mtr_commit(&mtr);
+
+ return(error);
+}
+
+/***************************************************************//**
+Inserts an entry into a clustered index. Tries first optimistic,
+then pessimistic descent down the tree. If the entry matches enough
+to a delete marked record, performs the insert by updating or delete
+unmarking the delete marked record.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
+UNIV_INTERN
+dberr_t
+row_ins_clust_index_entry(
+/*======================*/
+ dict_index_t* index, /*!< in: clustered index */
+ dtuple_t* entry, /*!< in/out: index entry to insert */
+ que_thr_t* thr, /*!< in: query thread */
+ ulint n_ext) /*!< in: number of externally stored columns */
+{
+ dberr_t err;
+ ulint n_uniq;
+
+ if (UT_LIST_GET_FIRST(index->table->foreign_list)) {
+ err = row_ins_check_foreign_constraints(
+ index->table, index, entry, thr);
+ if (err != DB_SUCCESS) {
+
+ return(err);
}
+ }
- mtr_commit(&mtr);
+ n_uniq = dict_index_is_unique(index) ? index->n_uniq : 0;
+
+ /* Try first optimistic descent to the B-tree */
+
+ log_free_check();
+
+ err = row_ins_clust_index_entry_low(
+ 0, BTR_MODIFY_LEAF, index, n_uniq, entry, n_ext, thr);
+
+#ifdef UNIV_DEBUG
+ /* Work around Bug#14626800 ASSERTION FAILURE IN DEBUG_SYNC().
+ Once it is fixed, remove the 'ifdef', 'if' and this comment. */
+ if (!thr_get_trx(thr)->ddl) {
+ DEBUG_SYNC_C_IF_THD(thr_get_trx(thr)->mysql_thd,
+ "after_row_ins_clust_index_entry_leaf");
}
+#endif /* UNIV_DEBUG */
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
+ if (err != DB_FAIL) {
+ DEBUG_SYNC_C("row_ins_clust_index_entry_leaf_after");
+ return(err);
}
- return(err);
+
+ /* Try then pessimistic descent to the B-tree */
+
+ log_free_check();
+
+ return(row_ins_clust_index_entry_low(
+ 0, BTR_MODIFY_TREE, index, n_uniq, entry, n_ext, thr));
}
/***************************************************************//**
-Inserts an index entry to index. Tries first optimistic, then pessimistic
-descent down the tree. If the entry matches enough to a delete marked record,
-performs the insert by updating or delete unmarking the delete marked
-record.
+Inserts an entry into a secondary index. Tries first optimistic,
+then pessimistic descent down the tree. If the entry matches enough
+to a delete marked record, performs the insert by updating or delete
+unmarking the delete marked record.
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
UNIV_INTERN
-ulint
-row_ins_index_entry(
-/*================*/
- dict_index_t* index, /*!< in: index */
+dberr_t
+row_ins_sec_index_entry(
+/*====================*/
+ dict_index_t* index, /*!< in: secondary index */
dtuple_t* entry, /*!< in/out: index entry to insert */
- ulint n_ext, /*!< in: number of externally stored columns */
- ibool foreign,/*!< in: TRUE=check foreign key constraints
- (foreign=FALSE only during CREATE INDEX) */
que_thr_t* thr) /*!< in: query thread */
{
- ulint err;
+ dberr_t err;
+ mem_heap_t* offsets_heap;
+ mem_heap_t* heap;
- if (foreign && UT_LIST_GET_FIRST(index->table->foreign_list)) {
+ if (UT_LIST_GET_FIRST(index->table->foreign_list)) {
err = row_ins_check_foreign_constraints(index->table, index,
entry, thr);
if (err != DB_SUCCESS) {
@@ -2389,26 +2895,59 @@ row_ins_index_entry(
}
}
+ ut_ad(thr_get_trx(thr)->id);
+
+ offsets_heap = mem_heap_create(1024);
+ heap = mem_heap_create(1024);
+
/* Try first optimistic descent to the B-tree */
- err = row_ins_index_entry_low(BTR_MODIFY_LEAF, index, entry,
- n_ext, thr);
- if (err != DB_FAIL) {
+ log_free_check();
- return(err);
- }
+ err = row_ins_sec_index_entry_low(
+ 0, BTR_MODIFY_LEAF, index, offsets_heap, heap, entry, 0, thr);
+ if (err == DB_FAIL) {
+ mem_heap_empty(heap);
- /* Try then pessimistic descent to the B-tree */
+ /* Try then pessimistic descent to the B-tree */
+
+ log_free_check();
- err = row_ins_index_entry_low(BTR_MODIFY_TREE, index, entry,
- n_ext, thr);
+ err = row_ins_sec_index_entry_low(
+ 0, BTR_MODIFY_TREE, index,
+ offsets_heap, heap, entry, 0, thr);
+ }
+
+ mem_heap_free(heap);
+ mem_heap_free(offsets_heap);
return(err);
}
+/***************************************************************//**
+Inserts an index entry to index. Tries first optimistic, then pessimistic
+descent down the tree. If the entry matches enough to a delete marked record,
+performs the insert by updating or delete unmarking the delete marked
+record.
+@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
+static
+dberr_t
+row_ins_index_entry(
+/*================*/
+ dict_index_t* index, /*!< in: index */
+ dtuple_t* entry, /*!< in/out: index entry to insert */
+ que_thr_t* thr) /*!< in: query thread */
+{
+ if (dict_index_is_clust(index)) {
+ return(row_ins_clust_index_entry(index, entry, thr, 0));
+ } else {
+ return(row_ins_sec_index_entry(index, entry, thr));
+ }
+}
+
/***********************************************************//**
Sets the values of the dtuple fields in entry from the values of appropriate
columns in row. */
-static
+static __attribute__((nonnull))
void
row_ins_index_entry_set_vals(
/*=========================*/
@@ -2419,8 +2958,6 @@ row_ins_index_entry_set_vals(
ulint n_fields;
ulint i;
- ut_ad(entry && row);
-
n_fields = dtuple_get_n_fields(entry);
for (i = 0; i < n_fields; i++) {
@@ -2463,14 +3000,14 @@ row_ins_index_entry_set_vals(
Inserts a single index entry to the table.
@return DB_SUCCESS if operation successfully completed, else error
code or DB_LOCK_WAIT */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_ins_index_entry_step(
/*=====================*/
ins_node_t* node, /*!< in: row insert node */
que_thr_t* thr) /*!< in: query thread */
{
- ulint err;
+ dberr_t err;
ut_ad(dtuple_check_typed(node->row));
@@ -2478,7 +3015,16 @@ row_ins_index_entry_step(
ut_ad(dtuple_check_typed(node->entry));
- err = row_ins_index_entry(node->index, node->entry, 0, TRUE, thr);
+ err = row_ins_index_entry(node->index, node->entry, thr);
+
+#ifdef UNIV_DEBUG
+ /* Work around Bug#14626800 ASSERTION FAILURE IN DEBUG_SYNC().
+ Once it is fixed, remove the 'ifdef', 'if' and this comment. */
+ if (!thr_get_trx(thr)->ddl) {
+ DEBUG_SYNC_C_IF_THD(thr_get_trx(thr)->mysql_thd,
+ "after_row_ins_index_entry_step");
+ }
+#endif /* UNIV_DEBUG */
return(err);
}
@@ -2577,16 +3123,14 @@ row_ins_get_row_from_select(
Inserts a row to a table.
@return DB_SUCCESS if operation successfully completed, else error
code or DB_LOCK_WAIT */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_ins(
/*====*/
ins_node_t* node, /*!< in: row insert node */
que_thr_t* thr) /*!< in: query thread */
{
- ulint err;
-
- ut_ad(node && thr);
+ dberr_t err;
if (node->state == INS_NODE_ALLOC_ROW_ID) {
@@ -2622,6 +3166,10 @@ row_ins(
node->index = dict_table_get_next_index(node->index);
node->entry = UT_LIST_GET_NEXT(tuple_list, node->entry);
+ DBUG_EXECUTE_IF(
+ "row_ins_skip_sec",
+ node->index = NULL; node->entry = NULL; break;);
+
/* Skip corrupted secondary index and its entry */
while (node->index && dict_index_is_corrupted(node->index)) {
@@ -2651,7 +3199,7 @@ row_ins_step(
que_node_t* parent;
sel_node_t* sel_node;
trx_t* trx;
- ulint err;
+ dberr_t err;
ut_ad(thr);
@@ -2684,6 +3232,8 @@ row_ins_step(
if (node->state == INS_NODE_SET_IX_LOCK) {
+ node->state = INS_NODE_ALLOC_ROW_ID;
+
/* It may be that the current session has not yet started
its transaction, or it has been committed: */
@@ -2695,6 +3245,9 @@ row_ins_step(
err = lock_table(0, node->table, LOCK_IX, thr);
+ DBUG_EXECUTE_IF("ib_row_ins_ix_lock_wait",
+ err = DB_LOCK_WAIT;);
+
if (err != DB_SUCCESS) {
goto error_handling;
@@ -2702,8 +3255,6 @@ row_ins_step(
node->trx_id = trx->id;
same_trx:
- node->state = INS_NODE_ALLOC_ROW_ID;
-
if (node->ins_type == INS_SEARCHED) {
/* Reset the cursor */
sel_node->state = SEL_NODE_OPEN;
@@ -2732,7 +3283,7 @@ same_trx:
err = row_ins(node, thr);
error_handling:
- trx->error_state = static_cast<enum db_err>(err);
+ trx->error_state = err;
if (err != DB_SUCCESS) {
/* err == DB_LOCK_WAIT or SQL error detected */
diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc
new file mode 100644
index 00000000000..b373b70ab7a
--- /dev/null
+++ b/storage/innobase/row/row0log.cc
@@ -0,0 +1,3219 @@
+/*****************************************************************************
+
+Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file row/row0log.cc
+Modification log for online index creation and online table rebuild
+
+Created 2011-05-26 Marko Makela
+*******************************************************/
+
+#include "row0log.h"
+
+#ifdef UNIV_NONINL
+#include "row0log.ic"
+#endif
+
+#include "row0row.h"
+#include "row0ins.h"
+#include "row0upd.h"
+#include "row0merge.h"
+#include "row0ext.h"
+#include "data0data.h"
+#include "que0que.h"
+#include "handler0alter.h"
+
+#include<set>
+
+/** Table row modification operations during online table rebuild.
+Delete-marked records are not copied to the rebuilt table. */
+enum row_tab_op {
+ /** Insert a record */
+ ROW_T_INSERT = 0x41,
+ /** Update a record in place */
+ ROW_T_UPDATE,
+ /** Delete (purge) a record */
+ ROW_T_DELETE
+};
+
+/** Index record modification operations during online index creation */
+enum row_op {
+ /** Insert a record */
+ ROW_OP_INSERT = 0x61,
+ /** Delete a record */
+ ROW_OP_DELETE
+};
+
+#ifdef UNIV_DEBUG
+/** Write information about the applied record to the error log */
+# define ROW_LOG_APPLY_PRINT
+#endif /* UNIV_DEBUG */
+
+#ifdef ROW_LOG_APPLY_PRINT
+/** When set, write information about the applied record to the error log */
+static bool row_log_apply_print;
+#endif /* ROW_LOG_APPLY_PRINT */
+
+/** Size of the modification log entry header, in bytes */
+#define ROW_LOG_HEADER_SIZE 2/*op, extra_size*/
+
+/** Log block for modifications during online index creation */
+struct row_log_buf_t {
+ byte* block; /*!< file block buffer */
+ mrec_buf_t buf; /*!< buffer for accessing a record
+ that spans two blocks */
+ ulint blocks; /*!< current position in blocks */
+ ulint bytes; /*!< current position within buf */
+};
+
+/** Set of transactions that rolled back inserts of BLOBs during
+online table rebuild */
+typedef std::set<trx_id_t> trx_id_set;
+
+/** @brief Buffer for logging modifications during online index creation
+
+All modifications to an index that is being created will be logged by
+row_log_online_op() to this buffer.
+
+All modifications to a table that is being rebuilt will be logged by
+row_log_table_delete(), row_log_table_update(), row_log_table_insert()
+to this buffer.
+
+When head.blocks == tail.blocks, the reader will access tail.block
+directly. When also head.bytes == tail.bytes, both counts will be
+reset to 0 and the file will be truncated. */
+struct row_log_t {
+ int fd; /*!< file descriptor */
+ ib_mutex_t mutex; /*!< mutex protecting trx_log, error,
+ max_trx and tail */
+ trx_id_set* trx_rb; /*!< set of transactions that rolled back
+ inserts of BLOBs during online table rebuild;
+ protected by mutex */
+ dict_table_t* table; /*!< table that is being rebuilt,
+ or NULL when this is a secondary
+ index that is being created online */
+ bool same_pk;/*!< whether the definition of the PRIMARY KEY
+ has remained the same */
+ const dtuple_t* add_cols;
+ /*!< default values of added columns, or NULL */
+ const ulint* col_map;/*!< mapping of old column numbers to
+ new ones, or NULL if !table */
+ dberr_t error; /*!< error that occurred during online
+ table rebuild */
+ trx_id_t max_trx;/*!< biggest observed trx_id in
+ row_log_online_op();
+ protected by mutex and index->lock S-latch,
+ or by index->lock X-latch only */
+ row_log_buf_t tail; /*!< writer context;
+ protected by mutex and index->lock S-latch,
+ or by index->lock X-latch only */
+ row_log_buf_t head; /*!< reader context; protected by MDL only;
+ modifiable by row_log_apply_ops() */
+ ulint size; /*!< allocated size */
+};
+
+/******************************************************//**
+Logs an operation to a secondary index that is (or was) being created. */
+UNIV_INTERN
+void
+row_log_online_op(
+/*==============*/
+ dict_index_t* index, /*!< in/out: index, S or X latched */
+ const dtuple_t* tuple, /*!< in: index tuple */
+ trx_id_t trx_id) /*!< in: transaction ID for insert,
+ or 0 for delete */
+{
+ byte* b;
+ ulint extra_size;
+ ulint size;
+ ulint mrec_size;
+ ulint avail_size;
+ row_log_t* log;
+
+ ut_ad(dtuple_validate(tuple));
+ ut_ad(dtuple_get_n_fields(tuple) == dict_index_get_n_fields(index));
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_SHARED)
+ || rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+ if (dict_index_is_corrupted(index)) {
+ return;
+ }
+
+ ut_ad(dict_index_is_online_ddl(index));
+
+ /* Compute the size of the record. This differs from
+ row_merge_buf_encode(), because here we do not encode
+ extra_size+1 (and reserve 0 as the end-of-chunk marker). */
+
+ size = rec_get_converted_size_temp(
+ index, tuple->fields, tuple->n_fields, &extra_size);
+ ut_ad(size >= extra_size);
+ ut_ad(size <= sizeof log->tail.buf);
+
+ mrec_size = ROW_LOG_HEADER_SIZE
+ + (extra_size >= 0x80) + size
+ + (trx_id ? DATA_TRX_ID_LEN : 0);
+
+ log = index->online_log;
+ mutex_enter(&log->mutex);
+
+ if (trx_id > log->max_trx) {
+ log->max_trx = trx_id;
+ }
+
+ UNIV_MEM_INVALID(log->tail.buf, sizeof log->tail.buf);
+
+ ut_ad(log->tail.bytes < srv_sort_buf_size);
+ avail_size = srv_sort_buf_size - log->tail.bytes;
+
+ if (mrec_size > avail_size) {
+ b = log->tail.buf;
+ } else {
+ b = log->tail.block + log->tail.bytes;
+ }
+
+ if (trx_id != 0) {
+ *b++ = ROW_OP_INSERT;
+ trx_write_trx_id(b, trx_id);
+ b += DATA_TRX_ID_LEN;
+ } else {
+ *b++ = ROW_OP_DELETE;
+ }
+
+ if (extra_size < 0x80) {
+ *b++ = (byte) extra_size;
+ } else {
+ ut_ad(extra_size < 0x8000);
+ *b++ = (byte) (0x80 | (extra_size >> 8));
+ *b++ = (byte) extra_size;
+ }
+
+ rec_convert_dtuple_to_temp(
+ b + extra_size, index, tuple->fields, tuple->n_fields);
+ b += size;
+
+ if (mrec_size >= avail_size) {
+ const os_offset_t byte_offset
+ = (os_offset_t) log->tail.blocks
+ * srv_sort_buf_size;
+ ibool ret;
+
+ if (byte_offset + srv_sort_buf_size >= srv_online_max_size) {
+ goto write_failed;
+ }
+
+ if (mrec_size == avail_size) {
+ ut_ad(b == &log->tail.block[srv_sort_buf_size]);
+ } else {
+ ut_ad(b == log->tail.buf + mrec_size);
+ memcpy(log->tail.block + log->tail.bytes,
+ log->tail.buf, avail_size);
+ }
+ UNIV_MEM_ASSERT_RW(log->tail.block, srv_sort_buf_size);
+ ret = os_file_write(
+ "(modification log)",
+ OS_FILE_FROM_FD(log->fd),
+ log->tail.block, byte_offset, srv_sort_buf_size);
+ log->tail.blocks++;
+ if (!ret) {
+write_failed:
+ /* We set the flag directly instead of invoking
+ dict_set_corrupted_index_cache_only(index) here,
+ because the index is not "public" yet. */
+ index->type |= DICT_CORRUPT;
+ }
+ UNIV_MEM_INVALID(log->tail.block, srv_sort_buf_size);
+ memcpy(log->tail.block, log->tail.buf + avail_size,
+ mrec_size - avail_size);
+ log->tail.bytes = mrec_size - avail_size;
+ } else {
+ log->tail.bytes += mrec_size;
+ ut_ad(b == log->tail.block + log->tail.bytes);
+ }
+
+ UNIV_MEM_INVALID(log->tail.buf, sizeof log->tail.buf);
+ mutex_exit(&log->mutex);
+}
+
+/******************************************************//**
+Gets the error status of the online index rebuild log.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+row_log_table_get_error(
+/*====================*/
+ const dict_index_t* index) /*!< in: clustered index of a table
+ that is being rebuilt online */
+{
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(dict_index_is_online_ddl(index));
+ return(index->online_log->error);
+}
+
+/******************************************************//**
+Starts logging an operation to a table that is being rebuilt.
+@return pointer to log, or NULL if no logging is necessary */
+static __attribute__((nonnull, warn_unused_result))
+byte*
+row_log_table_open(
+/*===============*/
+ row_log_t* log, /*!< in/out: online rebuild log */
+ ulint size, /*!< in: size of log record */
+ ulint* avail) /*!< out: available size for log record */
+{
+ mutex_enter(&log->mutex);
+
+ UNIV_MEM_INVALID(log->tail.buf, sizeof log->tail.buf);
+
+ if (log->error != DB_SUCCESS) {
+ mutex_exit(&log->mutex);
+ return(NULL);
+ }
+
+ ut_ad(log->tail.bytes < srv_sort_buf_size);
+ *avail = srv_sort_buf_size - log->tail.bytes;
+
+ if (size > *avail) {
+ return(log->tail.buf);
+ } else {
+ return(log->tail.block + log->tail.bytes);
+ }
+}
+
+/******************************************************//**
+Stops logging an operation to a table that is being rebuilt. */
+static __attribute__((nonnull))
+void
+row_log_table_close_func(
+/*=====================*/
+ row_log_t* log, /*!< in/out: online rebuild log */
+#ifdef UNIV_DEBUG
+ const byte* b, /*!< in: end of log record */
+#endif /* UNIV_DEBUG */
+ ulint size, /*!< in: size of log record */
+ ulint avail) /*!< in: available size for log record */
+{
+ ut_ad(mutex_own(&log->mutex));
+
+ if (size >= avail) {
+ const os_offset_t byte_offset
+ = (os_offset_t) log->tail.blocks
+ * srv_sort_buf_size;
+ ibool ret;
+
+ if (byte_offset + srv_sort_buf_size >= srv_online_max_size) {
+ goto write_failed;
+ }
+
+ if (size == avail) {
+ ut_ad(b == &log->tail.block[srv_sort_buf_size]);
+ } else {
+ ut_ad(b == log->tail.buf + size);
+ memcpy(log->tail.block + log->tail.bytes,
+ log->tail.buf, avail);
+ }
+ UNIV_MEM_ASSERT_RW(log->tail.block, srv_sort_buf_size);
+ ret = os_file_write(
+ "(modification log)",
+ OS_FILE_FROM_FD(log->fd),
+ log->tail.block, byte_offset, srv_sort_buf_size);
+ log->tail.blocks++;
+ if (!ret) {
+write_failed:
+ log->error = DB_ONLINE_LOG_TOO_BIG;
+ }
+ UNIV_MEM_INVALID(log->tail.block, srv_sort_buf_size);
+ memcpy(log->tail.block, log->tail.buf + avail, size - avail);
+ log->tail.bytes = size - avail;
+ } else {
+ log->tail.bytes += size;
+ ut_ad(b == log->tail.block + log->tail.bytes);
+ }
+
+ UNIV_MEM_INVALID(log->tail.buf, sizeof log->tail.buf);
+ mutex_exit(&log->mutex);
+}
+
+#ifdef UNIV_DEBUG
+# define row_log_table_close(log, b, size, avail) \
+ row_log_table_close_func(log, b, size, avail)
+#else /* UNIV_DEBUG */
+# define row_log_table_close(log, b, size, avail) \
+ row_log_table_close_func(log, size, avail)
+#endif /* UNIV_DEBUG */
+
+/******************************************************//**
+Logs a delete operation to a table that is being rebuilt.
+This will be merged in row_log_table_apply_delete(). */
+UNIV_INTERN
+void
+row_log_table_delete(
+/*=================*/
+ const rec_t* rec, /*!< in: clustered index leaf page record,
+ page X-latched */
+ dict_index_t* index, /*!< in/out: clustered index, S-latched
+ or X-latched */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */
+ trx_id_t trx_id) /*!< in: DB_TRX_ID of the record before
+ it was deleted */
+{
+ ulint old_pk_extra_size;
+ ulint old_pk_size;
+ ulint ext_size = 0;
+ ulint mrec_size;
+ ulint avail_size;
+ mem_heap_t* heap = NULL;
+ const dtuple_t* old_pk;
+ row_ext_t* ext;
+
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(rec_offs_validate(rec, index, offsets));
+ ut_ad(rec_offs_n_fields(offsets) == dict_index_get_n_fields(index));
+ ut_ad(rec_offs_size(offsets) <= sizeof index->online_log->tail.buf);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&index->lock, RW_LOCK_SHARED)
+ || rw_lock_own(&index->lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+ if (dict_index_is_corrupted(index)
+ || !dict_index_is_online_ddl(index)
+ || index->online_log->error != DB_SUCCESS) {
+ return;
+ }
+
+ dict_table_t* new_table = index->online_log->table;
+ dict_index_t* new_index = dict_table_get_first_index(new_table);
+
+ ut_ad(dict_index_is_clust(new_index));
+ ut_ad(!dict_index_is_online_ddl(new_index));
+
+ /* Create the tuple PRIMARY KEY, DB_TRX_ID in the new_table. */
+ if (index->online_log->same_pk) {
+ byte* db_trx_id;
+ dtuple_t* tuple;
+ ut_ad(new_index->n_uniq == index->n_uniq);
+
+ /* The PRIMARY KEY and DB_TRX_ID are in the first
+ fields of the record. */
+ heap = mem_heap_create(
+ DATA_TRX_ID_LEN
+ + DTUPLE_EST_ALLOC(new_index->n_uniq + 1));
+ old_pk = tuple = dtuple_create(heap, new_index->n_uniq + 1);
+ dict_index_copy_types(tuple, new_index, tuple->n_fields);
+ dtuple_set_n_fields_cmp(tuple, new_index->n_uniq);
+
+ for (ulint i = 0; i < new_index->n_uniq; i++) {
+ ulint len;
+ const void* field = rec_get_nth_field(
+ rec, offsets, i, &len);
+ dfield_t* dfield = dtuple_get_nth_field(
+ tuple, i);
+ ut_ad(len != UNIV_SQL_NULL);
+ ut_ad(!rec_offs_nth_extern(offsets, i));
+ dfield_set_data(dfield, field, len);
+ }
+
+ db_trx_id = static_cast<byte*>(
+ mem_heap_alloc(heap, DATA_TRX_ID_LEN));
+ trx_write_trx_id(db_trx_id, trx_id);
+
+ dfield_set_data(dtuple_get_nth_field(tuple, new_index->n_uniq),
+ db_trx_id, DATA_TRX_ID_LEN);
+ } else {
+ /* The PRIMARY KEY has changed. Translate the tuple. */
+ dfield_t* dfield;
+
+ old_pk = row_log_table_get_pk(rec, index, offsets, &heap);
+
+ if (!old_pk) {
+ ut_ad(index->online_log->error != DB_SUCCESS);
+ return;
+ }
+
+ /* Remove DB_ROLL_PTR. */
+ ut_ad(dtuple_get_n_fields_cmp(old_pk)
+ == dict_index_get_n_unique(new_index));
+ ut_ad(dtuple_get_n_fields(old_pk)
+ == dict_index_get_n_unique(new_index) + 2);
+ const_cast<ulint&>(old_pk->n_fields)--;
+
+ /* Overwrite DB_TRX_ID with the old trx_id. */
+ dfield = dtuple_get_nth_field(old_pk, new_index->n_uniq);
+ ut_ad(dfield_get_type(dfield)->mtype == DATA_SYS);
+ ut_ad(dfield_get_type(dfield)->prtype
+ == (DATA_NOT_NULL | DATA_TRX_ID));
+ ut_ad(dfield_get_len(dfield) == DATA_TRX_ID_LEN);
+ trx_write_trx_id(static_cast<byte*>(dfield->data), trx_id);
+ }
+
+ ut_ad(dtuple_get_n_fields(old_pk) > 1);
+ ut_ad(DATA_TRX_ID_LEN == dtuple_get_nth_field(
+ old_pk, old_pk->n_fields - 1)->len);
+ old_pk_size = rec_get_converted_size_temp(
+ new_index, old_pk->fields, old_pk->n_fields,
+ &old_pk_extra_size);
+ ut_ad(old_pk_extra_size < 0x100);
+
+ mrec_size = 4 + old_pk_size;
+
+ /* If the row is marked as rollback, we will need to
+ log the enough prefix of the BLOB unless both the
+ old and new table are in COMPACT or REDUNDANT format */
+ if ((dict_table_get_format(index->table) >= UNIV_FORMAT_B
+ || dict_table_get_format(new_table) >= UNIV_FORMAT_B)
+ && row_log_table_is_rollback(index, trx_id)) {
+ if (rec_offs_any_extern(offsets)) {
+ /* Build a cache of those off-page column
+ prefixes that are referenced by secondary
+ indexes. It can be that none of the off-page
+ columns are needed. */
+ row_build(ROW_COPY_DATA, index, rec,
+ offsets, NULL, NULL, NULL, &ext, heap);
+ if (ext) {
+ /* Log the row_ext_t, ext->ext and ext->buf */
+ ext_size = ext->n_ext * ext->max_len
+ + sizeof(*ext)
+ + ext->n_ext * sizeof(ulint)
+ + (ext->n_ext - 1) * sizeof ext->len;
+ mrec_size += ext_size;
+ }
+ }
+ }
+
+ if (byte* b = row_log_table_open(index->online_log,
+ mrec_size, &avail_size)) {
+ *b++ = ROW_T_DELETE;
+ *b++ = static_cast<byte>(old_pk_extra_size);
+
+ /* Log the size of external prefix we saved */
+ mach_write_to_2(b, ext_size);
+ b += 2;
+
+ rec_convert_dtuple_to_temp(
+ b + old_pk_extra_size, new_index,
+ old_pk->fields, old_pk->n_fields);
+
+ b += old_pk_size;
+
+ if (ext_size) {
+ ulint cur_ext_size = sizeof(*ext)
+ + (ext->n_ext - 1) * sizeof ext->len;
+
+ memcpy(b, ext, cur_ext_size);
+ b += cur_ext_size;
+
+ /* Check if we need to col_map to adjust the column
+ number. If columns were added/removed/reordered,
+ adjust the column number. */
+ if (const ulint* col_map =
+ index->online_log->col_map) {
+ for (ulint i = 0; i < ext->n_ext; i++) {
+ const_cast<ulint&>(ext->ext[i]) =
+ col_map[ext->ext[i]];
+ }
+ }
+
+ memcpy(b, ext->ext, ext->n_ext * sizeof(*ext->ext));
+ b += ext->n_ext * sizeof(*ext->ext);
+
+ ext_size -= cur_ext_size
+ + ext->n_ext * sizeof(*ext->ext);
+ memcpy(b, ext->buf, ext_size);
+ b += ext_size;
+ }
+
+ row_log_table_close(
+ index->online_log, b, mrec_size, avail_size);
+ }
+
+ mem_heap_free(heap);
+}
+
+/******************************************************//**
+Logs an insert or update to a table that is being rebuilt. */
+static __attribute__((nonnull(1,2,3)))
+void
+row_log_table_low_redundant(
+/*========================*/
+ const rec_t* rec, /*!< in: clustered index leaf
+ page record in ROW_FORMAT=REDUNDANT,
+ page X-latched */
+ dict_index_t* index, /*!< in/out: clustered index, S-latched
+ or X-latched */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */
+ bool insert, /*!< in: true if insert,
+ false if update */
+ const dtuple_t* old_pk, /*!< in: old PRIMARY KEY value
+ (if !insert and a PRIMARY KEY
+ is being created) */
+ const dict_index_t* new_index)
+ /*!< in: clustered index of the
+ new table, not latched */
+{
+ ulint old_pk_size;
+ ulint old_pk_extra_size;
+ ulint size;
+ ulint extra_size;
+ ulint mrec_size;
+ ulint avail_size;
+ mem_heap_t* heap = NULL;
+ dtuple_t* tuple;
+
+ ut_ad(!page_is_comp(page_align(rec)));
+ ut_ad(dict_index_get_n_fields(index) == rec_get_n_fields_old(rec));
+
+ heap = mem_heap_create(DTUPLE_EST_ALLOC(index->n_fields));
+ tuple = dtuple_create(heap, index->n_fields);
+ dict_index_copy_types(tuple, index, index->n_fields);
+ dtuple_set_n_fields_cmp(tuple, dict_index_get_n_unique(index));
+
+ if (rec_get_1byte_offs_flag(rec)) {
+ for (ulint i = 0; i < index->n_fields; i++) {
+ dfield_t* dfield;
+ ulint len;
+ const void* field;
+
+ dfield = dtuple_get_nth_field(tuple, i);
+ field = rec_get_nth_field_old(rec, i, &len);
+
+ dfield_set_data(dfield, field, len);
+ }
+ } else {
+ for (ulint i = 0; i < index->n_fields; i++) {
+ dfield_t* dfield;
+ ulint len;
+ const void* field;
+
+ dfield = dtuple_get_nth_field(tuple, i);
+ field = rec_get_nth_field_old(rec, i, &len);
+
+ dfield_set_data(dfield, field, len);
+
+ if (rec_2_is_field_extern(rec, i)) {
+ dfield_set_ext(dfield);
+ }
+ }
+ }
+
+ size = rec_get_converted_size_temp(
+ index, tuple->fields, tuple->n_fields, &extra_size);
+
+ mrec_size = ROW_LOG_HEADER_SIZE + size + (extra_size >= 0x80);
+
+ if (insert || index->online_log->same_pk) {
+ ut_ad(!old_pk);
+ old_pk_extra_size = old_pk_size = 0;
+ } else {
+ ut_ad(old_pk);
+ ut_ad(old_pk->n_fields == 2 + old_pk->n_fields_cmp);
+ ut_ad(DATA_TRX_ID_LEN == dtuple_get_nth_field(
+ old_pk, old_pk->n_fields - 2)->len);
+ ut_ad(DATA_ROLL_PTR_LEN == dtuple_get_nth_field(
+ old_pk, old_pk->n_fields - 1)->len);
+
+ old_pk_size = rec_get_converted_size_temp(
+ new_index, old_pk->fields, old_pk->n_fields,
+ &old_pk_extra_size);
+ ut_ad(old_pk_extra_size < 0x100);
+ mrec_size += 1/*old_pk_extra_size*/ + old_pk_size;
+ }
+
+ if (byte* b = row_log_table_open(index->online_log,
+ mrec_size, &avail_size)) {
+ *b++ = insert ? ROW_T_INSERT : ROW_T_UPDATE;
+
+ if (old_pk_size) {
+ *b++ = static_cast<byte>(old_pk_extra_size);
+
+ rec_convert_dtuple_to_temp(
+ b + old_pk_extra_size, new_index,
+ old_pk->fields, old_pk->n_fields);
+ b += old_pk_size;
+ }
+
+ if (extra_size < 0x80) {
+ *b++ = static_cast<byte>(extra_size);
+ } else {
+ ut_ad(extra_size < 0x8000);
+ *b++ = static_cast<byte>(0x80 | (extra_size >> 8));
+ *b++ = static_cast<byte>(extra_size);
+ }
+
+ rec_convert_dtuple_to_temp(
+ b + extra_size, index, tuple->fields, tuple->n_fields);
+ b += size;
+
+ row_log_table_close(
+ index->online_log, b, mrec_size, avail_size);
+ }
+
+ mem_heap_free(heap);
+}
+
+/******************************************************//**
+Logs an insert or update to a table that is being rebuilt. */
+static __attribute__((nonnull(1,2,3)))
+void
+row_log_table_low(
+/*==============*/
+ const rec_t* rec, /*!< in: clustered index leaf page record,
+ page X-latched */
+ dict_index_t* index, /*!< in/out: clustered index, S-latched
+ or X-latched */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */
+ bool insert, /*!< in: true if insert, false if update */
+ const dtuple_t* old_pk) /*!< in: old PRIMARY KEY value (if !insert
+ and a PRIMARY KEY is being created) */
+{
+ ulint omit_size;
+ ulint old_pk_size;
+ ulint old_pk_extra_size;
+ ulint extra_size;
+ ulint mrec_size;
+ ulint avail_size;
+ const dict_index_t* new_index = dict_table_get_first_index(
+ index->online_log->table);
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(dict_index_is_clust(new_index));
+ ut_ad(!dict_index_is_online_ddl(new_index));
+ ut_ad(rec_offs_validate(rec, index, offsets));
+ ut_ad(rec_offs_n_fields(offsets) == dict_index_get_n_fields(index));
+ ut_ad(rec_offs_size(offsets) <= sizeof index->online_log->tail.buf);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&index->lock, RW_LOCK_SHARED)
+ || rw_lock_own(&index->lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(fil_page_get_type(page_align(rec)) == FIL_PAGE_INDEX);
+ ut_ad(page_is_leaf(page_align(rec)));
+ ut_ad(!page_is_comp(page_align(rec)) == !rec_offs_comp(offsets));
+
+ if (dict_index_is_corrupted(index)
+ || !dict_index_is_online_ddl(index)
+ || index->online_log->error != DB_SUCCESS) {
+ return;
+ }
+
+ if (!rec_offs_comp(offsets)) {
+ row_log_table_low_redundant(
+ rec, index, offsets, insert, old_pk, new_index);
+ return;
+ }
+
+ ut_ad(page_is_comp(page_align(rec)));
+ ut_ad(rec_get_status(rec) == REC_STATUS_ORDINARY);
+
+ omit_size = REC_N_NEW_EXTRA_BYTES;
+
+ extra_size = rec_offs_extra_size(offsets) - omit_size;
+
+ mrec_size = rec_offs_size(offsets) - omit_size
+ + ROW_LOG_HEADER_SIZE + (extra_size >= 0x80);
+
+ if (insert || index->online_log->same_pk) {
+ ut_ad(!old_pk);
+ old_pk_extra_size = old_pk_size = 0;
+ } else {
+ ut_ad(old_pk);
+ ut_ad(old_pk->n_fields == 2 + old_pk->n_fields_cmp);
+ ut_ad(DATA_TRX_ID_LEN == dtuple_get_nth_field(
+ old_pk, old_pk->n_fields - 2)->len);
+ ut_ad(DATA_ROLL_PTR_LEN == dtuple_get_nth_field(
+ old_pk, old_pk->n_fields - 1)->len);
+
+ old_pk_size = rec_get_converted_size_temp(
+ new_index, old_pk->fields, old_pk->n_fields,
+ &old_pk_extra_size);
+ ut_ad(old_pk_extra_size < 0x100);
+ mrec_size += 1/*old_pk_extra_size*/ + old_pk_size;
+ }
+
+ if (byte* b = row_log_table_open(index->online_log,
+ mrec_size, &avail_size)) {
+ *b++ = insert ? ROW_T_INSERT : ROW_T_UPDATE;
+
+ if (old_pk_size) {
+ *b++ = static_cast<byte>(old_pk_extra_size);
+
+ rec_convert_dtuple_to_temp(
+ b + old_pk_extra_size, new_index,
+ old_pk->fields, old_pk->n_fields);
+ b += old_pk_size;
+ }
+
+ if (extra_size < 0x80) {
+ *b++ = static_cast<byte>(extra_size);
+ } else {
+ ut_ad(extra_size < 0x8000);
+ *b++ = static_cast<byte>(0x80 | (extra_size >> 8));
+ *b++ = static_cast<byte>(extra_size);
+ }
+
+ memcpy(b, rec - rec_offs_extra_size(offsets), extra_size);
+ b += extra_size;
+ memcpy(b, rec, rec_offs_data_size(offsets));
+ b += rec_offs_data_size(offsets);
+
+ row_log_table_close(
+ index->online_log, b, mrec_size, avail_size);
+ }
+}
+
+/******************************************************//**
+Logs an update to a table that is being rebuilt.
+This will be merged in row_log_table_apply_update(). */
+UNIV_INTERN
+void
+row_log_table_update(
+/*=================*/
+ const rec_t* rec, /*!< in: clustered index leaf page record,
+ page X-latched */
+ dict_index_t* index, /*!< in/out: clustered index, S-latched
+ or X-latched */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */
+ const dtuple_t* old_pk) /*!< in: row_log_table_get_pk()
+ before the update */
+{
+ row_log_table_low(rec, index, offsets, false, old_pk);
+}
+
+/******************************************************//**
+Constructs the old PRIMARY KEY and DB_TRX_ID,DB_ROLL_PTR
+of a table that is being rebuilt.
+@return tuple of PRIMARY KEY,DB_TRX_ID,DB_ROLL_PTR in the rebuilt table,
+or NULL if the PRIMARY KEY definition does not change */
+UNIV_INTERN
+const dtuple_t*
+row_log_table_get_pk(
+/*=================*/
+ const rec_t* rec, /*!< in: clustered index leaf page record,
+ page X-latched */
+ dict_index_t* index, /*!< in/out: clustered index, S-latched
+ or X-latched */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */
+ mem_heap_t** heap) /*!< in/out: memory heap where allocated */
+{
+ dtuple_t* tuple = NULL;
+ row_log_t* log = index->online_log;
+
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(dict_index_is_online_ddl(index));
+ ut_ad(!offsets || rec_offs_validate(rec, index, offsets));
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&index->lock, RW_LOCK_SHARED)
+ || rw_lock_own(&index->lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+ ut_ad(log);
+ ut_ad(log->table);
+
+ if (log->same_pk) {
+ /* The PRIMARY KEY columns are unchanged. */
+ return(NULL);
+ }
+
+ mutex_enter(&log->mutex);
+
+ /* log->error is protected by log->mutex. */
+ if (log->error == DB_SUCCESS) {
+ dict_table_t* new_table = log->table;
+ dict_index_t* new_index
+ = dict_table_get_first_index(new_table);
+ const ulint new_n_uniq
+ = dict_index_get_n_unique(new_index);
+
+ if (!*heap) {
+ ulint size = 0;
+
+ if (!offsets) {
+ size += (1 + REC_OFFS_HEADER_SIZE
+ + index->n_fields)
+ * sizeof *offsets;
+ }
+
+ for (ulint i = 0; i < new_n_uniq; i++) {
+ size += dict_col_get_min_size(
+ dict_index_get_nth_col(new_index, i));
+ }
+
+ *heap = mem_heap_create(
+ DTUPLE_EST_ALLOC(new_n_uniq + 2) + size);
+ }
+
+ if (!offsets) {
+ offsets = rec_get_offsets(rec, index, NULL,
+ ULINT_UNDEFINED, heap);
+ }
+
+ tuple = dtuple_create(*heap, new_n_uniq + 2);
+ dict_index_copy_types(tuple, new_index, tuple->n_fields);
+ dtuple_set_n_fields_cmp(tuple, new_n_uniq);
+
+ for (ulint new_i = 0; new_i < new_n_uniq; new_i++) {
+ dict_field_t* ifield;
+ dfield_t* dfield;
+ const dict_col_t* new_col;
+ const dict_col_t* col;
+ ulint col_no;
+ ulint i;
+ ulint len;
+ const byte* field;
+
+ ifield = dict_index_get_nth_field(new_index, new_i);
+ dfield = dtuple_get_nth_field(tuple, new_i);
+ new_col = dict_field_get_col(ifield);
+ col_no = new_col->ind;
+
+ for (ulint old_i = 0; old_i < index->table->n_cols;
+ old_i++) {
+ if (col_no == log->col_map[old_i]) {
+ col_no = old_i;
+ goto copy_col;
+ }
+ }
+
+ /* No matching column was found in the old
+ table, so this must be an added column.
+ Copy the default value. */
+ ut_ad(log->add_cols);
+ dfield_copy(dfield,
+ dtuple_get_nth_field(
+ log->add_cols, col_no));
+ continue;
+
+copy_col:
+ col = dict_table_get_nth_col(index->table, col_no);
+
+ i = dict_col_get_clust_pos(col, index);
+
+ if (i == ULINT_UNDEFINED) {
+ ut_ad(0);
+ log->error = DB_CORRUPTION;
+ tuple = NULL;
+ goto func_exit;
+ }
+
+ field = rec_get_nth_field(rec, offsets, i, &len);
+
+ if (len == UNIV_SQL_NULL) {
+ log->error = DB_INVALID_NULL;
+ tuple = NULL;
+ goto func_exit;
+ }
+
+ if (rec_offs_nth_extern(offsets, i)) {
+ ulint field_len = ifield->prefix_len;
+ byte* blob_field;
+ const ulint max_len =
+ DICT_MAX_FIELD_LEN_BY_FORMAT(
+ new_table);
+
+ if (!field_len) {
+ field_len = ifield->fixed_len;
+ if (!field_len) {
+ field_len = max_len + 1;
+ }
+ }
+
+ blob_field = static_cast<byte*>(
+ mem_heap_alloc(*heap, field_len));
+
+ len = btr_copy_externally_stored_field_prefix(
+ blob_field, field_len,
+ dict_table_zip_size(index->table),
+ field, len);
+ if (len == max_len + 1) {
+ log->error = DB_TOO_BIG_INDEX_COL;
+ tuple = NULL;
+ goto func_exit;
+ }
+
+ dfield_set_data(dfield, blob_field, len);
+ } else {
+ if (ifield->prefix_len
+ && ifield->prefix_len < len) {
+ len = ifield->prefix_len;
+ }
+
+ dfield_set_data(
+ dfield,
+ mem_heap_dup(*heap, field, len), len);
+ }
+ }
+
+ const byte* trx_roll = rec
+ + row_get_trx_id_offset(index, offsets);
+
+ dfield_set_data(dtuple_get_nth_field(tuple, new_n_uniq),
+ trx_roll, DATA_TRX_ID_LEN);
+ dfield_set_data(dtuple_get_nth_field(tuple, new_n_uniq + 1),
+ trx_roll + DATA_TRX_ID_LEN, DATA_ROLL_PTR_LEN);
+ }
+
+func_exit:
+ mutex_exit(&log->mutex);
+ return(tuple);
+}
+
+/******************************************************//**
+Logs an insert to a table that is being rebuilt.
+This will be merged in row_log_table_apply_insert(). */
+UNIV_INTERN
+void
+row_log_table_insert(
+/*=================*/
+ const rec_t* rec, /*!< in: clustered index leaf page record,
+ page X-latched */
+ dict_index_t* index, /*!< in/out: clustered index, S-latched
+ or X-latched */
+ const ulint* offsets)/*!< in: rec_get_offsets(rec,index) */
+{
+ row_log_table_low(rec, index, offsets, true, NULL);
+}
+
+/******************************************************//**
+Notes that a transaction is being rolled back. */
+UNIV_INTERN
+void
+row_log_table_rollback(
+/*===================*/
+	dict_index_t*	index,	/*!< in/out: clustered index */
+	trx_id_t	trx_id)	/*!< in: transaction being rolled back */
+{
+	ut_ad(dict_index_is_clust(index));
+#ifdef UNIV_DEBUG
+	ibool	corrupt	= FALSE;
+	ut_ad(trx_rw_is_active(trx_id, &corrupt));
+	ut_ad(!corrupt);
+#endif /* UNIV_DEBUG */
+
+	/* Protect transitions of index->online_status and access to
+	index->online_log. */
+	rw_lock_s_lock(&index->lock);
+
+	if (dict_index_is_online_ddl(index)) {
+		ut_ad(index->online_log);
+		ut_ad(index->online_log->table);
+		mutex_enter(&index->online_log->mutex);
+		trx_id_set*	trxs = index->online_log->trx_rb;
+
+		/* The set of rolled-back transactions is created
+		lazily, on the first rollback seen during the online
+		table rebuild; creation is serialized by the mutex. */
+		if (!trxs) {
+			index->online_log->trx_rb = trxs = new trx_id_set();
+		}
+
+		trxs->insert(trx_id);
+
+		mutex_exit(&index->online_log->mutex);
+	}
+
+	rw_lock_s_unlock(&index->lock);
+}
+
+/******************************************************//**
+Check if a transaction rollback has been initiated.
+@return true if inserts of this transaction were rolled back */
+UNIV_INTERN
+bool
+row_log_table_is_rollback(
+/*======================*/
+	const dict_index_t*	index,	/*!< in: clustered index */
+	trx_id_t		trx_id)	/*!< in: transaction id */
+{
+	ut_ad(dict_index_is_clust(index));
+	ut_ad(dict_index_is_online_ddl(index));
+	ut_ad(index->online_log);
+
+	/* Both the trx_rb pointer and the set contents are modified
+	under online_log->mutex in row_log_table_rollback().  Read the
+	pointer inside the mutex as well: reading it beforehand would
+	be a data race with the lazy "new trx_id_set()" assignment and
+	could miss a concurrently registered rollback. */
+	mutex_enter(&index->online_log->mutex);
+	const trx_id_set*	trxs = index->online_log->trx_rb;
+	const bool		is_rollback = trxs
+		&& trxs->find(trx_id) != trxs->end();
+	mutex_exit(&index->online_log->mutex);
+
+	return(is_rollback);
+}
+
+/******************************************************//**
+Converts a log record to a table row.
+@return converted row, or NULL if the conversion fails
+or the transaction has been rolled back */
+static __attribute__((nonnull, warn_unused_result))
+const dtuple_t*
+row_log_table_apply_convert_mrec(
+/*=============================*/
+	const mrec_t*		mrec,		/*!< in: merge record */
+	dict_index_t*		index,		/*!< in: index of mrec */
+	const ulint*		offsets,	/*!< in: offsets of mrec */
+	const row_log_t*	log,		/*!< in: rebuild context */
+	mem_heap_t*		heap,		/*!< in/out: memory heap */
+	trx_id_t		trx_id,		/*!< in: DB_TRX_ID of mrec */
+	dberr_t*		error)		/*!< out: DB_SUCCESS or
+						reason of failure */
+{
+	dtuple_t*	row;
+
+#ifdef UNIV_SYNC_DEBUG
+	/* This prevents BLOBs from being freed, in case an insert
+	transaction rollback starts after row_log_table_is_rollback(). */
+	ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+	/* A rolled-back transaction yields row=NULL but *error=DB_SUCCESS
+	(set at func_exit): the record is simply skipped, not an error. */
+	if (row_log_table_is_rollback(index, trx_id)) {
+		row = NULL;
+		goto func_exit;
+	}
+
+	/* This is based on row_build(). */
+	if (log->add_cols) {
+		row = dtuple_copy(log->add_cols, heap);
+		/* dict_table_copy_types() would set the fields to NULL */
+		for (ulint i = 0; i < dict_table_get_n_cols(log->table); i++) {
+			dict_col_copy_type(
+				dict_table_get_nth_col(log->table, i),
+				dfield_get_type(dtuple_get_nth_field(row, i)));
+		}
+	} else {
+		row = dtuple_create(heap, dict_table_get_n_cols(log->table));
+		dict_table_copy_types(row, log->table);
+	}
+
+	for (ulint i = 0; i < rec_offs_n_fields(offsets); i++) {
+		const dict_field_t*	ind_field
+			= dict_index_get_nth_field(index, i);
+
+		if (ind_field->prefix_len) {
+			/* Column prefixes can only occur in key
+			fields, which cannot be stored externally. For
+			a column prefix, there should also be the full
+			field in the clustered index tuple. The row
+			tuple comprises full fields, not prefixes. */
+			ut_ad(!rec_offs_nth_extern(offsets, i));
+			continue;
+		}
+
+		const dict_col_t*	col
+			= dict_field_get_col(ind_field);
+		ulint			col_no
+			= log->col_map[dict_col_get_no(col)];
+
+		if (col_no == ULINT_UNDEFINED) {
+			/* dropped column */
+			continue;
+		}
+
+		dfield_t*	dfield
+			= dtuple_get_nth_field(row, col_no);
+		ulint		len;
+		const void*	data;
+
+		if (rec_offs_nth_extern(offsets, i)) {
+			ut_ad(rec_offs_any_extern(offsets));
+			data = btr_rec_copy_externally_stored_field(
+				mrec, offsets,
+				dict_table_zip_size(index->table),
+				i, &len, heap);
+			ut_a(data);
+		} else {
+			data = rec_get_nth_field(mrec, offsets, i, &len);
+		}
+
+		dfield_set_data(dfield, data, len);
+
+		/* See if any columns were changed to NULL or NOT NULL. */
+		const dict_col_t*	new_col
+			= dict_table_get_nth_col(log->table, col_no);
+		ut_ad(new_col->mtype == col->mtype);
+
+		/* Assert that prtype matches except for nullability. */
+		ut_ad(!((new_col->prtype ^ col->prtype) & ~DATA_NOT_NULL));
+		ut_ad(!((new_col->prtype ^ dfield_get_type(dfield)->prtype)
+			& ~DATA_NOT_NULL));
+
+		if (new_col->prtype == col->prtype) {
+			continue;
+		}
+
+		if ((new_col->prtype & DATA_NOT_NULL)
+		    && dfield_is_null(dfield)) {
+			/* We got a NULL value for a NOT NULL column. */
+			*error = DB_INVALID_NULL;
+			return(NULL);
+		}
+
+		/* Adjust the DATA_NOT_NULL flag in the parsed row. */
+		dfield_get_type(dfield)->prtype = new_col->prtype;
+
+		ut_ad(dict_col_type_assert_equal(new_col,
+						 dfield_get_type(dfield)));
+	}
+
+func_exit:
+	*error = DB_SUCCESS;
+	return(row);
+}
+
+/******************************************************//**
+Replays an insert operation on a table that was rebuilt.
+@return DB_SUCCESS or error code */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_log_table_apply_insert_low(
+/*===========================*/
+	que_thr_t*	thr,		/*!< in: query graph */
+	const dtuple_t*	row,		/*!< in: table row
+					in the old table definition */
+	trx_id_t	trx_id,		/*!< in: trx_id of the row */
+	mem_heap_t*	offsets_heap,	/*!< in/out: memory heap
+					that can be emptied */
+	mem_heap_t*	heap,		/*!< in/out: memory heap */
+	row_merge_dup_t*	dup)	/*!< in/out: for reporting
+					duplicate key errors */
+{
+	dberr_t		error;
+	dtuple_t*	entry;
+	const row_log_t*log	= dup->index->online_log;
+	dict_index_t*	index	= dict_table_get_first_index(log->table);
+
+	ut_ad(dtuple_validate(row));
+	ut_ad(trx_id);
+
+#ifdef ROW_LOG_APPLY_PRINT
+	if (row_log_apply_print) {
+		fprintf(stderr, "table apply insert "
+			IB_ID_FMT " " IB_ID_FMT "\n",
+			index->table->id, index->id);
+		dtuple_print(stderr, row);
+	}
+#endif /* ROW_LOG_APPLY_PRINT */
+
+	/* No locking or undo logging: the rebuilt table is visible to
+	this thread only, and DB_TRX_ID/DB_ROLL_PTR are carried over. */
+	static const ulint	flags
+		= (BTR_CREATE_FLAG
+		   | BTR_NO_LOCKING_FLAG
+		   | BTR_NO_UNDO_LOG_FLAG
+		   | BTR_KEEP_SYS_FLAG);
+
+	entry = row_build_index_entry(row, NULL, index, heap);
+
+	error = row_ins_clust_index_entry_low(
+		flags, BTR_MODIFY_TREE, index, index->n_uniq, entry, 0, thr);
+
+	switch (error) {
+	case DB_SUCCESS:
+		break;
+	case DB_SUCCESS_LOCKED_REC:
+		/* The row had already been copied to the table. */
+		return(DB_SUCCESS);
+	default:
+		return(error);
+	}
+
+	/* Insert the corresponding entries into every secondary index,
+	skipping fulltext indexes (maintained separately). */
+	do {
+		if (!(index = dict_table_get_next_index(index))) {
+			break;
+		}
+
+		if (index->type & DICT_FTS) {
+			continue;
+		}
+
+		entry = row_build_index_entry(row, NULL, index, heap);
+		error = row_ins_sec_index_entry_low(
+			flags, BTR_MODIFY_TREE,
+			index, offsets_heap, heap, entry, trx_id, thr);
+	} while (error == DB_SUCCESS);
+
+	return(error);
+}
+
+/******************************************************//**
+Replays an insert operation on a table that was rebuilt.
+@return DB_SUCCESS or error code */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_log_table_apply_insert(
+/*=======================*/
+	que_thr_t*		thr,		/*!< in: query graph */
+	const mrec_t*		mrec,		/*!< in: record to insert */
+	const ulint*		offsets,	/*!< in: offsets of mrec */
+	mem_heap_t*		offsets_heap,	/*!< in/out: memory heap
+						that can be emptied */
+	mem_heap_t*		heap,		/*!< in/out: memory heap */
+	row_merge_dup_t*	dup,		/*!< in/out: for reporting
+						duplicate key errors */
+	trx_id_t		trx_id)		/*!< in: DB_TRX_ID of mrec */
+{
+	const row_log_t*log	= dup->index->online_log;
+	dberr_t		error;
+	const dtuple_t*	row	= row_log_table_apply_convert_mrec(
+		mrec, dup->index, offsets, log, heap, trx_id, &error);
+
+	ut_ad(error == DB_SUCCESS || !row);
+	/* Handling of duplicate key error requires storing
+	of offending key in a record buffer. */
+	ut_ad(error != DB_DUPLICATE_KEY);
+
+	if (error != DB_SUCCESS) {
+		return(error);
+	}
+
+	/* row == NULL with DB_SUCCESS means the transaction was rolled
+	back; skip the insert silently. */
+	if (row) {
+		error = row_log_table_apply_insert_low(
+			thr, row, trx_id, offsets_heap, heap, dup);
+		if (error != DB_SUCCESS) {
+			/* Report the erroneous row using the new
+			version of the table. */
+			innobase_row_to_mysql(dup->table, log->table, row);
+		}
+	}
+	return(error);
+}
+
+/******************************************************//**
+Deletes a record from a table that is being rebuilt.
+@return DB_SUCCESS or error code */
+static __attribute__((nonnull(1, 2, 4, 5), warn_unused_result))
+dberr_t
+row_log_table_apply_delete_low(
+/*===========================*/
+	btr_pcur_t*		pcur,		/*!< in/out: B-tree cursor,
+						will be trashed */
+	const ulint*		offsets,	/*!< in: offsets on pcur */
+	const row_ext_t*	save_ext,	/*!< in: saved external field
+						info, or NULL */
+	mem_heap_t*		heap,		/*!< in/out: memory heap */
+	mtr_t*			mtr)		/*!< in/out: mini-transaction,
+						will be committed */
+{
+	dberr_t		error;
+	row_ext_t*	ext;
+	dtuple_t*	row;
+	dict_index_t*	index	= btr_pcur_get_btr_cur(pcur)->index;
+
+	ut_ad(dict_index_is_clust(index));
+
+#ifdef ROW_LOG_APPLY_PRINT
+	if (row_log_apply_print) {
+		fprintf(stderr, "table apply delete "
+			IB_ID_FMT " " IB_ID_FMT "\n",
+			index->table->id, index->id);
+		rec_print_new(stderr, btr_pcur_get_rec(pcur), offsets);
+	}
+#endif /* ROW_LOG_APPLY_PRINT */
+	if (dict_table_get_next_index(index)) {
+		/* Build a row template for purging secondary index entries. */
+		row = row_build(
+			ROW_COPY_DATA, index, btr_pcur_get_rec(pcur),
+			offsets, NULL, NULL, NULL,
+			save_ext ? NULL : &ext, heap);
+		/* Prefer the caller-supplied external field info;
+		otherwise use what row_build() extracted. */
+		if (!save_ext) {
+			save_ext = ext;
+		}
+	} else {
+		/* No secondary indexes: no row template is needed. */
+		row = NULL;
+	}
+
+	btr_cur_pessimistic_delete(&error, FALSE, btr_pcur_get_btr_cur(pcur),
+				   BTR_CREATE_FLAG, RB_NONE, mtr);
+	mtr_commit(mtr);
+
+	if (error != DB_SUCCESS) {
+		return(error);
+	}
+
+	/* Remove the matching entry from each secondary index,
+	each in its own mini-transaction. */
+	while ((index = dict_table_get_next_index(index)) != NULL) {
+		if (index->type & DICT_FTS) {
+			continue;
+		}
+
+		const dtuple_t*	entry = row_build_index_entry(
+			row, save_ext, index, heap);
+		mtr_start(mtr);
+		btr_pcur_open(index, entry, PAGE_CUR_LE,
+			      BTR_MODIFY_TREE, pcur, mtr);
+#ifdef UNIV_DEBUG
+		switch (btr_pcur_get_btr_cur(pcur)->flag) {
+		case BTR_CUR_DELETE_REF:
+		case BTR_CUR_DEL_MARK_IBUF:
+		case BTR_CUR_DELETE_IBUF:
+		case BTR_CUR_INSERT_TO_IBUF:
+			/* We did not request buffering. */
+			break;
+		case BTR_CUR_HASH:
+		case BTR_CUR_HASH_FAIL:
+		case BTR_CUR_BINARY:
+			goto flag_ok;
+		}
+		ut_ad(0);
+flag_ok:
+#endif /* UNIV_DEBUG */
+
+		if (page_rec_is_infimum(btr_pcur_get_rec(pcur))
+		    || btr_pcur_get_low_match(pcur) < index->n_uniq) {
+			/* All secondary index entries should be
+			found, because new_table is being modified by
+			this thread only, and all indexes should be
+			updated in sync. */
+			mtr_commit(mtr);
+			return(DB_INDEX_CORRUPT);
+		}
+
+		btr_cur_pessimistic_delete(&error, FALSE,
+					   btr_pcur_get_btr_cur(pcur),
+					   BTR_CREATE_FLAG, RB_NONE, mtr);
+		mtr_commit(mtr);
+	}
+
+	return(error);
+}
+
+/******************************************************//**
+Replays a delete operation on a table that was rebuilt.
+@return DB_SUCCESS or error code */
+static __attribute__((nonnull(1, 3, 4, 5, 6, 7), warn_unused_result))
+dberr_t
+row_log_table_apply_delete(
+/*=======================*/
+	que_thr_t*	thr,		/*!< in: query graph */
+	ulint		trx_id_col,	/*!< in: position of
+					DB_TRX_ID in the new
+					clustered index */
+	const mrec_t*	mrec,		/*!< in: merge record */
+	const ulint*	moffsets,	/*!< in: offsets of mrec */
+	mem_heap_t*	offsets_heap,	/*!< in/out: memory heap
+					that can be emptied */
+	mem_heap_t*	heap,		/*!< in/out: memory heap */
+	dict_table_t*	new_table,	/*!< in: rebuilt table */
+	const row_ext_t*	save_ext)	/*!< in: saved external field
+					info, or NULL */
+{
+	dict_index_t*	index = dict_table_get_first_index(new_table);
+	dtuple_t*	old_pk;
+	mtr_t		mtr;
+	btr_pcur_t	pcur;
+	ulint*		offsets;
+
+	/* The log record carries PRIMARY KEY fields plus DB_TRX_ID. */
+	ut_ad(rec_offs_n_fields(moffsets)
+	      == dict_index_get_n_unique(index) + 1);
+	ut_ad(!rec_offs_any_extern(moffsets));
+
+	/* Convert the row to a search tuple. */
+	old_pk = dtuple_create(heap, index->n_uniq + 1);
+	dict_index_copy_types(old_pk, index, old_pk->n_fields);
+	dtuple_set_n_fields_cmp(old_pk, index->n_uniq);
+
+	for (ulint i = 0; i <= index->n_uniq; i++) {
+		ulint		len;
+		const void*	field;
+		field = rec_get_nth_field(mrec, moffsets, i, &len);
+		ut_ad(len != UNIV_SQL_NULL);
+		dfield_set_data(dtuple_get_nth_field(old_pk, i),
+				field, len);
+	}
+
+	mtr_start(&mtr);
+	btr_pcur_open(index, old_pk, PAGE_CUR_LE,
+		      BTR_MODIFY_TREE, &pcur, &mtr);
+#ifdef UNIV_DEBUG
+	switch (btr_pcur_get_btr_cur(&pcur)->flag) {
+	case BTR_CUR_DELETE_REF:
+	case BTR_CUR_DEL_MARK_IBUF:
+	case BTR_CUR_DELETE_IBUF:
+	case BTR_CUR_INSERT_TO_IBUF:
+		/* We did not request buffering. */
+		break;
+	case BTR_CUR_HASH:
+	case BTR_CUR_HASH_FAIL:
+	case BTR_CUR_BINARY:
+		goto flag_ok;
+	}
+	ut_ad(0);
+flag_ok:
+#endif /* UNIV_DEBUG */
+
+	if (page_rec_is_infimum(btr_pcur_get_rec(&pcur))
+	    || btr_pcur_get_low_match(&pcur) < index->n_uniq) {
+all_done:
+		mtr_commit(&mtr);
+		/* The record was not found. All done. */
+		return(DB_SUCCESS);
+	}
+
+	offsets = rec_get_offsets(btr_pcur_get_rec(&pcur), index, NULL,
+				  ULINT_UNDEFINED, &offsets_heap);
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+	ut_a(!rec_offs_any_null_extern(btr_pcur_get_rec(&pcur), offsets));
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
+
+	/* Only remove the record if DB_TRX_ID matches what was
+	buffered. */
+
+	{
+		ulint		len;
+		const void*	mrec_trx_id
+			= rec_get_nth_field(mrec, moffsets, trx_id_col, &len);
+		ut_ad(len == DATA_TRX_ID_LEN);
+		const void*	rec_trx_id
+			= rec_get_nth_field(btr_pcur_get_rec(&pcur), offsets,
+					    trx_id_col, &len);
+		ut_ad(len == DATA_TRX_ID_LEN);
+		/* A mismatch means the record was already replaced by a
+		later version; treat as "nothing to delete". */
+		if (memcmp(mrec_trx_id, rec_trx_id, DATA_TRX_ID_LEN)) {
+			goto all_done;
+		}
+	}
+
+	return(row_log_table_apply_delete_low(&pcur, offsets, save_ext,
+					      heap, &mtr));
+}
+
+/******************************************************//**
+Replays an update operation on a table that was rebuilt.
+@return DB_SUCCESS or error code */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_log_table_apply_update(
+/*=======================*/
+	que_thr_t*	thr,		/*!< in: query graph */
+	ulint		trx_id_col,	/*!< in: position of
+					DB_TRX_ID in the
+					old clustered index */
+	ulint		new_trx_id_col,	/*!< in: position of
+					DB_TRX_ID in the new
+					clustered index */
+	const mrec_t*	mrec,		/*!< in: new value */
+	const ulint*	offsets,	/*!< in: offsets of mrec */
+	mem_heap_t*	offsets_heap,	/*!< in/out: memory heap
+					that can be emptied */
+	mem_heap_t*	heap,		/*!< in/out: memory heap */
+	row_merge_dup_t*	dup,	/*!< in/out: for reporting
+					duplicate key errors */
+	trx_id_t	trx_id,		/*!< in: DB_TRX_ID of mrec */
+	const dtuple_t*	old_pk)		/*!< in: PRIMARY KEY and
+					DB_TRX_ID,DB_ROLL_PTR
+					of the old value,
+					or PRIMARY KEY if same_pk */
+{
+	const row_log_t*log	= dup->index->online_log;
+	const dtuple_t*	row;
+	dict_index_t*	index	= dict_table_get_first_index(log->table);
+	mtr_t		mtr;
+	btr_pcur_t	pcur;
+	dberr_t		error;
+
+	ut_ad(dtuple_get_n_fields_cmp(old_pk)
+	      == dict_index_get_n_unique(index));
+	ut_ad(dtuple_get_n_fields(old_pk)
+	      == dict_index_get_n_unique(index)
+	      + (dup->index->online_log->same_pk ? 0 : 2));
+
+	row = row_log_table_apply_convert_mrec(
+		mrec, dup->index, offsets, log, heap, trx_id, &error);
+
+	ut_ad(error == DB_SUCCESS || !row);
+	/* Handling of duplicate key error requires storing
+	of offending key in a record buffer. */
+	ut_ad(error != DB_DUPLICATE_KEY);
+
+	/* NULL row covers both a conversion error and a rolled-back
+	transaction (error == DB_SUCCESS in the latter case). */
+	if (!row) {
+		return(error);
+	}
+
+	mtr_start(&mtr);
+	btr_pcur_open(index, old_pk, PAGE_CUR_LE,
+		      BTR_MODIFY_TREE, &pcur, &mtr);
+#ifdef UNIV_DEBUG
+	switch (btr_pcur_get_btr_cur(&pcur)->flag) {
+	case BTR_CUR_DELETE_REF:
+	case BTR_CUR_DEL_MARK_IBUF:
+	case BTR_CUR_DELETE_IBUF:
+	case BTR_CUR_INSERT_TO_IBUF:
+		/* Intentional fallthrough to break: the assertion
+		alone flags the unexpected buffering. */
+		ut_ad(0);/* We did not request buffering. */
+	case BTR_CUR_HASH:
+	case BTR_CUR_HASH_FAIL:
+	case BTR_CUR_BINARY:
+		break;
+	}
+#endif /* UNIV_DEBUG */
+
+	if (page_rec_is_infimum(btr_pcur_get_rec(&pcur))
+	    || btr_pcur_get_low_match(&pcur) < index->n_uniq) {
+		mtr_commit(&mtr);
+insert:
+		/* Entry precondition for this label: the
+		mini-transaction must already be committed. */
+		ut_ad(mtr.state == MTR_COMMITTED);
+		/* The row was not found. Insert it. */
+		error = row_log_table_apply_insert_low(
+			thr, row, trx_id, offsets_heap, heap, dup);
+		if (error != DB_SUCCESS) {
+err_exit:
+			/* Report the erroneous row using the new
+			version of the table. */
+			innobase_row_to_mysql(dup->table, log->table, row);
+		}
+
+		return(error);
+	}
+
+	/* Update the record. */
+	ulint*		cur_offsets	= rec_get_offsets(
+		btr_pcur_get_rec(&pcur),
+		index, NULL, ULINT_UNDEFINED, &offsets_heap);
+
+	dtuple_t*	entry	= row_build_index_entry(
+		row, NULL, index, heap);
+	const upd_t*	update	= row_upd_build_difference_binary(
+		index, entry, btr_pcur_get_rec(&pcur), cur_offsets,
+		false, NULL, heap);
+
+	error = DB_SUCCESS;
+
+	if (!update->n_fields) {
+		/* Nothing to do. */
+		goto func_exit;
+	}
+
+	if (rec_offs_any_extern(cur_offsets)) {
+		/* If the record contains any externally stored
+		columns, perform the update by delete and insert,
+		because we will not write any undo log that would
+		allow purge to free any orphaned externally stored
+		columns. */
+delete_insert:
+		error = row_log_table_apply_delete_low(
+			&pcur, cur_offsets, NULL, heap, &mtr);
+		ut_ad(mtr.state == MTR_COMMITTED);
+
+		if (error != DB_SUCCESS) {
+			goto err_exit;
+		}
+
+		goto insert;
+	}
+
+	if (upd_get_nth_field(update, 0)->field_no < new_trx_id_col) {
+		if (dup->index->online_log->same_pk) {
+			/* The ROW_T_UPDATE log record should only be
+			written when the PRIMARY KEY fields of the
+			record did not change in the old table.  We
+			can only get a change of PRIMARY KEY columns
+			in the rebuilt table if the PRIMARY KEY was
+			redefined (!same_pk). */
+			ut_ad(0);
+			error = DB_CORRUPTION;
+			goto func_exit;
+		}
+
+		/* The PRIMARY KEY columns have changed.
+		Delete the record with the old PRIMARY KEY value,
+		provided that it carries the same
+		DB_TRX_ID,DB_ROLL_PTR. Then, insert the new row. */
+		ulint		len;
+		const byte*	cur_trx_roll	= rec_get_nth_field(
+			mrec, offsets, trx_id_col, &len);
+		ut_ad(len == DATA_TRX_ID_LEN);
+		const dfield_t*	new_trx_roll	= dtuple_get_nth_field(
+			old_pk, new_trx_id_col);
+		/* We assume that DB_TRX_ID,DB_ROLL_PTR are stored
+		in one contiguous block. */
+		ut_ad(rec_get_nth_field(mrec, offsets, trx_id_col + 1, &len)
+		      == cur_trx_roll + DATA_TRX_ID_LEN);
+		ut_ad(len == DATA_ROLL_PTR_LEN);
+		ut_ad(new_trx_roll->len == DATA_TRX_ID_LEN);
+		ut_ad(dtuple_get_nth_field(old_pk, new_trx_id_col + 1)
+		      -> len == DATA_ROLL_PTR_LEN);
+		ut_ad(static_cast<const byte*>(
+			      dtuple_get_nth_field(old_pk, new_trx_id_col + 1)
+			      ->data)
+		      == static_cast<const byte*>(new_trx_roll->data)
+		      + DATA_TRX_ID_LEN);
+
+		if (!memcmp(cur_trx_roll, new_trx_roll->data,
+			    DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)) {
+			/* The old row exists. Remove it. */
+			goto delete_insert;
+		}
+
+		/* Unless we called row_log_table_apply_delete_low(),
+		this will likely cause a duplicate key error. */
+		mtr_commit(&mtr);
+		goto insert;
+	}
+
+	dtuple_t*	old_row;
+	row_ext_t*	old_ext;
+
+	if (dict_table_get_next_index(index)) {
+		/* Construct the row corresponding to the old value of
+		the record. */
+		old_row = row_build(
+			ROW_COPY_DATA, index, btr_pcur_get_rec(&pcur),
+			cur_offsets, NULL, NULL, NULL, &old_ext, heap);
+		ut_ad(old_row);
+#ifdef ROW_LOG_APPLY_PRINT
+		if (row_log_apply_print) {
+			fprintf(stderr, "table apply update "
+				IB_ID_FMT " " IB_ID_FMT "\n",
+				index->table->id, index->id);
+			dtuple_print(stderr, old_row);
+			dtuple_print(stderr, row);
+		}
+#endif /* ROW_LOG_APPLY_PRINT */
+	} else {
+		old_row = NULL;
+		old_ext = NULL;
+	}
+
+	big_rec_t*	big_rec;
+
+	error = btr_cur_pessimistic_update(
+		BTR_CREATE_FLAG | BTR_NO_LOCKING_FLAG
+		| BTR_NO_UNDO_LOG_FLAG | BTR_KEEP_SYS_FLAG
+		| BTR_KEEP_POS_FLAG,
+		btr_pcur_get_btr_cur(&pcur),
+		&cur_offsets, &offsets_heap, heap, &big_rec,
+		update, 0, NULL, 0, &mtr);
+
+	if (big_rec) {
+		if (error == DB_SUCCESS) {
+			error = btr_store_big_rec_extern_fields(
+				index, btr_pcur_get_block(&pcur),
+				btr_pcur_get_rec(&pcur), cur_offsets,
+				big_rec, &mtr, BTR_STORE_UPDATE);
+		}
+
+		dtuple_big_rec_free(big_rec);
+	}
+
+	/* Propagate the update to the secondary indexes: delete the
+	old entry and insert the new one wherever ordering fields
+	changed.  Invariant: the mini-transaction is started at the
+	top of each iteration and at func_exit. */
+	while ((index = dict_table_get_next_index(index)) != NULL) {
+		if (error != DB_SUCCESS) {
+			break;
+		}
+
+		if (index->type & DICT_FTS) {
+			continue;
+		}
+
+		if (!row_upd_changes_ord_field_binary(
+			    index, update, thr, old_row, NULL)) {
+			continue;
+		}
+
+		mtr_commit(&mtr);
+
+		entry = row_build_index_entry(old_row, old_ext, index, heap);
+		if (!entry) {
+			ut_ad(0);
+			return(DB_CORRUPTION);
+		}
+
+		mtr_start(&mtr);
+
+		if (ROW_FOUND != row_search_index_entry(
+			    index, entry, BTR_MODIFY_TREE, &pcur, &mtr)) {
+			ut_ad(0);
+			error = DB_CORRUPTION;
+			break;
+		}
+
+		btr_cur_pessimistic_delete(
+			&error, FALSE, btr_pcur_get_btr_cur(&pcur),
+			BTR_CREATE_FLAG, RB_NONE, &mtr);
+
+		if (error != DB_SUCCESS) {
+			break;
+		}
+
+		mtr_commit(&mtr);
+
+		entry = row_build_index_entry(row, NULL, index, heap);
+		error = row_ins_sec_index_entry_low(
+			BTR_CREATE_FLAG | BTR_NO_LOCKING_FLAG
+			| BTR_NO_UNDO_LOG_FLAG | BTR_KEEP_SYS_FLAG,
+			BTR_MODIFY_TREE, index, offsets_heap, heap,
+			entry, trx_id, thr);
+
+		mtr_start(&mtr);
+	}
+
+func_exit:
+	mtr_commit(&mtr);
+	if (error != DB_SUCCESS) {
+		goto err_exit;
+	}
+
+	return(error);
+}
+
+/******************************************************//**
+Applies an operation to a table that was rebuilt.
+@return NULL on failure (mrec corruption) or when out of data;
+pointer to next record on success */
+static __attribute__((nonnull, warn_unused_result))
+const mrec_t*
+row_log_table_apply_op(
+/*===================*/
+	que_thr_t*	thr,		/*!< in: query graph */
+	ulint		trx_id_col,	/*!< in: position of
+					DB_TRX_ID in old index */
+	ulint		new_trx_id_col,	/*!< in: position of
+					DB_TRX_ID in new index */
+	row_merge_dup_t*	dup,	/*!< in/out: for reporting
+					duplicate key errors */
+	dberr_t*	error,		/*!< out: DB_SUCCESS
+					or error code */
+	mem_heap_t*	offsets_heap,	/*!< in/out: memory heap
+					that can be emptied */
+	mem_heap_t*	heap,		/*!< in/out: memory heap */
+	const mrec_t*	mrec,		/*!< in: merge record */
+	const mrec_t*	mrec_end,	/*!< in: end of buffer */
+	ulint*		offsets)	/*!< in/out: work area
+					for parsing mrec */
+{
+	const row_log_t*log	= dup->index->online_log;
+	dict_index_t*	new_index = dict_table_get_first_index(log->table);
+	ulint		extra_size;
+	const mrec_t*	next_mrec;
+	dtuple_t*	old_pk;
+	row_ext_t*	ext;
+	ulint		ext_size;
+
+	ut_ad(dict_index_is_clust(dup->index));
+	ut_ad(dup->index->table != log->table);
+
+	*error = DB_SUCCESS;
+
+	/* 3 = 1 (op type) + 1 (ext_size) + at least 1 byte payload */
+	if (mrec + 3 >= mrec_end) {
+		return(NULL);
+	}
+
+	/* NOTE: a NULL return with *error == DB_SUCCESS means that the
+	record is incomplete in this buffer; the caller will retry with
+	more data. */
+	switch (*mrec++) {
+	default:
+		ut_ad(0);
+		*error = DB_CORRUPTION;
+		return(NULL);
+	case ROW_T_INSERT:
+		/* extra_size uses a 1-or-2 byte variable-length
+		encoding: high bit set means a second byte follows. */
+		extra_size = *mrec++;
+
+		if (extra_size >= 0x80) {
+			/* Read another byte of extra_size. */
+
+			extra_size = (extra_size & 0x7f) << 8;
+			extra_size |= *mrec++;
+		}
+
+		mrec += extra_size;
+
+		if (mrec > mrec_end) {
+			return(NULL);
+		}
+
+		rec_offs_set_n_fields(offsets, dup->index->n_fields);
+		rec_init_offsets_temp(mrec, dup->index, offsets);
+
+		next_mrec = mrec + rec_offs_data_size(offsets);
+
+		if (next_mrec > mrec_end) {
+			return(NULL);
+		} else {
+			ulint		len;
+			const byte*	db_trx_id
+				= rec_get_nth_field(
+					mrec, offsets, trx_id_col, &len);
+			ut_ad(len == DATA_TRX_ID_LEN);
+			*error = row_log_table_apply_insert(
+				thr, mrec, offsets, offsets_heap,
+				heap, dup, trx_read_trx_id(db_trx_id));
+		}
+		break;
+
+	case ROW_T_DELETE:
+		/* 1 (extra_size) + 2 (ext_size) + at least 1 (payload) */
+		if (mrec + 4 >= mrec_end) {
+			return(NULL);
+		}
+
+		extra_size = *mrec++;
+		ext_size = mach_read_from_2(mrec);
+		mrec += 2;
+		ut_ad(mrec < mrec_end);
+
+		/* We assume extra_size < 0x100 for the PRIMARY KEY prefix.
+		For fixed-length PRIMARY key columns, it is 0. */
+		mrec += extra_size;
+
+		rec_offs_set_n_fields(offsets, new_index->n_uniq + 1);
+		rec_init_offsets_temp(mrec, new_index, offsets);
+		next_mrec = mrec + rec_offs_data_size(offsets) + ext_size;
+		if (next_mrec > mrec_end) {
+			return(NULL);
+		}
+
+		/* If there are external fields, retrieve those logged
+		prefix info and reconstruct the row_ext_t */
+		if (ext_size) {
+			/* We use memcpy to avoid unaligned
+			access on some non-x86 platforms.*/
+			ext = static_cast<row_ext_t*>(
+				mem_heap_dup(heap,
+					     mrec + rec_offs_data_size(offsets),
+					     ext_size));
+
+			/* Fix up the internal pointers of the copied
+			row_ext_t: the ext[] and buf areas follow the
+			struct header in the duplicated block. */
+			byte*	ext_start = reinterpret_cast<byte*>(ext);
+
+			ulint	ext_len = sizeof(*ext)
+				+ (ext->n_ext - 1) * sizeof ext->len;
+
+			ext->ext = reinterpret_cast<ulint*>(ext_start + ext_len);
+			ext_len += ext->n_ext * sizeof(*ext->ext);
+
+			ext->buf = static_cast<byte*>(ext_start + ext_len);
+		} else {
+			ext = NULL;
+		}
+
+		*error = row_log_table_apply_delete(
+			thr, new_trx_id_col,
+			mrec, offsets, offsets_heap, heap,
+			log->table, ext);
+		break;
+
+	case ROW_T_UPDATE:
+		/* Logically, the log entry consists of the
+		(PRIMARY KEY,DB_TRX_ID) of the old value (converted
+		to the new primary key definition) followed by
+		the new value in the old table definition. If the
+		definition of the columns belonging to PRIMARY KEY
+		is not changed, the log will only contain
+		DB_TRX_ID,new_row. */
+
+		if (dup->index->online_log->same_pk) {
+			ut_ad(new_index->n_uniq == dup->index->n_uniq);
+
+			extra_size = *mrec++;
+
+			if (extra_size >= 0x80) {
+				/* Read another byte of extra_size. */
+
+				extra_size = (extra_size & 0x7f) << 8;
+				extra_size |= *mrec++;
+			}
+
+			mrec += extra_size;
+
+			if (mrec > mrec_end) {
+				return(NULL);
+			}
+
+			rec_offs_set_n_fields(offsets, dup->index->n_fields);
+			rec_init_offsets_temp(mrec, dup->index, offsets);
+
+			next_mrec = mrec + rec_offs_data_size(offsets);
+
+			if (next_mrec > mrec_end) {
+				return(NULL);
+			}
+
+			old_pk = dtuple_create(heap, new_index->n_uniq);
+			dict_index_copy_types(
+				old_pk, new_index, old_pk->n_fields);
+
+			/* Copy the PRIMARY KEY fields from mrec to old_pk. */
+			for (ulint i = 0; i < new_index->n_uniq; i++) {
+				const void*	field;
+				ulint		len;
+				dfield_t*	dfield;
+
+				ut_ad(!rec_offs_nth_extern(offsets, i));
+
+				field = rec_get_nth_field(
+					mrec, offsets, i, &len);
+				ut_ad(len != UNIV_SQL_NULL);
+
+				dfield = dtuple_get_nth_field(old_pk, i);
+				dfield_set_data(dfield, field, len);
+			}
+		} else {
+			/* We assume extra_size < 0x100
+			for the PRIMARY KEY prefix. */
+			mrec += *mrec + 1;
+
+			if (mrec > mrec_end) {
+				return(NULL);
+			}
+
+			/* Get offsets for PRIMARY KEY,
+			DB_TRX_ID, DB_ROLL_PTR. */
+			rec_offs_set_n_fields(offsets, new_index->n_uniq + 2);
+			rec_init_offsets_temp(mrec, new_index, offsets);
+
+			next_mrec = mrec + rec_offs_data_size(offsets);
+			if (next_mrec + 2 > mrec_end) {
+				return(NULL);
+			}
+
+			/* Copy the PRIMARY KEY fields and
+			DB_TRX_ID, DB_ROLL_PTR from mrec to old_pk. */
+			old_pk = dtuple_create(heap, new_index->n_uniq + 2);
+			dict_index_copy_types(old_pk, new_index,
+					      old_pk->n_fields);
+
+			for (ulint i = 0;
+			     i < dict_index_get_n_unique(new_index) + 2;
+			     i++) {
+				const void*	field;
+				ulint		len;
+				dfield_t*	dfield;
+
+				ut_ad(!rec_offs_nth_extern(offsets, i));
+
+				field = rec_get_nth_field(
+					mrec, offsets, i, &len);
+				ut_ad(len != UNIV_SQL_NULL);
+
+				dfield = dtuple_get_nth_field(old_pk, i);
+				dfield_set_data(dfield, field, len);
+			}
+
+			mrec = next_mrec;
+
+			/* Fetch the new value of the row as it was
+			in the old table definition. */
+			extra_size = *mrec++;
+
+			if (extra_size >= 0x80) {
+				/* Read another byte of extra_size. */
+
+				extra_size = (extra_size & 0x7f) << 8;
+				extra_size |= *mrec++;
+			}
+
+			mrec += extra_size;
+
+			if (mrec > mrec_end) {
+				return(NULL);
+			}
+
+			rec_offs_set_n_fields(offsets, dup->index->n_fields);
+			rec_init_offsets_temp(mrec, dup->index, offsets);
+
+			next_mrec = mrec + rec_offs_data_size(offsets);
+
+			if (next_mrec > mrec_end) {
+				return(NULL);
+			}
+		}
+
+		ut_ad(next_mrec <= mrec_end);
+		dtuple_set_n_fields_cmp(old_pk, new_index->n_uniq);
+
+		{
+			ulint		len;
+			const byte*	db_trx_id
+				= rec_get_nth_field(
+					mrec, offsets, trx_id_col, &len);
+			ut_ad(len == DATA_TRX_ID_LEN);
+			*error = row_log_table_apply_update(
+				thr, trx_id_col, new_trx_id_col,
+				mrec, offsets, offsets_heap,
+				heap, dup, trx_read_trx_id(db_trx_id), old_pk);
+		}
+
+		break;
+	}
+
+	/* Both heaps are scratch space per applied record. */
+	mem_heap_empty(offsets_heap);
+	mem_heap_empty(heap);
+	return(next_mrec);
+}
+
+/******************************************************//**
+Applies operations to a table was rebuilt.
+@return DB_SUCCESS, or error code on failure */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_log_table_apply_ops(
+/*====================*/
+ que_thr_t* thr, /*!< in: query graph */
+ row_merge_dup_t*dup) /*!< in/out: for reporting duplicate key
+ errors */
+{
+ dberr_t error;
+ const mrec_t* mrec = NULL;
+ const mrec_t* next_mrec;
+ const mrec_t* mrec_end = NULL; /* silence bogus warning */
+ const mrec_t* next_mrec_end;
+ mem_heap_t* heap;
+ mem_heap_t* offsets_heap;
+ ulint* offsets;
+ bool has_index_lock;
+ dict_index_t* index = const_cast<dict_index_t*>(
+ dup->index);
+ dict_table_t* new_table = index->online_log->table;
+ dict_index_t* new_index = dict_table_get_first_index(
+ new_table);
+ const ulint i = 1 + REC_OFFS_HEADER_SIZE
+ + ut_max(dict_index_get_n_fields(index),
+ dict_index_get_n_unique(new_index) + 2);
+ const ulint trx_id_col = dict_col_get_clust_pos(
+ dict_table_get_sys_col(index->table, DATA_TRX_ID), index);
+ const ulint new_trx_id_col = dict_col_get_clust_pos(
+ dict_table_get_sys_col(new_table, DATA_TRX_ID), new_index);
+ trx_t* trx = thr_get_trx(thr);
+
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(dict_index_is_online_ddl(index));
+ ut_ad(trx->mysql_thd);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!dict_index_is_online_ddl(new_index));
+ ut_ad(trx_id_col > 0);
+ ut_ad(trx_id_col != ULINT_UNDEFINED);
+ ut_ad(new_trx_id_col > 0);
+ ut_ad(new_trx_id_col != ULINT_UNDEFINED);
+
+ UNIV_MEM_INVALID(&mrec_end, sizeof mrec_end);
+
+ offsets = static_cast<ulint*>(ut_malloc(i * sizeof *offsets));
+ offsets[0] = i;
+ offsets[1] = dict_index_get_n_fields(index);
+
+ heap = mem_heap_create(UNIV_PAGE_SIZE);
+ offsets_heap = mem_heap_create(UNIV_PAGE_SIZE);
+ has_index_lock = true;
+
+next_block:
+ ut_ad(has_index_lock);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(index->online_log->head.bytes == 0);
+
+ if (trx_is_interrupted(trx)) {
+ goto interrupted;
+ }
+
+ if (dict_index_is_corrupted(index)) {
+ error = DB_INDEX_CORRUPT;
+ goto func_exit;
+ }
+
+ ut_ad(dict_index_is_online_ddl(index));
+
+ error = index->online_log->error;
+
+ if (error != DB_SUCCESS) {
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY(index->online_log->head.blocks
+ > index->online_log->tail.blocks)) {
+unexpected_eof:
+ fprintf(stderr, "InnoDB: unexpected end of temporary file"
+ " for table %s\n", index->table_name);
+corruption:
+ error = DB_CORRUPTION;
+ goto func_exit;
+ }
+
+ if (index->online_log->head.blocks
+ == index->online_log->tail.blocks) {
+ if (index->online_log->head.blocks) {
+#ifdef HAVE_FTRUNCATE
+ /* Truncate the file in order to save space. */
+ ftruncate(index->online_log->fd, 0);
+#endif /* HAVE_FTRUNCATE */
+ index->online_log->head.blocks
+ = index->online_log->tail.blocks = 0;
+ }
+
+ next_mrec = index->online_log->tail.block;
+ next_mrec_end = next_mrec + index->online_log->tail.bytes;
+
+ if (next_mrec_end == next_mrec) {
+ /* End of log reached. */
+all_done:
+ ut_ad(has_index_lock);
+ ut_ad(index->online_log->head.blocks == 0);
+ ut_ad(index->online_log->tail.blocks == 0);
+ index->online_log->head.bytes = 0;
+ index->online_log->tail.bytes = 0;
+ error = DB_SUCCESS;
+ goto func_exit;
+ }
+ } else {
+ os_offset_t ofs;
+ ibool success;
+
+ ofs = (os_offset_t) index->online_log->head.blocks
+ * srv_sort_buf_size;
+
+ ut_ad(has_index_lock);
+ has_index_lock = false;
+ rw_lock_x_unlock(dict_index_get_lock(index));
+
+ log_free_check();
+
+ ut_ad(dict_index_is_online_ddl(index));
+
+ success = os_file_read_no_error_handling(
+ OS_FILE_FROM_FD(index->online_log->fd),
+ index->online_log->head.block, ofs,
+ srv_sort_buf_size);
+
+ if (!success) {
+ fprintf(stderr, "InnoDB: unable to read temporary file"
+ " for table %s\n", index->table_name);
+ goto corruption;
+ }
+
+#ifdef POSIX_FADV_DONTNEED
+ /* Each block is read exactly once. Free up the file cache. */
+ posix_fadvise(index->online_log->fd,
+ ofs, srv_sort_buf_size, POSIX_FADV_DONTNEED);
+#endif /* POSIX_FADV_DONTNEED */
+#ifdef FALLOC_FL_PUNCH_HOLE
+ /* Try to deallocate the space for the file on disk.
+ This should work on ext4 on Linux 2.6.39 and later,
+ and be ignored when the operation is unsupported. */
+ fallocate(index->online_log->fd,
+ FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ ofs, srv_buf_size);
+#endif /* FALLOC_FL_PUNCH_HOLE */
+
+ next_mrec = index->online_log->head.block;
+ next_mrec_end = next_mrec + srv_sort_buf_size;
+ }
+
+ /* This read is not protected by index->online_log->mutex for
+ performance reasons. We will eventually notice any error that
+ was flagged by a DML thread. */
+ error = index->online_log->error;
+
+ if (error != DB_SUCCESS) {
+ goto func_exit;
+ }
+
+ if (mrec) {
+ /* A partial record was read from the previous block.
+ Copy the temporary buffer full, as we do not know the
+ length of the record. Parse subsequent records from
+ the bigger buffer index->online_log->head.block
+ or index->online_log->tail.block. */
+
+ ut_ad(mrec == index->online_log->head.buf);
+ ut_ad(mrec_end > mrec);
+ ut_ad(mrec_end < (&index->online_log->head.buf)[1]);
+
+ memcpy((mrec_t*) mrec_end, next_mrec,
+ (&index->online_log->head.buf)[1] - mrec_end);
+ mrec = row_log_table_apply_op(
+ thr, trx_id_col, new_trx_id_col,
+ dup, &error, offsets_heap, heap,
+ index->online_log->head.buf,
+ (&index->online_log->head.buf)[1], offsets);
+ if (error != DB_SUCCESS) {
+ goto func_exit;
+ } else if (UNIV_UNLIKELY(mrec == NULL)) {
+ /* The record was not reassembled properly. */
+ goto corruption;
+ }
+ /* The record was previously found out to be
+ truncated. Now that the parse buffer was extended,
+ it should proceed beyond the old end of the buffer. */
+ ut_a(mrec > mrec_end);
+
+ index->online_log->head.bytes = mrec - mrec_end;
+ next_mrec += index->online_log->head.bytes;
+ }
+
+ ut_ad(next_mrec <= next_mrec_end);
+ /* The following loop must not be parsing the temporary
+ buffer, but head.block or tail.block. */
+
+ /* mrec!=NULL means that the next record starts from the
+ middle of the block */
+ ut_ad((mrec == NULL) == (index->online_log->head.bytes == 0));
+
+#ifdef UNIV_DEBUG
+ if (next_mrec_end == index->online_log->head.block
+ + srv_sort_buf_size) {
+ /* If tail.bytes == 0, next_mrec_end can also be at
+ the end of tail.block. */
+ if (index->online_log->tail.bytes == 0) {
+ ut_ad(next_mrec == next_mrec_end);
+ ut_ad(index->online_log->tail.blocks == 0);
+ ut_ad(index->online_log->head.blocks == 0);
+ ut_ad(index->online_log->head.bytes == 0);
+ } else {
+ ut_ad(next_mrec == index->online_log->head.block
+ + index->online_log->head.bytes);
+ ut_ad(index->online_log->tail.blocks
+ > index->online_log->head.blocks);
+ }
+ } else if (next_mrec_end == index->online_log->tail.block
+ + index->online_log->tail.bytes) {
+ ut_ad(next_mrec == index->online_log->tail.block
+ + index->online_log->head.bytes);
+ ut_ad(index->online_log->tail.blocks == 0);
+ ut_ad(index->online_log->head.blocks == 0);
+ ut_ad(index->online_log->head.bytes
+ <= index->online_log->tail.bytes);
+ } else {
+ ut_error;
+ }
+#endif /* UNIV_DEBUG */
+
+ mrec_end = next_mrec_end;
+
+ while (!trx_is_interrupted(trx)) {
+ mrec = next_mrec;
+ ut_ad(mrec < mrec_end);
+
+ if (!has_index_lock) {
+ /* We are applying operations from a different
+ block than the one that is being written to.
+ We do not hold index->lock in order to
+ allow other threads to concurrently buffer
+ modifications. */
+ ut_ad(mrec >= index->online_log->head.block);
+ ut_ad(mrec_end == index->online_log->head.block
+ + srv_sort_buf_size);
+ ut_ad(index->online_log->head.bytes
+ < srv_sort_buf_size);
+
+ /* Take the opportunity to do a redo log
+ checkpoint if needed. */
+ log_free_check();
+ } else {
+ /* We are applying operations from the last block.
+ Do not allow other threads to buffer anything,
+ so that we can finally catch up and synchronize. */
+ ut_ad(index->online_log->head.blocks == 0);
+ ut_ad(index->online_log->tail.blocks == 0);
+ ut_ad(mrec_end == index->online_log->tail.block
+ + index->online_log->tail.bytes);
+ ut_ad(mrec >= index->online_log->tail.block);
+ }
+
+ /* This read is not protected by index->online_log->mutex
+ for performance reasons. We will eventually notice any
+ error that was flagged by a DML thread. */
+ error = index->online_log->error;
+
+ if (error != DB_SUCCESS) {
+ goto func_exit;
+ }
+
+ next_mrec = row_log_table_apply_op(
+ thr, trx_id_col, new_trx_id_col,
+ dup, &error, offsets_heap, heap,
+ mrec, mrec_end, offsets);
+
+ if (error != DB_SUCCESS) {
+ goto func_exit;
+ } else if (next_mrec == next_mrec_end) {
+ /* The record happened to end on a block boundary.
+ Do we have more blocks left? */
+ if (has_index_lock) {
+ /* The index will be locked while
+ applying the last block. */
+ goto all_done;
+ }
+
+ mrec = NULL;
+process_next_block:
+ rw_lock_x_lock(dict_index_get_lock(index));
+ has_index_lock = true;
+
+ index->online_log->head.bytes = 0;
+ index->online_log->head.blocks++;
+ goto next_block;
+ } else if (next_mrec != NULL) {
+ ut_ad(next_mrec < next_mrec_end);
+ index->online_log->head.bytes += next_mrec - mrec;
+ } else if (has_index_lock) {
+ /* When mrec is within tail.block, it should
+ be a complete record, because we are holding
+ index->lock and thus excluding the writer. */
+ ut_ad(index->online_log->tail.blocks == 0);
+ ut_ad(mrec_end == index->online_log->tail.block
+ + index->online_log->tail.bytes);
+ ut_ad(0);
+ goto unexpected_eof;
+ } else {
+ memcpy(index->online_log->head.buf, mrec,
+ mrec_end - mrec);
+ mrec_end += index->online_log->head.buf - mrec;
+ mrec = index->online_log->head.buf;
+ goto process_next_block;
+ }
+ }
+
+interrupted:
+ error = DB_INTERRUPTED;
+func_exit:
+ if (!has_index_lock) {
+ rw_lock_x_lock(dict_index_get_lock(index));
+ }
+
+ mem_heap_free(offsets_heap);
+ mem_heap_free(heap);
+ ut_free(offsets);
+ return(error);
+}
+
+/******************************************************//**
+Apply the row_log_table log to a table upon completing rebuild.
+@return DB_SUCCESS, or error code on failure */
+UNIV_INTERN
+dberr_t
+row_log_table_apply(
+/*================*/
+ que_thr_t* thr, /*!< in: query graph */
+ dict_table_t* old_table,
+ /*!< in: old table */
+ struct TABLE* table) /*!< in/out: MySQL table
+ (for reporting duplicates) */
+{
+ dberr_t error;
+ dict_index_t* clust_index;
+
+ thr_get_trx(thr)->error_key_num = 0;
+
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(!rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED));
+#endif /* UNIV_SYNC_DEBUG */
+ clust_index = dict_table_get_first_index(old_table);
+
+ rw_lock_x_lock(dict_index_get_lock(clust_index));
+
+ if (!clust_index->online_log) {
+ ut_ad(dict_index_get_online_status(clust_index)
+ == ONLINE_INDEX_COMPLETE);
+ /* This function should not be called unless
+ rebuilding a table online. Build in some fault
+ tolerance. */
+ ut_ad(0);
+ error = DB_ERROR;
+ } else {
+ row_merge_dup_t dup = {
+ clust_index, table,
+ clust_index->online_log->col_map, 0
+ };
+
+ error = row_log_table_apply_ops(thr, &dup);
+ }
+
+ rw_lock_x_unlock(dict_index_get_lock(clust_index));
+ return(error);
+}
+
+/******************************************************//**
+Allocate the row log for an index and flag the index
+for online creation.
+@retval true if success, false if not */
+UNIV_INTERN
+bool
+row_log_allocate(
+/*=============*/
+ dict_index_t* index, /*!< in/out: index */
+ dict_table_t* table, /*!< in/out: new table being rebuilt,
+ or NULL when creating a secondary index */
+ bool same_pk,/*!< in: whether the definition of the
+ PRIMARY KEY has remained the same */
+ const dtuple_t* add_cols,
+ /*!< in: default values of
+ added columns, or NULL */
+ const ulint* col_map)/*!< in: mapping of old column
+ numbers to new ones, or NULL if !table */
+{
+ byte* buf;
+ row_log_t* log;
+ ulint size;
+
+ ut_ad(!dict_index_is_online_ddl(index));
+ ut_ad(dict_index_is_clust(index) == !!table);
+ ut_ad(!table || index->table != table);
+ ut_ad(same_pk || table);
+ ut_ad(!table || col_map);
+ ut_ad(!add_cols || col_map);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ size = 2 * srv_sort_buf_size + sizeof *log;
+ buf = (byte*) os_mem_alloc_large(&size);
+ if (!buf) {
+ return(false);
+ }
+
+ log = (row_log_t*) &buf[2 * srv_sort_buf_size];
+ log->size = size;
+ log->fd = row_merge_file_create_low();
+ if (log->fd < 0) {
+ os_mem_free_large(buf, size);
+ return(false);
+ }
+ mutex_create(index_online_log_key, &log->mutex,
+ SYNC_INDEX_ONLINE_LOG);
+ log->trx_rb = NULL;
+ log->table = table;
+ log->same_pk = same_pk;
+ log->add_cols = add_cols;
+ log->col_map = col_map;
+ log->error = DB_SUCCESS;
+ log->max_trx = 0;
+ log->head.block = buf;
+ log->tail.block = buf + srv_sort_buf_size;
+ log->tail.blocks = log->tail.bytes = 0;
+ log->head.blocks = log->head.bytes = 0;
+ dict_index_set_online_status(index, ONLINE_INDEX_CREATION);
+ index->online_log = log;
+
+ /* While we might be holding an exclusive data dictionary lock
+ here, in row_log_abort_sec() we will not always be holding it. Use
+ atomic operations in both cases. */
+ MONITOR_ATOMIC_INC(MONITOR_ONLINE_CREATE_INDEX);
+
+ return(true);
+}
+
+/******************************************************//**
+Free the row log for an index that was being created online. */
+UNIV_INTERN
+void
+row_log_free(
+/*=========*/
+ row_log_t*& log) /*!< in,own: row log */
+{
+ MONITOR_ATOMIC_DEC(MONITOR_ONLINE_CREATE_INDEX);
+
+ delete log->trx_rb;
+ row_merge_file_destroy_low(log->fd);
+ mutex_free(&log->mutex);
+ os_mem_free_large(log->head.block, log->size);
+ log = 0;
+}
+
+/******************************************************//**
+Get the latest transaction ID that has invoked row_log_online_op()
+during online creation.
+@return latest transaction ID, or 0 if nothing was logged */
+UNIV_INTERN
+trx_id_t
+row_log_get_max_trx(
+/*================*/
+ dict_index_t* index) /*!< in: index, must be locked */
+{
+ ut_ad(dict_index_get_online_status(index) == ONLINE_INDEX_CREATION);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad((rw_lock_own(dict_index_get_lock(index), RW_LOCK_SHARED)
+ && mutex_own(&index->online_log->mutex))
+ || rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ return(index->online_log->max_trx);
+}
+
+/******************************************************//**
+Applies an operation to a secondary index that was being created. */
+static __attribute__((nonnull))
+void
+row_log_apply_op_low(
+/*=================*/
+ dict_index_t* index, /*!< in/out: index */
+ row_merge_dup_t*dup, /*!< in/out: for reporting
+ duplicate key errors */
+ dberr_t* error, /*!< out: DB_SUCCESS or error code */
+ mem_heap_t* offsets_heap, /*!< in/out: memory heap for
+ allocating offsets; can be emptied */
+ bool has_index_lock, /*!< in: true if holding index->lock
+ in exclusive mode */
+ enum row_op op, /*!< in: operation being applied */
+ trx_id_t trx_id, /*!< in: transaction identifier */
+ const dtuple_t* entry) /*!< in: row */
+{
+ mtr_t mtr;
+ btr_cur_t cursor;
+ ulint* offsets = NULL;
+
+ ut_ad(!dict_index_is_clust(index));
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX)
+ == has_index_lock);
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(!dict_index_is_corrupted(index));
+ ut_ad(trx_id != 0 || op == ROW_OP_DELETE);
+
+ mtr_start(&mtr);
+
+ /* We perform the pessimistic variant of the operations if we
+ already hold index->lock exclusively. First, search the
+ record. The operation may already have been performed,
+ depending on when the row in the clustered index was
+ scanned. */
+ btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
+ has_index_lock
+ ? BTR_MODIFY_TREE
+ : BTR_MODIFY_LEAF,
+ &cursor, 0, __FILE__, __LINE__,
+ &mtr);
+
+ ut_ad(dict_index_get_n_unique(index) > 0);
+ /* This test is somewhat similar to row_ins_must_modify_rec(),
+ but not identical for unique secondary indexes. */
+ if (cursor.low_match >= dict_index_get_n_unique(index)
+ && !page_rec_is_infimum(btr_cur_get_rec(&cursor))) {
+ /* We have a matching record. */
+ bool exists = (cursor.low_match
+ == dict_index_get_n_fields(index));
+#ifdef UNIV_DEBUG
+ rec_t* rec = btr_cur_get_rec(&cursor);
+ ut_ad(page_rec_is_user_rec(rec));
+ ut_ad(!rec_get_deleted_flag(rec, page_rec_is_comp(rec)));
+#endif /* UNIV_DEBUG */
+
+ ut_ad(exists || dict_index_is_unique(index));
+
+ switch (op) {
+ case ROW_OP_DELETE:
+ if (!exists) {
+ /* The record was already deleted. */
+ goto func_exit;
+ }
+
+ if (btr_cur_optimistic_delete(
+ &cursor, BTR_CREATE_FLAG, &mtr)) {
+ *error = DB_SUCCESS;
+ break;
+ }
+
+ if (!has_index_lock) {
+ /* This needs a pessimistic operation.
+ Lock the index tree exclusively. */
+ mtr_commit(&mtr);
+ mtr_start(&mtr);
+ btr_cur_search_to_nth_level(
+ index, 0, entry, PAGE_CUR_LE,
+ BTR_MODIFY_TREE, &cursor, 0,
+ __FILE__, __LINE__, &mtr);
+
+ /* No other thread than the current one
+ is allowed to modify the index tree.
+ Thus, the record should still exist. */
+ ut_ad(cursor.low_match
+ >= dict_index_get_n_fields(index));
+ ut_ad(page_rec_is_user_rec(
+ btr_cur_get_rec(&cursor)));
+ }
+
+ /* As there are no externally stored fields in
+ a secondary index record, the parameter
+ rb_ctx = RB_NONE will be ignored. */
+
+ btr_cur_pessimistic_delete(
+ error, FALSE, &cursor,
+ BTR_CREATE_FLAG, RB_NONE, &mtr);
+ break;
+ case ROW_OP_INSERT:
+ if (exists) {
+ /* The record already exists. There
+ is nothing to be inserted. */
+ goto func_exit;
+ }
+
+ if (dtuple_contains_null(entry)) {
+ /* The UNIQUE KEY columns match, but
+ there is a NULL value in the key, and
+ NULL!=NULL. */
+ goto insert_the_rec;
+ }
+
+ /* Duplicate key error */
+ ut_ad(dict_index_is_unique(index));
+ row_merge_dup_report(dup, entry->fields);
+ goto func_exit;
+ }
+ } else {
+ switch (op) {
+ rec_t* rec;
+ big_rec_t* big_rec;
+ case ROW_OP_DELETE:
+ /* The record does not exist. */
+ goto func_exit;
+ case ROW_OP_INSERT:
+ if (dict_index_is_unique(index)
+ && (cursor.up_match
+ >= dict_index_get_n_unique(index)
+ || cursor.low_match
+ >= dict_index_get_n_unique(index))
+ && (!index->n_nullable
+ || !dtuple_contains_null(entry))) {
+ /* Duplicate key */
+ row_merge_dup_report(dup, entry->fields);
+ goto func_exit;
+ }
+insert_the_rec:
+ /* Insert the record. As we are inserting into
+ a secondary index, there cannot be externally
+ stored columns (!big_rec). */
+ *error = btr_cur_optimistic_insert(
+ BTR_NO_UNDO_LOG_FLAG
+ | BTR_NO_LOCKING_FLAG
+ | BTR_CREATE_FLAG,
+ &cursor, &offsets, &offsets_heap,
+ const_cast<dtuple_t*>(entry),
+ &rec, &big_rec, 0, NULL, &mtr);
+ ut_ad(!big_rec);
+ if (*error != DB_FAIL) {
+ break;
+ }
+
+ if (!has_index_lock) {
+ /* This needs a pessimistic operation.
+ Lock the index tree exclusively. */
+ mtr_commit(&mtr);
+ mtr_start(&mtr);
+ btr_cur_search_to_nth_level(
+ index, 0, entry, PAGE_CUR_LE,
+ BTR_MODIFY_TREE, &cursor, 0,
+ __FILE__, __LINE__, &mtr);
+ }
+
+ /* We already determined that the
+ record did not exist. No other thread
+ than the current one is allowed to
+ modify the index tree. Thus, the
+ record should still not exist. */
+
+ *error = btr_cur_pessimistic_insert(
+ BTR_NO_UNDO_LOG_FLAG
+ | BTR_NO_LOCKING_FLAG
+ | BTR_CREATE_FLAG,
+ &cursor, &offsets, &offsets_heap,
+ const_cast<dtuple_t*>(entry),
+ &rec, &big_rec,
+ 0, NULL, &mtr);
+ ut_ad(!big_rec);
+ break;
+ }
+ mem_heap_empty(offsets_heap);
+ }
+
+ if (*error == DB_SUCCESS && trx_id) {
+ page_update_max_trx_id(btr_cur_get_block(&cursor),
+ btr_cur_get_page_zip(&cursor),
+ trx_id, &mtr);
+ }
+
+func_exit:
+ mtr_commit(&mtr);
+}
+
+/******************************************************//**
+Applies an operation to a secondary index that was being created.
+@return NULL on failure (mrec corruption) or when out of data;
+pointer to next record on success */
+static __attribute__((nonnull, warn_unused_result))
+const mrec_t*
+row_log_apply_op(
+/*=============*/
+ dict_index_t* index, /*!< in/out: index */
+ row_merge_dup_t*dup, /*!< in/out: for reporting
+ duplicate key errors */
+ dberr_t* error, /*!< out: DB_SUCCESS or error code */
+ mem_heap_t* offsets_heap, /*!< in/out: memory heap for
+ allocating offsets; can be emptied */
+ mem_heap_t* heap, /*!< in/out: memory heap for
+ allocating data tuples */
+ bool has_index_lock, /*!< in: true if holding index->lock
+ in exclusive mode */
+ const mrec_t* mrec, /*!< in: merge record */
+ const mrec_t* mrec_end, /*!< in: end of buffer */
+ ulint* offsets) /*!< in/out: work area for
+ rec_init_offsets_temp() */
+
+{
+ enum row_op op;
+ ulint extra_size;
+ ulint data_size;
+ ulint n_ext;
+ dtuple_t* entry;
+ trx_id_t trx_id;
+
+ /* Online index creation is only used for secondary indexes. */
+ ut_ad(!dict_index_is_clust(index));
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX)
+ == has_index_lock);
+#endif /* UNIV_SYNC_DEBUG */
+
+ if (dict_index_is_corrupted(index)) {
+ *error = DB_INDEX_CORRUPT;
+ return(NULL);
+ }
+
+ *error = DB_SUCCESS;
+
+ if (mrec + ROW_LOG_HEADER_SIZE >= mrec_end) {
+ return(NULL);
+ }
+
+ switch (*mrec) {
+ case ROW_OP_INSERT:
+ if (ROW_LOG_HEADER_SIZE + DATA_TRX_ID_LEN + mrec >= mrec_end) {
+ return(NULL);
+ }
+
+ op = static_cast<enum row_op>(*mrec++);
+ trx_id = trx_read_trx_id(mrec);
+ mrec += DATA_TRX_ID_LEN;
+ break;
+ case ROW_OP_DELETE:
+ op = static_cast<enum row_op>(*mrec++);
+ trx_id = 0;
+ break;
+ default:
+corrupted:
+ ut_ad(0);
+ *error = DB_CORRUPTION;
+ return(NULL);
+ }
+
+ extra_size = *mrec++;
+
+ ut_ad(mrec < mrec_end);
+
+ if (extra_size >= 0x80) {
+ /* Read another byte of extra_size. */
+
+ extra_size = (extra_size & 0x7f) << 8;
+ extra_size |= *mrec++;
+ }
+
+ mrec += extra_size;
+
+ if (mrec > mrec_end) {
+ return(NULL);
+ }
+
+ rec_init_offsets_temp(mrec, index, offsets);
+
+ if (rec_offs_any_extern(offsets)) {
+ /* There should never be any externally stored fields
+ in a secondary index, which is what online index
+ creation is used for. Therefore, the log file must be
+ corrupted. */
+ goto corrupted;
+ }
+
+ data_size = rec_offs_data_size(offsets);
+
+ mrec += data_size;
+
+ if (mrec > mrec_end) {
+ return(NULL);
+ }
+
+ entry = row_rec_to_index_entry_low(
+ mrec - data_size, index, offsets, &n_ext, heap);
+ /* Online index creation is only implemented for secondary
+ indexes, which never contain off-page columns. */
+ ut_ad(n_ext == 0);
+#ifdef ROW_LOG_APPLY_PRINT
+ if (row_log_apply_print) {
+ fprintf(stderr, "apply " IB_ID_FMT " " TRX_ID_FMT " %u %u ",
+ index->id, trx_id,
+ unsigned (op), unsigned (has_index_lock));
+ for (const byte* m = mrec - data_size; m < mrec; m++) {
+ fprintf(stderr, "%02x", *m);
+ }
+ putc('\n', stderr);
+ }
+#endif /* ROW_LOG_APPLY_PRINT */
+ row_log_apply_op_low(index, dup, error, offsets_heap,
+ has_index_lock, op, trx_id, entry);
+ return(mrec);
+}
+
+/******************************************************//**
+Applies operations to a secondary index that was being created.
+@return DB_SUCCESS, or error code on failure */
+static __attribute__((nonnull))
+dberr_t
+row_log_apply_ops(
+/*==============*/
+ trx_t* trx, /*!< in: transaction (for checking if
+ the operation was interrupted) */
+ dict_index_t* index, /*!< in/out: index */
+ row_merge_dup_t*dup) /*!< in/out: for reporting duplicate key
+ errors */
+{
+ dberr_t error;
+ const mrec_t* mrec = NULL;
+ const mrec_t* next_mrec;
+ const mrec_t* mrec_end= NULL; /* silence bogus warning */
+ const mrec_t* next_mrec_end;
+ mem_heap_t* offsets_heap;
+ mem_heap_t* heap;
+ ulint* offsets;
+ bool has_index_lock;
+ const ulint i = 1 + REC_OFFS_HEADER_SIZE
+ + dict_index_get_n_fields(index);
+
+ ut_ad(dict_index_is_online_ddl(index));
+ ut_ad(*index->name == TEMP_INDEX_PREFIX);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(index->online_log);
+ UNIV_MEM_INVALID(&mrec_end, sizeof mrec_end);
+
+ offsets = static_cast<ulint*>(ut_malloc(i * sizeof *offsets));
+ offsets[0] = i;
+ offsets[1] = dict_index_get_n_fields(index);
+
+ offsets_heap = mem_heap_create(UNIV_PAGE_SIZE);
+ heap = mem_heap_create(UNIV_PAGE_SIZE);
+ has_index_lock = true;
+
+next_block:
+ ut_ad(has_index_lock);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ ut_ad(index->online_log->head.bytes == 0);
+
+ if (trx_is_interrupted(trx)) {
+ goto interrupted;
+ }
+
+ if (dict_index_is_corrupted(index)) {
+ error = DB_INDEX_CORRUPT;
+ goto func_exit;
+ }
+
+ if (UNIV_UNLIKELY(index->online_log->head.blocks
+ > index->online_log->tail.blocks)) {
+unexpected_eof:
+ fprintf(stderr, "InnoDB: unexpected end of temporary file"
+ " for index %s\n", index->name + 1);
+corruption:
+ error = DB_CORRUPTION;
+ goto func_exit;
+ }
+
+ if (index->online_log->head.blocks
+ == index->online_log->tail.blocks) {
+ if (index->online_log->head.blocks) {
+#ifdef HAVE_FTRUNCATE
+ /* Truncate the file in order to save space. */
+ ftruncate(index->online_log->fd, 0);
+#endif /* HAVE_FTRUNCATE */
+ index->online_log->head.blocks
+ = index->online_log->tail.blocks = 0;
+ }
+
+ next_mrec = index->online_log->tail.block;
+ next_mrec_end = next_mrec + index->online_log->tail.bytes;
+
+ if (next_mrec_end == next_mrec) {
+ /* End of log reached. */
+all_done:
+ ut_ad(has_index_lock);
+ ut_ad(index->online_log->head.blocks == 0);
+ ut_ad(index->online_log->tail.blocks == 0);
+ error = DB_SUCCESS;
+ goto func_exit;
+ }
+ } else {
+ os_offset_t ofs;
+ ibool success;
+
+ ofs = (os_offset_t) index->online_log->head.blocks
+ * srv_sort_buf_size;
+
+ ut_ad(has_index_lock);
+ has_index_lock = false;
+ rw_lock_x_unlock(dict_index_get_lock(index));
+
+ log_free_check();
+
+ success = os_file_read_no_error_handling(
+ OS_FILE_FROM_FD(index->online_log->fd),
+ index->online_log->head.block, ofs,
+ srv_sort_buf_size);
+
+ if (!success) {
+ fprintf(stderr, "InnoDB: unable to read temporary file"
+ " for index %s\n", index->name + 1);
+ goto corruption;
+ }
+
+#ifdef POSIX_FADV_DONTNEED
+ /* Each block is read exactly once. Free up the file cache. */
+ posix_fadvise(index->online_log->fd,
+ ofs, srv_sort_buf_size, POSIX_FADV_DONTNEED);
+#endif /* POSIX_FADV_DONTNEED */
+#ifdef FALLOC_FL_PUNCH_HOLE
+ /* Try to deallocate the space for the file on disk.
+ This should work on ext4 on Linux 2.6.39 and later,
+ and be ignored when the operation is unsupported. */
+ fallocate(index->online_log->fd,
+ FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ ofs, srv_buf_size);
+#endif /* FALLOC_FL_PUNCH_HOLE */
+
+ next_mrec = index->online_log->head.block;
+ next_mrec_end = next_mrec + srv_sort_buf_size;
+ }
+
+ if (mrec) {
+ /* A partial record was read from the previous block.
+ Copy the temporary buffer full, as we do not know the
+ length of the record. Parse subsequent records from
+ the bigger buffer index->online_log->head.block
+ or index->online_log->tail.block. */
+
+ ut_ad(mrec == index->online_log->head.buf);
+ ut_ad(mrec_end > mrec);
+ ut_ad(mrec_end < (&index->online_log->head.buf)[1]);
+
+ memcpy((mrec_t*) mrec_end, next_mrec,
+ (&index->online_log->head.buf)[1] - mrec_end);
+ mrec = row_log_apply_op(
+ index, dup, &error, offsets_heap, heap,
+ has_index_lock, index->online_log->head.buf,
+ (&index->online_log->head.buf)[1], offsets);
+ if (error != DB_SUCCESS) {
+ goto func_exit;
+ } else if (UNIV_UNLIKELY(mrec == NULL)) {
+ /* The record was not reassembled properly. */
+ goto corruption;
+ }
+ /* The record was previously found out to be
+ truncated. Now that the parse buffer was extended,
+ it should proceed beyond the old end of the buffer. */
+ ut_a(mrec > mrec_end);
+
+ index->online_log->head.bytes = mrec - mrec_end;
+ next_mrec += index->online_log->head.bytes;
+ }
+
+ ut_ad(next_mrec <= next_mrec_end);
+ /* The following loop must not be parsing the temporary
+ buffer, but head.block or tail.block. */
+
+ /* mrec!=NULL means that the next record starts from the
+ middle of the block */
+ ut_ad((mrec == NULL) == (index->online_log->head.bytes == 0));
+
+#ifdef UNIV_DEBUG
+ if (next_mrec_end == index->online_log->head.block
+ + srv_sort_buf_size) {
+ /* If tail.bytes == 0, next_mrec_end can also be at
+ the end of tail.block. */
+ if (index->online_log->tail.bytes == 0) {
+ ut_ad(next_mrec == next_mrec_end);
+ ut_ad(index->online_log->tail.blocks == 0);
+ ut_ad(index->online_log->head.blocks == 0);
+ ut_ad(index->online_log->head.bytes == 0);
+ } else {
+ ut_ad(next_mrec == index->online_log->head.block
+ + index->online_log->head.bytes);
+ ut_ad(index->online_log->tail.blocks
+ > index->online_log->head.blocks);
+ }
+ } else if (next_mrec_end == index->online_log->tail.block
+ + index->online_log->tail.bytes) {
+ ut_ad(next_mrec == index->online_log->tail.block
+ + index->online_log->head.bytes);
+ ut_ad(index->online_log->tail.blocks == 0);
+ ut_ad(index->online_log->head.blocks == 0);
+ ut_ad(index->online_log->head.bytes
+ <= index->online_log->tail.bytes);
+ } else {
+ ut_error;
+ }
+#endif /* UNIV_DEBUG */
+
+ mrec_end = next_mrec_end;
+
+ while (!trx_is_interrupted(trx)) {
+ mrec = next_mrec;
+ ut_ad(mrec < mrec_end);
+
+ if (!has_index_lock) {
+ /* We are applying operations from a different
+ block than the one that is being written to.
+ We do not hold index->lock in order to
+ allow other threads to concurrently buffer
+ modifications. */
+ ut_ad(mrec >= index->online_log->head.block);
+ ut_ad(mrec_end == index->online_log->head.block
+ + srv_sort_buf_size);
+ ut_ad(index->online_log->head.bytes
+ < srv_sort_buf_size);
+
+ /* Take the opportunity to do a redo log
+ checkpoint if needed. */
+ log_free_check();
+ } else {
+ /* We are applying operations from the last block.
+ Do not allow other threads to buffer anything,
+ so that we can finally catch up and synchronize. */
+ ut_ad(index->online_log->head.blocks == 0);
+ ut_ad(index->online_log->tail.blocks == 0);
+ ut_ad(mrec_end == index->online_log->tail.block
+ + index->online_log->tail.bytes);
+ ut_ad(mrec >= index->online_log->tail.block);
+ }
+
+ next_mrec = row_log_apply_op(
+ index, dup, &error, offsets_heap, heap,
+ has_index_lock, mrec, mrec_end, offsets);
+
+ if (error != DB_SUCCESS) {
+ goto func_exit;
+ } else if (next_mrec == next_mrec_end) {
+ /* The record happened to end on a block boundary.
+ Do we have more blocks left? */
+ if (has_index_lock) {
+ /* The index will be locked while
+ applying the last block. */
+ goto all_done;
+ }
+
+ mrec = NULL;
+process_next_block:
+ rw_lock_x_lock(dict_index_get_lock(index));
+ has_index_lock = true;
+
+ index->online_log->head.bytes = 0;
+ index->online_log->head.blocks++;
+ goto next_block;
+ } else if (next_mrec != NULL) {
+ ut_ad(next_mrec < next_mrec_end);
+ index->online_log->head.bytes += next_mrec - mrec;
+ } else if (has_index_lock) {
+ /* When mrec is within tail.block, it should
+ be a complete record, because we are holding
+ index->lock and thus excluding the writer. */
+ ut_ad(index->online_log->tail.blocks == 0);
+ ut_ad(mrec_end == index->online_log->tail.block
+ + index->online_log->tail.bytes);
+ ut_ad(0);
+ goto unexpected_eof;
+ } else {
+ memcpy(index->online_log->head.buf, mrec,
+ mrec_end - mrec);
+ mrec_end += index->online_log->head.buf - mrec;
+ mrec = index->online_log->head.buf;
+ goto process_next_block;
+ }
+ }
+
+interrupted:
+ error = DB_INTERRUPTED;
+func_exit:
+ if (!has_index_lock) {
+ rw_lock_x_lock(dict_index_get_lock(index));
+ }
+
+ switch (error) {
+ case DB_SUCCESS:
+ break;
+ case DB_INDEX_CORRUPT:
+ if (((os_offset_t) index->online_log->tail.blocks + 1)
+ * srv_sort_buf_size >= srv_online_max_size) {
+ /* The log file grew too big. */
+ error = DB_ONLINE_LOG_TOO_BIG;
+ }
+ /* fall through */
+ default:
+ /* We set the flag directly instead of invoking
+ dict_set_corrupted_index_cache_only(index) here,
+ because the index is not "public" yet. */
+ index->type |= DICT_CORRUPT;
+ }
+
+ mem_heap_free(heap);
+ mem_heap_free(offsets_heap);
+ ut_free(offsets);
+ return(error);
+}
+
+/******************************************************//**
+Apply the row log to the index upon completing index creation.
+@return DB_SUCCESS, or error code on failure */
+UNIV_INTERN
+dberr_t
+row_log_apply(
+/*==========*/
+ trx_t* trx, /*!< in: transaction (for checking if
+ the operation was interrupted) */
+ dict_index_t* index, /*!< in/out: secondary index */
+ struct TABLE* table) /*!< in/out: MySQL table
+ (for reporting duplicates) */
+{
+ dberr_t error;
+ row_log_t* log;
+ row_merge_dup_t dup = { index, table, NULL, 0 };
+
+ ut_ad(dict_index_is_online_ddl(index));
+ ut_ad(!dict_index_is_clust(index));
+
+ log_free_check();
+
+ rw_lock_x_lock(dict_index_get_lock(index));
+
+ if (!dict_table_is_corrupted(index->table)) {
+ error = row_log_apply_ops(trx, index, &dup);
+ } else {
+ error = DB_SUCCESS;
+ }
+
+ if (error != DB_SUCCESS || dup.n_dup) {
+ ut_a(!dict_table_is_discarded(index->table));
+ /* We set the flag directly instead of invoking
+ dict_set_corrupted_index_cache_only(index) here,
+ because the index is not "public" yet. */
+ index->type |= DICT_CORRUPT;
+ index->table->drop_aborted = TRUE;
+
+ if (error == DB_SUCCESS) {
+ error = DB_DUPLICATE_KEY;
+ }
+
+ dict_index_set_online_status(index, ONLINE_INDEX_ABORTED);
+ } else {
+ dict_index_set_online_status(index, ONLINE_INDEX_COMPLETE);
+ }
+
+ log = index->online_log;
+ index->online_log = NULL;
+ /* We could remove the TEMP_INDEX_PREFIX and update the data
+ dictionary to say that this index is complete, if we had
+ access to the .frm file here. If the server crashes before
+ all requested indexes have been created, this completed index
+ will be dropped. */
+ rw_lock_x_unlock(dict_index_get_lock(index));
+
+ row_log_free(log);
+
+ return(error);
+}
diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc
index cf662cb1f88..a509e2c5ca8 100644
--- a/storage/innobase/row/row0merge.cc
+++ b/storage/innobase/row/row0merge.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2005, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,40 +26,18 @@ Completed by Sunny Bains and Marko Makela
#include "row0merge.h"
#include "row0ext.h"
-#include "row0row.h"
-#include "row0upd.h"
+#include "row0log.h"
#include "row0ins.h"
#include "row0sel.h"
-#include "dict0dict.h"
-#include "dict0mem.h"
-#include "dict0boot.h"
#include "dict0crea.h"
-#include "dict0load.h"
-#include "btr0btr.h"
-#include "mach0data.h"
-#include "trx0rseg.h"
-#include "trx0trx.h"
-#include "trx0roll.h"
-#include "trx0undo.h"
#include "trx0purge.h"
-#include "trx0rec.h"
-#include "que0que.h"
-#include "rem0cmp.h"
-#include "read0read.h"
-#include "os0file.h"
#include "lock0lock.h"
-#include "data0data.h"
-#include "data0type.h"
-#include "que0que.h"
#include "pars0pars.h"
-#include "mem0mem.h"
-#include "log0log.h"
#include "ut0sort.h"
-#include "handler0alter.h"
-#include "fts0fts.h"
-#include "fts0types.h"
-#include "fts0priv.h"
#include "row0ftsort.h"
+#include "row0import.h"
+#include "handler0alter.h"
+#include "ha_prototypes.h"
/* Ignore posix_fadvise() on those platforms where it does not exist */
#if defined __WIN__
@@ -69,8 +47,6 @@ Completed by Sunny Bains and Marko Makela
#ifdef UNIV_DEBUG
/** Set these in order ot enable debug printout. */
/* @{ */
-/** Log the outcome of each row_merge_cmp() call, comparing records. */
-static ibool row_merge_print_cmp;
/** Log each record read from temporary file. */
static ibool row_merge_print_read;
/** Log each record write to temporary file. */
@@ -86,39 +62,23 @@ static ibool row_merge_print_block_write;
#endif /* UNIV_DEBUG */
/* Whether to disable file system cache */
-UNIV_INTERN char srv_disable_sort_file_cache;
-
-/********************************************************************//**
-Read sorted file containing index data tuples and insert these data
-tuples to the index
-@return DB_SUCCESS or error number */
-static
-ulint
-row_merge_insert_index_tuples(
-/*==========================*/
- trx_t* trx, /*!< in: transaction */
- dict_index_t* index, /*!< in: index */
- dict_table_t* table, /*!< in: new table */
- ulint zip_size,/*!< in: compressed page size of
- the old table, or 0 if uncompressed */
- int fd, /*!< in: file descriptor */
- row_merge_block_t* block); /*!< in/out: file buffer */
+UNIV_INTERN char srv_disable_sort_file_cache;
#ifdef UNIV_DEBUG
/******************************************************//**
Display a merge tuple. */
-static
+static __attribute__((nonnull))
void
row_merge_tuple_print(
/*==================*/
FILE* f, /*!< in: output stream */
- const dfield_t* entry, /*!< in: tuple to print */
+ const mtuple_t* entry, /*!< in: tuple to print */
ulint n_fields)/*!< in: number of fields in the tuple */
{
ulint j;
for (j = 0; j < n_fields; j++) {
- const dfield_t* field = &entry[j];
+ const dfield_t* field = &entry->fields[j];
if (dfield_is_null(field)) {
fputs("\n NULL;", f);
@@ -141,16 +101,54 @@ row_merge_tuple_print(
#endif /* UNIV_DEBUG */
/******************************************************//**
+Encode an index record. */
+static __attribute__((nonnull))
+void
+row_merge_buf_encode(
+/*=================*/
+ byte** b, /*!< in/out: pointer to
+ current end of output buffer */
+ const dict_index_t* index, /*!< in: index */
+ const mtuple_t* entry, /*!< in: index fields
+ of the record to encode */
+ ulint n_fields) /*!< in: number of fields
+ in the entry */
+{
+ ulint size;
+ ulint extra_size;
+
+ size = rec_get_converted_size_temp(
+ index, entry->fields, n_fields, &extra_size);
+ ut_ad(size >= extra_size);
+
+ /* Encode extra_size + 1 */
+ if (extra_size + 1 < 0x80) {
+ *(*b)++ = (byte) (extra_size + 1);
+ } else {
+ ut_ad((extra_size + 1) < 0x8000);
+ *(*b)++ = (byte) (0x80 | ((extra_size + 1) >> 8));
+ *(*b)++ = (byte) (extra_size + 1);
+ }
+
+ rec_convert_dtuple_to_temp(*b + extra_size, index,
+ entry->fields, n_fields);
+
+ *b += size;
+}
+
+/******************************************************//**
Allocate a sort buffer.
@return own: sort buffer */
-static
+static __attribute__((malloc, nonnull))
row_merge_buf_t*
row_merge_buf_create_low(
/*=====================*/
mem_heap_t* heap, /*!< in: heap where allocated */
dict_index_t* index, /*!< in: secondary index */
- ulint max_tuples, /*!< in: maximum number of data tuples */
- ulint buf_size) /*!< in: size of the buffer, in bytes */
+ ulint max_tuples, /*!< in: maximum number of
+ data tuples */
+ ulint buf_size) /*!< in: size of the buffer,
+ in bytes */
{
row_merge_buf_t* buf;
@@ -162,7 +160,7 @@ row_merge_buf_create_low(
buf->heap = heap;
buf->index = index;
buf->max_tuples = max_tuples;
- buf->tuples = static_cast<const dfield_t**>(
+ buf->tuples = static_cast<mtuple_t*>(
ut_malloc(2 * max_tuples * sizeof *buf->tuples));
buf->tmp_tuples = buf->tuples + max_tuples;
@@ -204,13 +202,11 @@ row_merge_buf_empty(
/*================*/
row_merge_buf_t* buf) /*!< in,own: sort buffer */
{
- ulint buf_size;
+ ulint buf_size = sizeof *buf;
ulint max_tuples = buf->max_tuples;
mem_heap_t* heap = buf->heap;
dict_index_t* index = buf->index;
- void* tuple = buf->tuples;
-
- buf_size = (sizeof *buf);;
+ mtuple_t* tuples = buf->tuples;
mem_heap_empty(heap);
@@ -218,7 +214,7 @@ row_merge_buf_empty(
buf->heap = heap;
buf->index = index;
buf->max_tuples = max_tuples;
- buf->tuples = static_cast<const dfield_t**>(tuple);
+ buf->tuples = tuples;
buf->tmp_tuples = buf->tuples + max_tuples;
return(buf);
@@ -230,7 +226,7 @@ UNIV_INTERN
void
row_merge_buf_free(
/*===============*/
- row_merge_buf_t* buf) /*!< in,own: sort buffer, to be freed */
+ row_merge_buf_t* buf) /*!< in,own: sort buffer to be freed */
{
ut_free(buf->tuples);
mem_heap_free(buf->heap);
@@ -244,19 +240,18 @@ ulint
row_merge_buf_add(
/*==============*/
row_merge_buf_t* buf, /*!< in/out: sort buffer */
- dict_index_t* fts_index,/*!< fts index to be
- created */
+ dict_index_t* fts_index,/*!< in: fts index to be created */
+ const dict_table_t* old_table,/*!< in: original table */
fts_psort_t* psort_info, /*!< in: parallel sort info */
- const dtuple_t* row, /*!< in: row in clustered index */
+ const dtuple_t* row, /*!< in: table row */
const row_ext_t* ext, /*!< in: cache of externally stored
column prefixes, or NULL */
doc_id_t* doc_id) /*!< in/out: Doc ID if we are
creating FTS index */
-
{
ulint i;
const dict_index_t* index;
- dfield_t* entry;
+ mtuple_t* entry;
dfield_t* field;
const dict_field_t* ifield;
ulint n_fields;
@@ -267,9 +262,13 @@ row_merge_buf_add(
ulint n_row_added = 0;
if (buf->n_tuples >= buf->max_tuples) {
- return(FALSE);
+ return(0);
}
+ DBUG_EXECUTE_IF(
+ "ib_row_merge_buf_add_two",
+ if (buf->n_tuples >= 2) return(0););
+
UNIV_PREFETCH_R(row->fields);
/* If we are building FTS index, buf->index points to
@@ -279,11 +278,9 @@ row_merge_buf_add(
n_fields = dict_index_get_n_fields(index);
- entry = static_cast<dfield_t*>(
- mem_heap_alloc(buf->heap, n_fields * sizeof *entry));
-
- buf->tuples[buf->n_tuples] = entry;
- field = entry;
+ entry = &buf->tuples[buf->n_tuples];
+ field = entry->fields = static_cast<dfield_t*>(
+ mem_heap_alloc(buf->heap, n_fields * sizeof *entry->fields));
data_size = 0;
extra_size = UT_BITS_IN_BYTES(index->n_nullable);
@@ -294,31 +291,15 @@ row_merge_buf_add(
ulint len;
const dict_col_t* col;
ulint col_no;
+ ulint fixed_len;
const dfield_t* row_field;
- ibool col_adjusted;
col = ifield->col;
col_no = dict_col_get_no(col);
- col_adjusted = FALSE;
-
- /* If we are creating a FTS index, a new Doc
- ID column is being added, so we need to adjust
- any column number positioned after this Doc ID */
- if (*doc_id > 0
- && DICT_TF2_FLAG_IS_SET(index->table,
- DICT_TF2_FTS_ADD_DOC_ID)
- && col_no > index->table->fts->doc_col) {
-
- ut_ad(index->table->fts);
-
- col_no--;
- col_adjusted = TRUE;
- }
/* Process the Doc ID column */
if (*doc_id > 0
- && col_no == index->table->fts->doc_col
- && !col_adjusted) {
+ && col_no == index->table->fts->doc_col) {
fts_write_doc_id((byte*) &write_doc_id, *doc_id);
/* Note: field->data now points to a value on the
@@ -435,9 +416,30 @@ row_merge_buf_add(
ut_ad(len <= col->len || col->mtype == DATA_BLOB);
- if (ifield->fixed_len) {
- ut_ad(len == ifield->fixed_len);
+ fixed_len = ifield->fixed_len;
+ if (fixed_len && !dict_table_is_comp(index->table)
+ && DATA_MBMINLEN(col->mbminmaxlen)
+ != DATA_MBMAXLEN(col->mbminmaxlen)) {
+ /* CHAR in ROW_FORMAT=REDUNDANT is always
+ fixed-length, but in the temporary file it is
+ variable-length for variable-length character
+ sets. */
+ fixed_len = 0;
+ }
+
+ if (fixed_len) {
+#ifdef UNIV_DEBUG
+ ulint mbminlen = DATA_MBMINLEN(col->mbminmaxlen);
+ ulint mbmaxlen = DATA_MBMAXLEN(col->mbminmaxlen);
+
+			/* len should be between size calculated based on
+			mbmaxlen and mbminlen */
+ ut_ad(len <= fixed_len);
+ ut_ad(!mbmaxlen || len >= mbminlen
+ * (fixed_len / mbmaxlen));
+
ut_ad(!dfield_is_ext(field));
+#endif /* UNIV_DEBUG */
} else if (dfield_is_ext(field)) {
extra_size += 2;
} else if (len < 128
@@ -464,12 +466,11 @@ row_merge_buf_add(
ulint size;
ulint extra;
- size = rec_get_converted_size_comp(index,
- REC_STATUS_ORDINARY,
- entry, n_fields, &extra);
+ size = rec_get_converted_size_temp(
+ index, entry->fields, n_fields, &extra);
- ut_ad(data_size + extra_size + REC_N_NEW_EXTRA_BYTES == size);
- ut_ad(extra_size + REC_N_NEW_EXTRA_BYTES == extra);
+ ut_ad(data_size + extra_size == size);
+ ut_ad(extra_size == extra);
}
#endif /* UNIV_DEBUG */
@@ -479,12 +480,6 @@ row_merge_buf_add(
of extra_size. */
data_size += (extra_size + 1) + ((extra_size + 1) >= 0x80);
- /* The following assertion may fail if row_merge_block_t is
- declared very small and a PRIMARY KEY is being created with
- many prefix columns. In that case, the record may exceed the
- page_zip_rec_needs_ext() limit. However, no further columns
- will be moved to external storage until the record is inserted
- to the clustered index B-tree. */
ut_ad(data_size < srv_sort_buf_size);
/* Reserve one byte for the end marker of row_merge_block_t. */
@@ -496,7 +491,7 @@ row_merge_buf_add(
buf->n_tuples++;
n_row_added++;
- field = entry;
+ field = entry->fields;
/* Copy the data fields. */
@@ -509,118 +504,120 @@ row_merge_buf_add(
/*************************************************************//**
Report a duplicate key. */
-static
+UNIV_INTERN
void
row_merge_dup_report(
/*=================*/
row_merge_dup_t* dup, /*!< in/out: for reporting duplicates */
const dfield_t* entry) /*!< in: duplicate index entry */
{
- mrec_buf_t* buf;
- const dtuple_t* tuple;
- dtuple_t tuple_store;
- const rec_t* rec;
- const dict_index_t* index = dup->index;
- ulint n_fields= dict_index_get_n_fields(index);
- mem_heap_t* heap;
- ulint* offsets;
- ulint n_ext;
-
- if (dup->n_dup++) {
+ if (!dup->n_dup++) {
/* Only report the first duplicate record,
but count all duplicate records. */
- return;
+ innobase_fields_to_mysql(dup->table, dup->index, entry);
}
-
- /* Convert the tuple to a record and then to MySQL format. */
- heap = mem_heap_create((1 + REC_OFFS_HEADER_SIZE + n_fields)
- * sizeof *offsets
- + sizeof *buf);
-
- buf = static_cast<mrec_buf_t*>(mem_heap_alloc(heap, sizeof *buf));
-
- tuple = dtuple_from_fields(&tuple_store, entry, n_fields);
- n_ext = dict_index_is_clust(index) ? dtuple_get_n_ext(tuple) : 0;
-
- rec = rec_convert_dtuple_to_rec(*buf, index, tuple, n_ext);
- offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
-
- innobase_rec_to_mysql(dup->table, rec, index, offsets);
-
- mem_heap_free(heap);
}
/*************************************************************//**
Compare two tuples.
@return 1, 0, -1 if a is greater, equal, less, respectively, than b */
-static
+static __attribute__((warn_unused_result))
int
row_merge_tuple_cmp(
/*================*/
+ ulint n_uniq, /*!< in: number of unique fields */
ulint n_field,/*!< in: number of fields */
- const dfield_t* a, /*!< in: first tuple to be compared */
- const dfield_t* b, /*!< in: second tuple to be compared */
- row_merge_dup_t* dup) /*!< in/out: for reporting duplicates */
+ const mtuple_t& a, /*!< in: first tuple to be compared */
+ const mtuple_t& b, /*!< in: second tuple to be compared */
+ row_merge_dup_t* dup) /*!< in/out: for reporting duplicates,
+ NULL if non-unique index */
{
int cmp;
- const dfield_t* field = a;
+ const dfield_t* af = a.fields;
+ const dfield_t* bf = b.fields;
+ ulint n = n_uniq;
+
+ ut_ad(n_uniq > 0);
+ ut_ad(n_uniq <= n_field);
/* Compare the fields of the tuples until a difference is
found or we run out of fields to compare. If !cmp at the
end, the tuples are equal. */
do {
- cmp = cmp_dfield_dfield(a++, b++);
- } while (!cmp && --n_field);
+ cmp = cmp_dfield_dfield(af++, bf++);
+ } while (!cmp && --n);
+
+ if (cmp) {
+ return(cmp);
+ }
- if (UNIV_UNLIKELY(!cmp) && UNIV_LIKELY_NULL(dup)) {
+ if (dup) {
/* Report a duplicate value error if the tuples are
logically equal. NULL columns are logically inequal,
although they are equal in the sorting order. Find
out if any of the fields are NULL. */
- for (b = field; b != a; b++) {
- if (dfield_is_null(b)) {
-
- goto func_exit;
+ for (const dfield_t* df = a.fields; df != af; df++) {
+ if (dfield_is_null(df)) {
+ goto no_report;
}
}
- row_merge_dup_report(dup, field);
+ row_merge_dup_report(dup, a.fields);
}
-func_exit:
+no_report:
+ /* The n_uniq fields were equal, but we compare all fields so
+ that we will get the same (internal) order as in the B-tree. */
+ for (n = n_field - n_uniq + 1; --n; ) {
+ cmp = cmp_dfield_dfield(af++, bf++);
+ if (cmp) {
+ return(cmp);
+ }
+ }
+
+ /* This should never be reached, except in a secondary index
+ when creating a secondary index and a PRIMARY KEY, and there
+ is a duplicate in the PRIMARY KEY that has not been detected
+ yet. Internally, an index must never contain duplicates. */
return(cmp);
}
/** Wrapper for row_merge_tuple_sort() to inject some more context to
UT_SORT_FUNCTION_BODY().
-@param a array of tuples that being sorted
-@param b aux (work area), same size as tuples[]
-@param c lower bound of the sorting area, inclusive
-@param d upper bound of the sorting area, inclusive */
-#define row_merge_tuple_sort_ctx(a,b,c,d) \
- row_merge_tuple_sort(n_field, dup, a, b, c, d)
+@param tuples array of tuples that being sorted
+@param aux work area, same size as tuples[]
+@param low lower bound of the sorting area, inclusive
+@param high upper bound of the sorting area, inclusive */
+#define row_merge_tuple_sort_ctx(tuples, aux, low, high) \
+ row_merge_tuple_sort(n_uniq, n_field, dup, tuples, aux, low, high)
/** Wrapper for row_merge_tuple_cmp() to inject some more context to
UT_SORT_FUNCTION_BODY().
@param a first tuple to be compared
@param b second tuple to be compared
@return 1, 0, -1 if a is greater, equal, less, respectively, than b */
-#define row_merge_tuple_cmp_ctx(a,b) row_merge_tuple_cmp(n_field, a, b, dup)
+#define row_merge_tuple_cmp_ctx(a,b) \
+ row_merge_tuple_cmp(n_uniq, n_field, a, b, dup)
/**********************************************************************//**
Merge sort the tuple buffer in main memory. */
-static
+static __attribute__((nonnull(4,5)))
void
row_merge_tuple_sort(
/*=================*/
+ ulint n_uniq, /*!< in: number of unique fields */
ulint n_field,/*!< in: number of fields */
- row_merge_dup_t* dup, /*!< in/out: for reporting duplicates */
- const dfield_t** tuples, /*!< in/out: tuples */
- const dfield_t** aux, /*!< in/out: work area */
+ row_merge_dup_t* dup, /*!< in/out: reporter of duplicates
+ (NULL if non-unique index) */
+ mtuple_t* tuples, /*!< in/out: tuples */
+ mtuple_t* aux, /*!< in/out: work area */
ulint low, /*!< in: lower bound of the
sorting area, inclusive */
ulint high) /*!< in: upper bound of the
sorting area, exclusive */
{
+ ut_ad(n_field > 0);
+ ut_ad(n_uniq <= n_field);
+
UT_SORT_FUNCTION_BODY(row_merge_tuple_sort_ctx,
tuples, aux, low, high, row_merge_tuple_cmp_ctx);
}
@@ -632,9 +629,12 @@ void
row_merge_buf_sort(
/*===============*/
row_merge_buf_t* buf, /*!< in/out: sort buffer */
- row_merge_dup_t* dup) /*!< in/out: for reporting duplicates */
+ row_merge_dup_t* dup) /*!< in/out: reporter of duplicates
+ (NULL if non-unique index) */
{
- row_merge_tuple_sort(dict_index_get_n_unique(buf->index), dup,
+ row_merge_tuple_sort(dict_index_get_n_unique(buf->index),
+ dict_index_get_n_fields(buf->index),
+ dup,
buf->tuples, buf->tmp_tuples, 0, buf->n_tuples);
}
@@ -653,39 +653,11 @@ row_merge_buf_write(
ulint n_fields= dict_index_get_n_fields(index);
byte* b = &block[0];
- ulint i;
-
- for (i = 0; i < buf->n_tuples; i++) {
- ulint size;
- ulint extra_size;
- const dfield_t* entry = buf->tuples[i];
-
- size = rec_get_converted_size_comp(index,
- REC_STATUS_ORDINARY,
- entry, n_fields,
- &extra_size);
- ut_ad(size >= extra_size);
- ut_ad(extra_size >= REC_N_NEW_EXTRA_BYTES);
- extra_size -= REC_N_NEW_EXTRA_BYTES;
- size -= REC_N_NEW_EXTRA_BYTES;
-
- /* Encode extra_size + 1 */
- if (extra_size + 1 < 0x80) {
- *b++ = (byte) (extra_size + 1);
- } else {
- ut_ad((extra_size + 1) < 0x8000);
- *b++ = (byte) (0x80 | ((extra_size + 1) >> 8));
- *b++ = (byte) (extra_size + 1);
- }
-
- ut_ad(b + size < &block[srv_sort_buf_size]);
-
- rec_convert_dtuple_to_rec_comp(b + extra_size, 0, index,
- REC_STATUS_ORDINARY,
- entry, n_fields);
-
- b += size;
+ for (ulint i = 0; i < buf->n_tuples; i++) {
+ const mtuple_t* entry = &buf->tuples[i];
+ row_merge_buf_encode(&b, index, entry, n_fields);
+ ut_ad(b < &block[srv_sort_buf_size]);
#ifdef UNIV_DEBUG
if (row_merge_print_write) {
fprintf(stderr, "row_merge_buf_write %p,%d,%lu %lu",
@@ -744,36 +716,6 @@ row_merge_heap_create(
return(heap);
}
-/**********************************************************************//**
-Search an index object by name and column names. If several indexes match,
-return the index with the max id.
-@return matching index, NULL if not found */
-static
-dict_index_t*
-row_merge_dict_table_get_index(
-/*===========================*/
- dict_table_t* table, /*!< in: table */
- const merge_index_def_t*index_def) /*!< in: index definition */
-{
- ulint i;
- dict_index_t* index;
- const char** column_names;
-
- column_names = static_cast<const char**>(
- mem_alloc(index_def->n_fields * sizeof *column_names));
-
- for (i = 0; i < index_def->n_fields; ++i) {
- column_names[i] = index_def->fields[i].field_name;
- }
-
- index = dict_table_get_index_by_max_id(
- table, index_def->name, column_names, index_def->n_fields);
-
- mem_free((void*) column_names);
-
- return(index);
-}
-
/********************************************************************//**
Read a merge block from the file system.
@return TRUE if request was successful, FALSE if fail */
@@ -790,6 +732,8 @@ row_merge_read(
os_offset_t ofs = ((os_offset_t) offset) * srv_sort_buf_size;
ibool success;
+ DBUG_EXECUTE_IF("row_merge_read_failure", return(FALSE););
+
#ifdef UNIV_DEBUG
if (row_merge_print_block_read) {
fprintf(stderr, "row_merge_read fd=%d ofs=%lu\n",
@@ -837,6 +781,8 @@ row_merge_write(
os_offset_t ofs = buf_len * (os_offset_t) offset;
ibool ret;
+ DBUG_EXECUTE_IF("row_merge_write_failure", return(FALSE););
+
ret = os_file_write("(merge)", OS_FILE_FROM_FD(fd), buf, ofs, buf_len);
#ifdef UNIV_DEBUG
@@ -858,7 +804,7 @@ row_merge_write(
/********************************************************************//**
Read a merge record.
@return pointer to next record, or NULL on I/O error or end of list */
-UNIV_INTERN __attribute__((nonnull))
+UNIV_INTERN
const byte*
row_merge_read_rec(
/*===============*/
@@ -934,7 +880,7 @@ err_exit:
case. */
avail_size = &block[srv_sort_buf_size] - b;
-
+ ut_ad(avail_size < sizeof *buf);
memcpy(*buf, b, avail_size);
if (!row_merge_read(fd, ++(*foffs), block)) {
@@ -951,7 +897,7 @@ err_exit:
*mrec = *buf + extra_size;
- rec_init_offsets_comp_ordinary(*mrec, 0, index, offsets);
+ rec_init_offsets_temp(*mrec, index, offsets);
data_size = rec_offs_data_size(offsets);
@@ -970,7 +916,7 @@ err_exit:
*mrec = b + extra_size;
- rec_init_offsets_comp_ordinary(*mrec, 0, index, offsets);
+ rec_init_offsets_temp(*mrec, index, offsets);
data_size = rec_offs_data_size(offsets);
ut_ad(extra_size + data_size < sizeof *buf);
@@ -1174,46 +1120,12 @@ row_merge_write_eof(
return(&block[0]);
}
-/*************************************************************//**
-Compare two merge records.
-@return 1, 0, -1 if mrec1 is greater, equal, less, respectively, than mrec2 */
-UNIV_INTERN
-int
-row_merge_cmp(
-/*==========*/
- const mrec_t* mrec1, /*!< in: first merge
- record to be compared */
- const mrec_t* mrec2, /*!< in: second merge
- record to be compared */
- const ulint* offsets1, /*!< in: first record offsets */
- const ulint* offsets2, /*!< in: second record offsets */
- const dict_index_t* index, /*!< in: index */
- ibool* null_eq) /*!< out: set to TRUE if
- found matching null values */
-{
- int cmp;
-
- cmp = cmp_rec_rec_simple(mrec1, mrec2, offsets1, offsets2, index,
- null_eq);
-
-#ifdef UNIV_DEBUG
- if (row_merge_print_cmp) {
- fputs("row_merge_cmp1 ", stderr);
- rec_print_comp(stderr, mrec1, offsets1);
- fputs("\nrow_merge_cmp2 ", stderr);
- rec_print_comp(stderr, mrec2, offsets2);
- fprintf(stderr, "\nrow_merge_cmp=%d\n", cmp);
- }
-#endif /* UNIV_DEBUG */
-
- return(cmp);
-}
/********************************************************************//**
Reads clustered index of the table and create temporary files
containing the index entries for the indexes to be built.
@return DB_SUCCESS or error */
-static __attribute__((nonnull))
-ulint
+static __attribute__((nonnull(1,2,3,4,6,9,10,16), warn_unused_result))
+dberr_t
row_merge_read_clustered_index(
/*===========================*/
trx_t* trx, /*!< in: transaction */
@@ -1224,23 +1136,40 @@ row_merge_read_clustered_index(
const dict_table_t* new_table,/*!< in: table where indexes are
created; identical to old_table
unless creating a PRIMARY KEY */
+ bool online, /*!< in: true if creating indexes
+ online */
dict_index_t** index, /*!< in: indexes to be created */
dict_index_t* fts_sort_idx,
- /*!< in: indexes to be created */
- fts_psort_t* psort_info, /*!< in: parallel sort info */
+ /*!< in: full-text index to be created,
+ or NULL */
+ fts_psort_t* psort_info,
+ /*!< in: parallel sort info for
+ fts_sort_idx creation, or NULL */
merge_file_t* files, /*!< in: temporary files */
+ const ulint* key_numbers,
+ /*!< in: MySQL key numbers to create */
ulint n_index,/*!< in: number of indexes to create */
+ const dtuple_t* add_cols,
+ /*!< in: default values of
+ added columns, or NULL */
+ const ulint* col_map,/*!< in: mapping of old column
+ numbers to new ones, or NULL
+ if old_table == new_table */
+ ulint add_autoinc,
+ /*!< in: number of added
+ AUTO_INCREMENT column, or
+ ULINT_UNDEFINED if none is added */
+ ib_sequence_t& sequence,/*!< in/out: autoinc sequence */
row_merge_block_t* block) /*!< in/out: file buffer */
{
dict_index_t* clust_index; /* Clustered index */
mem_heap_t* row_heap; /* Heap memory to create
- clustered index records */
+ clustered index tuples */
row_merge_buf_t** merge_buf; /* Temporary list for records*/
- btr_pcur_t pcur; /* Persistent cursor on the
- clustered index */
+ btr_pcur_t pcur; /* Cursor on the clustered
+ index */
mtr_t mtr; /* Mini transaction */
- ulint err = DB_SUCCESS;/* Return code */
- ulint i;
+ dberr_t err = DB_SUCCESS;/* Return code */
ulint n_nonnull = 0; /* number of columns
changed to NOT NULL */
ulint* nonnull = NULL; /* NOT NULL columns */
@@ -1252,13 +1181,10 @@ row_merge_read_clustered_index(
ibool fts_pll_sort = FALSE;
ib_int64_t sig_count = 0;
- trx->op_info = "reading clustered index";
+ ut_ad((old_table == new_table) == !col_map);
+ ut_ad(!add_cols || col_map);
- ut_ad(trx);
- ut_ad(old_table);
- ut_ad(new_table);
- ut_ad(index);
- ut_ad(files);
+ trx->op_info = "reading clustered index";
#ifdef FTS_INTERNAL_DIAG_PRINT
DEBUG_FTS_SORT_PRINT("FTS_SORT: Start Create Index\n");
@@ -1269,8 +1195,7 @@ row_merge_read_clustered_index(
merge_buf = static_cast<row_merge_buf_t**>(
mem_alloc(n_index * sizeof *merge_buf));
-
- for (i = 0; i < n_index; i++) {
+ for (ulint i = 0; i < n_index; i++) {
if (index[i]->type & DICT_FTS) {
/* We are building a FT index, make sure
@@ -1282,14 +1207,14 @@ row_merge_read_clustered_index(
merge_buf[i] = row_merge_buf_create(fts_sort_idx);
add_doc_id = DICT_TF2_FLAG_IS_SET(
- old_table, DICT_TF2_FTS_ADD_DOC_ID);
+ new_table, DICT_TF2_FTS_ADD_DOC_ID);
/* If Doc ID does not exist in the table itself,
fetch the first FTS Doc ID */
if (add_doc_id) {
fts_get_next_doc_id(
(dict_table_t*) new_table,
- &doc_id);
+ &doc_id);
ut_ad(doc_id > 0);
}
@@ -1310,35 +1235,34 @@ row_merge_read_clustered_index(
clust_index = dict_table_get_first_index(old_table);
btr_pcur_open_at_index_side(
- TRUE, clust_index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
-
- if (UNIV_UNLIKELY(old_table != new_table)) {
- ulint n_cols = dict_table_get_n_cols(old_table);
+ true, clust_index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
- /* A primary key will be created. Identify the
- columns that were flagged NOT NULL in the new table,
- so that we can quickly check that the records in the
- (old) clustered index do not violate the added NOT
- NULL constraints. */
-
- if (!fts_sort_idx) {
- ut_a(n_cols == dict_table_get_n_cols(new_table));
- }
+ if (old_table != new_table) {
+ /* The table is being rebuilt. Identify the columns
+ that were flagged NOT NULL in the new table, so that
+ we can quickly check that the records in the old table
+ do not violate the added NOT NULL constraints. */
nonnull = static_cast<ulint*>(
- mem_alloc(n_cols * sizeof *nonnull));
+ mem_alloc(dict_table_get_n_cols(new_table)
+ * sizeof *nonnull));
- for (i = 0; i < n_cols; i++) {
+ for (ulint i = 0; i < dict_table_get_n_cols(old_table); i++) {
if (dict_table_get_nth_col(old_table, i)->prtype
& DATA_NOT_NULL) {
+ continue;
+ }
+
+ const ulint j = col_map[i];
+ if (j == ULINT_UNDEFINED) {
+ /* The column was dropped. */
continue;
}
- if (dict_table_get_nth_col(new_table, i)->prtype
+ if (dict_table_get_nth_col(new_table, j)->prtype
& DATA_NOT_NULL) {
-
- nonnull[n_nonnull++] = i;
+ nonnull[n_nonnull++] = j;
}
}
@@ -1354,81 +1278,221 @@ row_merge_read_clustered_index(
for (;;) {
const rec_t* rec;
ulint* offsets;
- dtuple_t* row = NULL;
+ const dtuple_t* row;
row_ext_t* ext;
- ibool has_next = TRUE;
-
- btr_pcur_move_to_next_on_page(&pcur);
+ page_cur_t* cur = btr_pcur_get_page_cur(&pcur);
- /* When switching pages, commit the mini-transaction
- in order to release the latch on the old page. */
+ page_cur_move_to_next(cur);
- if (btr_pcur_is_after_last_on_page(&pcur)) {
+ if (page_cur_is_after_last(cur)) {
if (UNIV_UNLIKELY(trx_is_interrupted(trx))) {
err = DB_INTERRUPTED;
trx->error_key_num = 0;
goto func_exit;
}
- /* Store the cursor position on the last user
- record on the page. */
- btr_pcur_move_to_prev_on_page(&pcur);
- /* Leaf pages must never be empty, unless
- this is the only page in the index tree. */
- ut_ad(btr_pcur_is_on_user_rec(&pcur)
- || buf_block_get_page_no(
- btr_pcur_get_block(&pcur))
- == clust_index->page);
-
- btr_pcur_store_position(&pcur, &mtr);
- mtr_commit(&mtr);
- mtr_start(&mtr);
- /* Restore position on the record, or its
- predecessor if the record was purged
- meanwhile. */
- btr_pcur_restore_position(BTR_SEARCH_LEAF,
- &pcur, &mtr);
- /* Move to the successor of the original record. */
- has_next = btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+ if (online && old_table != new_table) {
+ err = row_log_table_get_error(clust_index);
+ if (err != DB_SUCCESS) {
+ trx->error_key_num = 0;
+ goto func_exit;
+ }
+ }
+#ifdef DBUG_OFF
+# define dbug_run_purge false
+#else /* DBUG_OFF */
+ bool dbug_run_purge = false;
+#endif /* DBUG_OFF */
+ DBUG_EXECUTE_IF(
+ "ib_purge_on_create_index_page_switch",
+ dbug_run_purge = true;);
+
+ if (dbug_run_purge
+ || rw_lock_get_waiters(
+ dict_index_get_lock(clust_index))) {
+ /* There are waiters on the clustered
+ index tree lock, likely the purge
+ thread. Store and restore the cursor
+ position, and yield so that scanning a
+ large table will not starve other
+ threads. */
+
+ /* Store the cursor position on the last user
+ record on the page. */
+ btr_pcur_move_to_prev_on_page(&pcur);
+ /* Leaf pages must never be empty, unless
+ this is the only page in the index tree. */
+ ut_ad(btr_pcur_is_on_user_rec(&pcur)
+ || buf_block_get_page_no(
+ btr_pcur_get_block(&pcur))
+ == clust_index->page);
+
+ btr_pcur_store_position(&pcur, &mtr);
+ mtr_commit(&mtr);
+
+ if (dbug_run_purge) {
+ /* This is for testing
+ purposes only (see
+ DBUG_EXECUTE_IF above). We
+ signal the purge thread and
+ hope that the purge batch will
+ complete before we execute
+ btr_pcur_restore_position(). */
+ trx_purge_run();
+ os_thread_sleep(1000000);
+ }
+
+ /* Give the waiters a chance to proceed. */
+ os_thread_yield();
+
+ mtr_start(&mtr);
+ /* Restore position on the record, or its
+ predecessor if the record was purged
+ meanwhile. */
+ btr_pcur_restore_position(
+ BTR_SEARCH_LEAF, &pcur, &mtr);
+ /* Move to the successor of the
+ original record. */
+ if (!btr_pcur_move_to_next_user_rec(
+ &pcur, &mtr)) {
+end_of_index:
+ row = NULL;
+ mtr_commit(&mtr);
+ mem_heap_free(row_heap);
+ if (nonnull) {
+ mem_free(nonnull);
+ }
+ goto write_buffers;
+ }
+ } else {
+ ulint next_page_no;
+ buf_block_t* block;
+
+ next_page_no = btr_page_get_next(
+ page_cur_get_page(cur), &mtr);
+
+ if (next_page_no == FIL_NULL) {
+ goto end_of_index;
+ }
+
+ block = page_cur_get_block(cur);
+ block = btr_block_get(
+ buf_block_get_space(block),
+ buf_block_get_zip_size(block),
+ next_page_no, BTR_SEARCH_LEAF,
+ clust_index, &mtr);
+
+ btr_leaf_page_release(page_cur_get_block(cur),
+ BTR_SEARCH_LEAF, &mtr);
+ page_cur_set_before_first(block, cur);
+ page_cur_move_to_next(cur);
+
+ ut_ad(!page_cur_is_after_last(cur));
+ }
}
- if (UNIV_LIKELY(has_next)) {
- rec = btr_pcur_get_rec(&pcur);
- offsets = rec_get_offsets(rec, clust_index, NULL,
- ULINT_UNDEFINED, &row_heap);
+ rec = page_cur_get_rec(cur);
+
+ offsets = rec_get_offsets(rec, clust_index, NULL,
+ ULINT_UNDEFINED, &row_heap);
+
+ if (online && new_table != old_table) {
+ /* When rebuilding the table online, perform a
+ REPEATABLE READ, so that row_log_table_apply()
+ will not see a newer state of the table when
+ applying the log. This is mainly to prevent
+ false duplicate key errors, because the log
+ will identify records by the PRIMARY KEY. */
+ ut_ad(trx->read_view);
+
+ if (!read_view_sees_trx_id(
+ trx->read_view,
+ row_get_rec_trx_id(
+ rec, clust_index, offsets))) {
+ rec_t* old_vers;
+
+ row_vers_build_for_consistent_read(
+ rec, &mtr, clust_index, &offsets,
+ trx->read_view, &row_heap,
+ row_heap, &old_vers);
+
+ rec = old_vers;
+
+ if (!rec) {
+ continue;
+ }
+ }
- /* Skip delete marked records. */
if (rec_get_deleted_flag(
- rec, dict_table_is_comp(old_table))) {
+ rec,
+ dict_table_is_comp(old_table))) {
+ /* This record was deleted in the latest
+ committed version, or it was deleted and
+ then reinserted-by-update before purge
+ kicked in. Skip it. */
continue;
}
- srv_n_rows_inserted++;
+ ut_ad(!rec_offs_any_null_extern(rec, offsets));
+ } else if (rec_get_deleted_flag(
+ rec, dict_table_is_comp(old_table))) {
+ /* Skip delete-marked records.
+
+ Skipping delete-marked records will make the
+			created indexes unusable for transactions
+ whose read views were created before the index
+ creation completed, but preserving the history
+ would make it tricky to detect duplicate
+ keys. */
+ continue;
+ } else if (UNIV_LIKELY_NULL(rec_offs_any_null_extern(
+ rec, offsets))) {
+ /* This is essentially a READ UNCOMMITTED to
+ fetch the most recent version of the record. */
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+ trx_id_t trx_id;
+ ulint trx_id_offset;
+
+ /* It is possible that the record was
+ just inserted and the off-page columns
+ have not yet been written. We will
+ ignore the record if this is the case,
+ because it should be covered by the
+ index->info.online log in that case. */
+
+ trx_id_offset = clust_index->trx_id_offset;
+ if (!trx_id_offset) {
+ trx_id_offset = row_get_trx_id_offset(
+ clust_index, offsets);
+ }
- /* Build a row based on the clustered index. */
+ trx_id = trx_read_trx_id(rec + trx_id_offset);
+ ut_a(trx_rw_is_active(trx_id, NULL));
+ ut_a(trx_undo_trx_id_is_insert(rec + trx_id_offset));
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
- row = row_build(ROW_COPY_POINTERS, clust_index,
- rec, offsets,
- new_table, &ext, row_heap);
+ /* When !online, we are holding an X-lock on
+ old_table, preventing any inserts. */
+ ut_ad(online);
+ continue;
+ }
- if (UNIV_LIKELY_NULL(nonnull)) {
- for (i = 0; i < n_nonnull; i++) {
- dfield_t* field
- = &row->fields[nonnull[i]];
- dtype_t* field_type
- = dfield_get_type(field);
+ /* Build a row based on the clustered index. */
- ut_a(!(field_type->prtype
- & DATA_NOT_NULL));
+ row = row_build(ROW_COPY_POINTERS, clust_index,
+ rec, offsets, new_table,
+ add_cols, col_map, &ext, row_heap);
+ ut_ad(row);
- if (dfield_is_null(field)) {
- err = DB_PRIMARY_KEY_IS_NULL;
- trx->error_key_num = 0;
- goto func_exit;
- }
+ for (ulint i = 0; i < n_nonnull; i++) {
+ const dfield_t* field = &row->fields[nonnull[i]];
- field_type->prtype |= DATA_NOT_NULL;
- }
+ ut_ad(dfield_get_type(field)->prtype & DATA_NOT_NULL);
+
+ if (dfield_is_null(field)) {
+ err = DB_INVALID_NULL;
+ trx->error_key_num = 0;
+ goto func_exit;
}
}
@@ -1439,19 +1503,72 @@ row_merge_read_clustered_index(
doc_id = 0;
}
+ if (add_autoinc != ULINT_UNDEFINED) {
+
+ ut_ad(add_autoinc
+ < dict_table_get_n_user_cols(new_table));
+
+ const dfield_t* dfield;
+
+ dfield = dtuple_get_nth_field(row, add_autoinc);
+ if (dfield_is_null(dfield)) {
+ goto write_buffers;
+ }
+
+ const dtype_t* dtype = dfield_get_type(dfield);
+ byte* b = static_cast<byte*>(dfield_get_data(dfield));
+
+ if (sequence.eof()) {
+ err = DB_ERROR;
+ trx->error_key_num = 0;
+
+ ib_errf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_AUTOINC_READ_FAILED, "[NULL]");
+
+ goto func_exit;
+ }
+
+ ulonglong value = sequence++;
+
+ switch (dtype_get_mtype(dtype)) {
+ case DATA_INT: {
+ ibool usign;
+ ulint len = dfield_get_len(dfield);
+
+ usign = dtype_get_prtype(dtype) & DATA_UNSIGNED;
+ mach_write_ulonglong(b, value, len, usign);
+
+ break;
+ }
+
+ case DATA_FLOAT:
+ mach_float_write(
+ b, static_cast<float>(value));
+ break;
+
+ case DATA_DOUBLE:
+ mach_double_write(
+ b, static_cast<double>(value));
+ break;
+
+ default:
+ ut_ad(0);
+ }
+ }
+
+write_buffers:
/* Build all entries for all the indexes to be created
in a single scan of the clustered index. */
- for (i = 0; i < n_index; i++) {
+ for (ulint i = 0; i < n_index; i++) {
row_merge_buf_t* buf = merge_buf[i];
merge_file_t* file = &files[i];
- const dict_index_t* index = buf->index;
ulint rows_added = 0;
if (UNIV_LIKELY
(row && (rows_added = row_merge_buf_add(
- buf, fts_index, psort_info,
- row, ext, &doc_id)))) {
+ buf, fts_index, old_table,
+ psort_info, row, ext, &doc_id)))) {
/* If we are creating FTS index,
a single row can generate more
@@ -1464,35 +1581,60 @@ row_merge_read_clustered_index(
continue;
}
- if ((!row || !doc_id)
- && index->type & DICT_FTS) {
+ if ((buf->index->type & DICT_FTS)
+ && (!row || !doc_id)) {
continue;
}
/* The buffer must be sufficiently large
- to hold at least one record. */
- ut_ad(buf->n_tuples || !has_next);
+ to hold at least one record. It may only
+ be empty when we reach the end of the
+ clustered index. row_merge_buf_add()
+ must not have been called in this loop. */
+ ut_ad(buf->n_tuples || row == NULL);
/* We have enough data tuples to form a block.
Sort them and write to disk. */
if (buf->n_tuples) {
- if (dict_index_is_unique(index)) {
- row_merge_dup_t dup;
- dup.index = buf->index;
- dup.table = table;
- dup.n_dup = 0;
+ if (dict_index_is_unique(buf->index)) {
+ row_merge_dup_t dup = {
+ buf->index, table, col_map, 0};
row_merge_buf_sort(buf, &dup);
if (dup.n_dup) {
err = DB_DUPLICATE_KEY;
- trx->error_key_num = i;
- goto func_exit;
+ trx->error_key_num
+ = key_numbers[i];
+ break;
}
} else {
row_merge_buf_sort(buf, NULL);
}
+ } else if (online && new_table == old_table) {
+ /* Note the newest transaction that
+ modified this index when the scan was
+ completed. We prevent older readers
+ from accessing this index, to ensure
+ read consistency. */
+
+ trx_id_t max_trx_id;
+
+ ut_a(row == NULL);
+ rw_lock_x_lock(
+ dict_index_get_lock(buf->index));
+ ut_a(dict_index_get_online_status(buf->index)
+ == ONLINE_INDEX_CREATION);
+
+ max_trx_id = row_log_get_max_trx(buf->index);
+
+ if (max_trx_id > buf->index->trx_id) {
+ buf->index->trx_id = max_trx_id;
+ }
+
+ rw_lock_x_unlock(
+ dict_index_get_lock(buf->index));
}
row_merge_buf_write(buf, file, block);
@@ -1501,7 +1643,7 @@ row_merge_read_clustered_index(
block)) {
err = DB_OUT_OF_FILE_SPACE;
trx->error_key_num = i;
- goto func_exit;
+ break;
}
UNIV_MEM_INVALID(&block[0], srv_sort_buf_size);
@@ -1514,14 +1656,11 @@ row_merge_read_clustered_index(
if (UNIV_UNLIKELY
(!(rows_added = row_merge_buf_add(
- buf, fts_index, psort_info, row,
- ext, &doc_id)))) {
+ buf, fts_index, old_table,
+ psort_info, row, ext,
+ &doc_id)))) {
/* An empty buffer should have enough
- room for at least one record.
- TODO: for FTS index building, we'll
- need to prepared for coping with very
- large text/blob data in a single row
- that could fill up the merge file */
+ room for at least one record. */
ut_error;
}
@@ -1529,27 +1668,40 @@ row_merge_read_clustered_index(
}
}
- mem_heap_empty(row_heap);
+ if (row == NULL) {
+ goto all_done;
+ }
- if (UNIV_UNLIKELY(!has_next)) {
+ if (err != DB_SUCCESS) {
goto func_exit;
}
+
+ mem_heap_empty(row_heap);
}
func_exit:
+ mtr_commit(&mtr);
+ mem_heap_free(row_heap);
+
+ if (nonnull) {
+ mem_free(nonnull);
+ }
+
+all_done:
#ifdef FTS_INTERNAL_DIAG_PRINT
DEBUG_FTS_SORT_PRINT("FTS_SORT: Complete Scan Table\n");
#endif
if (fts_pll_sort) {
- for (i = 0; i < fts_sort_pll_degree; i++) {
+ for (ulint i = 0; i < fts_sort_pll_degree; i++) {
psort_info[i].state = FTS_PARENT_COMPLETE;
}
wait_again:
os_event_wait_time_low(fts_parallel_sort_event,
1000000, sig_count);
- for (i = 0; i < fts_sort_pll_degree; i++) {
- if (psort_info[i].child_status != FTS_CHILD_COMPLETE) {
+ for (ulint i = 0; i < fts_sort_pll_degree; i++) {
+ if (psort_info[i].child_status != FTS_CHILD_COMPLETE
+ && psort_info[i].child_status != FTS_CHILD_EXITING) {
sig_count = os_event_reset(
fts_parallel_sort_event);
goto wait_again;
@@ -1560,17 +1712,7 @@ wait_again:
#ifdef FTS_INTERNAL_DIAG_PRINT
DEBUG_FTS_SORT_PRINT("FTS_SORT: Complete Tokenization\n");
#endif
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(row_heap);
-
- if (UNIV_LIKELY_NULL(nonnull)) {
- mem_free(nonnull);
- }
-
-
- for (i = 0; i < n_index; i++) {
+ for (ulint i = 0; i < n_index; i++) {
row_merge_buf_free(merge_buf[i]);
}
@@ -1578,10 +1720,13 @@ wait_again:
mem_free(merge_buf);
+ btr_pcur_close(&pcur);
+
/* Update the next Doc ID we used. Table should be locked, so
no concurrent DML */
if (max_doc_id) {
- fts_update_next_doc_id(new_table, old_table->name, max_doc_id);
+ fts_update_next_doc_id(
+ 0, new_table, old_table->name, max_doc_id);
}
trx->op_info = "";
@@ -1590,24 +1735,20 @@ wait_again:
}
/** Write a record via buffer 2 and read the next record to buffer N.
-@param M FTS merge info structure
-@param N index into array of merge info structure
-@param INDEX the FTS index */
-
-
-/** Write a record via buffer 2 and read the next record to buffer N.
@param N number of the buffer (0 or 1)
+@param INDEX record descriptor
@param AT_END statement to execute at end of input */
-#define ROW_MERGE_WRITE_GET_NEXT(N, AT_END) \
+#define ROW_MERGE_WRITE_GET_NEXT(N, INDEX, AT_END) \
do { \
- b2 = row_merge_write_rec(&block[2 * srv_sort_buf_size], &buf[2], b2, \
+ b2 = row_merge_write_rec(&block[2 * srv_sort_buf_size], \
+ &buf[2], b2, \
of->fd, &of->offset, \
mrec##N, offsets##N); \
if (UNIV_UNLIKELY(!b2 || ++of->n_rec > file->n_rec)) { \
goto corrupt; \
} \
- b##N = row_merge_read_rec(&block[N * srv_sort_buf_size], &buf[N], \
- b##N, index, \
+ b##N = row_merge_read_rec(&block[N * srv_sort_buf_size],\
+ &buf[N], b##N, INDEX, \
file->fd, foffs##N, \
&mrec##N, offsets##N); \
if (UNIV_UNLIKELY(!b##N)) { \
@@ -1621,11 +1762,12 @@ wait_again:
/*************************************************************//**
Merge two blocks of records on disk and write a bigger block.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_merge_blocks(
/*=============*/
- const dict_index_t* index, /*!< in: index being created */
+ const row_merge_dup_t* dup, /*!< in: descriptor of
+ index being created */
const merge_file_t* file, /*!< in: file containing
index entries */
row_merge_block_t* block, /*!< in/out: 3 buffers */
@@ -1633,20 +1775,18 @@ row_merge_blocks(
source list in the file */
ulint* foffs1, /*!< in/out: offset of second
source list in the file */
- merge_file_t* of, /*!< in/out: output file */
- struct TABLE* table) /*!< in/out: MySQL table, for
- reporting erroneous key value
- if applicable */
+ merge_file_t* of) /*!< in/out: output file */
{
mem_heap_t* heap; /*!< memory heap for offsets0, offsets1 */
mrec_buf_t* buf; /*!< buffer for handling
split mrec in block[] */
const byte* b0; /*!< pointer to block[0] */
- const byte* b1; /*!< pointer to block[1] */
- byte* b2; /*!< pointer to block[2] */
+ const byte* b1; /*!< pointer to block[srv_sort_buf_size] */
+ byte* b2; /*!< pointer to block[2 * srv_sort_buf_size] */
const mrec_t* mrec0; /*!< merge rec, points to block[0] or buf[0] */
- const mrec_t* mrec1; /*!< merge rec, points to block[1] or buf[1] */
+ const mrec_t* mrec1; /*!< merge rec, points to
+ block[srv_sort_buf_size] or buf[1] */
ulint* offsets0;/* offsets of mrec0 */
ulint* offsets1;/* offsets of mrec1 */
@@ -1661,7 +1801,7 @@ row_merge_blocks(
}
#endif /* UNIV_DEBUG */
- heap = row_merge_heap_create(index, &buf, &offsets0, &offsets1);
+ heap = row_merge_heap_create(dup->index, &buf, &offsets0, &offsets1);
/* Write a record and read the next record. Split the output
file in two halves, which can be merged on the following pass. */
@@ -1677,10 +1817,13 @@ corrupt:
b1 = &block[srv_sort_buf_size];
b2 = &block[2 * srv_sort_buf_size];
- b0 = row_merge_read_rec(&block[0], &buf[0], b0, index, file->fd,
- foffs0, &mrec0, offsets0);
- b1 = row_merge_read_rec(&block[srv_sort_buf_size], &buf[srv_sort_buf_size], b1, index, file->fd,
- foffs1, &mrec1, offsets1);
+ b0 = row_merge_read_rec(
+ &block[0], &buf[0], b0, dup->index,
+ file->fd, foffs0, &mrec0, offsets0);
+ b1 = row_merge_read_rec(
+ &block[srv_sort_buf_size],
+ &buf[srv_sort_buf_size], b1, dup->index,
+ file->fd, foffs1, &mrec1, offsets1);
if (UNIV_UNLIKELY(!b0 && mrec0)
|| UNIV_UNLIKELY(!b1 && mrec1)) {
@@ -1688,56 +1831,49 @@ corrupt:
}
while (mrec0 && mrec1) {
- ibool null_eq = FALSE;
- switch (row_merge_cmp(mrec0, mrec1,
- offsets0, offsets1, index,
- &null_eq)) {
+ switch (cmp_rec_rec_simple(
+ mrec0, mrec1, offsets0, offsets1,
+ dup->index, dup->table)) {
case 0:
- if (UNIV_UNLIKELY
- (dict_index_is_unique(index) && !null_eq)) {
- innobase_rec_to_mysql(table, mrec0,
- index, offsets0);
- mem_heap_free(heap);
- return(DB_DUPLICATE_KEY);
- }
- /* fall through */
+ mem_heap_free(heap);
+ return(DB_DUPLICATE_KEY);
case -1:
- ROW_MERGE_WRITE_GET_NEXT(0, goto merged);
+ ROW_MERGE_WRITE_GET_NEXT(0, dup->index, goto merged);
break;
case 1:
- ROW_MERGE_WRITE_GET_NEXT(1, goto merged);
+ ROW_MERGE_WRITE_GET_NEXT(1, dup->index, goto merged);
break;
default:
ut_error;
}
-
}
merged:
if (mrec0) {
/* append all mrec0 to output */
for (;;) {
- ROW_MERGE_WRITE_GET_NEXT(0, goto done0);
+ ROW_MERGE_WRITE_GET_NEXT(0, dup->index, goto done0);
}
}
done0:
if (mrec1) {
/* append all mrec1 to output */
for (;;) {
- ROW_MERGE_WRITE_GET_NEXT(1, goto done1);
+ ROW_MERGE_WRITE_GET_NEXT(1, dup->index, goto done1);
}
}
done1:
mem_heap_free(heap);
- b2 = row_merge_write_eof(&block[2 * srv_sort_buf_size], b2, of->fd, &of->offset);
+ b2 = row_merge_write_eof(&block[2 * srv_sort_buf_size],
+ b2, of->fd, &of->offset);
return(b2 ? DB_SUCCESS : DB_CORRUPTION);
}
/*************************************************************//**
Copy a block of index entries.
@return TRUE on success, FALSE on failure */
-static __attribute__((nonnull))
+static __attribute__((nonnull, warn_unused_result))
ibool
row_merge_blocks_copy(
/*==================*/
@@ -1752,7 +1888,7 @@ row_merge_blocks_copy(
mrec_buf_t* buf; /*!< buffer for handling
split mrec in block[] */
const byte* b0; /*!< pointer to block[0] */
- byte* b2; /*!< pointer to block[2] */
+ byte* b2; /*!< pointer to block[2 * srv_sort_buf_size] */
const mrec_t* mrec0; /*!< merge rec, points to block[0] */
ulint* offsets0;/* offsets of mrec0 */
ulint* offsets1;/* dummy offsets */
@@ -1782,8 +1918,8 @@ corrupt:
b2 = &block[2 * srv_sort_buf_size];
- b0 = row_merge_read_rec(&block[0], &buf[0], b0, index, file->fd,
- foffs0, &mrec0, offsets0);
+ b0 = row_merge_read_rec(&block[0], &buf[0], b0, index,
+ file->fd, foffs0, &mrec0, offsets0);
if (UNIV_UNLIKELY(!b0 && mrec0)) {
goto corrupt;
@@ -1792,7 +1928,7 @@ corrupt:
if (mrec0) {
/* append all mrec0 to output */
for (;;) {
- ROW_MERGE_WRITE_GET_NEXT(0, goto done0);
+ ROW_MERGE_WRITE_GET_NEXT(0, index, goto done0);
}
}
done0:
@@ -1802,7 +1938,8 @@ done0:
(*foffs0)++;
mem_heap_free(heap);
- return(row_merge_write_eof(&block[2 * srv_sort_buf_size], b2, of->fd, &of->offset)
+ return(row_merge_write_eof(&block[2 * srv_sort_buf_size],
+ b2, of->fd, &of->offset)
!= NULL);
}
@@ -1810,18 +1947,16 @@ done0:
Merge disk files.
@return DB_SUCCESS or error code */
static __attribute__((nonnull))
-ulint
+dberr_t
row_merge(
/*======*/
trx_t* trx, /*!< in: transaction */
- const dict_index_t* index, /*!< in: index being created */
+ const row_merge_dup_t* dup, /*!< in: descriptor of
+ index being created */
merge_file_t* file, /*!< in/out: file containing
index entries */
row_merge_block_t* block, /*!< in/out: 3 buffers */
int* tmpfd, /*!< in/out: temporary file handle */
- struct TABLE* table, /*!< in/out: MySQL table, for
- reporting erroneous key value
- if applicable */
ulint* num_run,/*!< in/out: Number of runs remain
to be merged */
ulint* run_offset) /*!< in/out: Array contains the
@@ -1830,7 +1965,7 @@ row_merge(
{
ulint foffs0; /*!< first input offset */
ulint foffs1; /*!< second input offset */
- ulint error; /*!< error code */
+ dberr_t error; /*!< error code */
merge_file_t of; /*!< output file */
const ulint ihalf = run_offset[*num_run / 2];
/*!< half the input file */
@@ -1861,15 +1996,15 @@ row_merge(
for (; foffs0 < ihalf && foffs1 < file->offset; foffs0++, foffs1++) {
- if (UNIV_UNLIKELY(trx_is_interrupted(trx))) {
+ if (trx_is_interrupted(trx)) {
return(DB_INTERRUPTED);
}
/* Remember the offset number for this run */
run_offset[n_run++] = of.offset;
- error = row_merge_blocks(index, file, block,
- &foffs0, &foffs1, &of, table);
+ error = row_merge_blocks(dup, file, block,
+ &foffs0, &foffs1, &of);
if (error != DB_SUCCESS) {
return(error);
@@ -1887,7 +2022,8 @@ row_merge(
/* Remember the offset number for this run */
run_offset[n_run++] = of.offset;
- if (!row_merge_blocks_copy(index, file, block, &foffs0, &of)) {
+ if (!row_merge_blocks_copy(dup->index, file, block,
+ &foffs0, &of)) {
return(DB_CORRUPTION);
}
}
@@ -1895,14 +2031,15 @@ row_merge(
ut_ad(foffs0 == ihalf);
while (foffs1 < file->offset) {
- if (UNIV_UNLIKELY(trx_is_interrupted(trx))) {
+ if (trx_is_interrupted(trx)) {
return(DB_INTERRUPTED);
}
/* Remember the offset number for this run */
run_offset[n_run++] = of.offset;
- if (!row_merge_blocks_copy(index, file, block, &foffs1, &of)) {
+ if (!row_merge_blocks_copy(dup->index, file, block,
+ &foffs1, &of)) {
return(DB_CORRUPTION);
}
}
@@ -1940,23 +2077,21 @@ row_merge(
Merge disk files.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
row_merge_sort(
/*===========*/
trx_t* trx, /*!< in: transaction */
- const dict_index_t* index, /*!< in: index being created */
+ const row_merge_dup_t* dup, /*!< in: descriptor of
+ index being created */
merge_file_t* file, /*!< in/out: file containing
index entries */
row_merge_block_t* block, /*!< in/out: 3 buffers */
- int* tmpfd, /*!< in/out: temporary file handle */
- struct TABLE* table) /*!< in/out: MySQL table, for
- reporting erroneous key value
- if applicable */
+ int* tmpfd) /*!< in/out: temporary file handle */
{
- ulint half = file->offset / 2;
- ulint num_runs;
- ulint* run_offset;
- ulint error = DB_SUCCESS;
+ const ulint half = file->offset / 2;
+ ulint num_runs;
+ ulint* run_offset;
+ dberr_t error = DB_SUCCESS;
/* Record the number of merge runs we need to perform */
num_runs = file->offset;
@@ -1979,14 +2114,14 @@ row_merge_sort(
/* Merge the runs until we have one big run */
do {
- error = row_merge(trx, index, file, block, tmpfd,
- table, &num_runs, run_offset);
-
- UNIV_MEM_ASSERT_RW(run_offset, num_runs * sizeof *run_offset);
+ error = row_merge(trx, dup, file, block, tmpfd,
+ &num_runs, run_offset);
if (error != DB_SUCCESS) {
break;
}
+
+ UNIV_MEM_ASSERT_RW(run_offset, num_runs * sizeof *run_offset);
} while (num_runs > 1);
mem_free(run_offset);
@@ -1995,8 +2130,25 @@ row_merge_sort(
}
/*************************************************************//**
+Set blob fields empty */
+static __attribute__((nonnull))
+void
+row_merge_set_blob_empty(
+/*=====================*/
+ dtuple_t* tuple) /*!< in/out: data tuple */
+{
+ for (ulint i = 0; i < dtuple_get_n_fields(tuple); i++) {
+ dfield_t* field = dtuple_get_nth_field(tuple, i);
+
+ if (dfield_is_ext(field)) {
+ dfield_set_data(field, NULL, 0);
+ }
+ }
+}
+
+/*************************************************************//**
Copy externally stored columns to the data tuple. */
-static
+static __attribute__((nonnull))
void
row_merge_copy_blobs(
/*=================*/
@@ -2006,10 +2158,9 @@ row_merge_copy_blobs(
dtuple_t* tuple, /*!< in/out: data tuple */
mem_heap_t* heap) /*!< in/out: memory heap */
{
- ulint i;
- ulint n_fields = dtuple_get_n_fields(tuple);
+ ut_ad(rec_offs_any_extern(offsets));
- for (i = 0; i < n_fields; i++) {
+ for (ulint i = 0; i < dtuple_get_n_fields(tuple); i++) {
ulint len;
const void* data;
dfield_t* field = dtuple_get_nth_field(tuple, i);
@@ -2020,11 +2171,12 @@ row_merge_copy_blobs(
ut_ad(!dfield_is_null(field));
- /* The table is locked during index creation.
- Therefore, externally stored columns cannot possibly
- be freed between the time the BLOB pointers are read
- (row_merge_read_clustered_index()) and dereferenced
- (below). */
+ /* During the creation of a PRIMARY KEY, the table is
+ X-locked, and we skip copying records that have been
+ marked for deletion. Therefore, externally stored
+ columns cannot possibly be freed between the time the
+ BLOB pointers are read (row_merge_read_clustered_index())
+ and dereferenced (below). */
data = btr_rec_copy_externally_stored_field(
mrec, offsets, zip_size, i, &len, heap);
/* Because we have locked the table, any records
@@ -2041,54 +2193,38 @@ row_merge_copy_blobs(
Read sorted file containing index data tuples and insert these data
tuples to the index
@return DB_SUCCESS or error number */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_merge_insert_index_tuples(
/*==========================*/
- trx_t* trx, /*!< in: transaction */
+ trx_id_t trx_id, /*!< in: transaction identifier */
dict_index_t* index, /*!< in: index */
- dict_table_t* table, /*!< in: new table */
- ulint zip_size,/*!< in: compressed page size of
- the old table, or 0 if uncompressed */
+ const dict_table_t* old_table,/*!< in: old table */
int fd, /*!< in: file descriptor */
row_merge_block_t* block) /*!< in/out: file buffer */
{
const byte* b;
- que_thr_t* thr;
- ins_node_t* node;
+ mem_heap_t* heap;
mem_heap_t* tuple_heap;
- mem_heap_t* graph_heap;
- ulint error = DB_SUCCESS;
+ mem_heap_t* ins_heap;
+ dberr_t error = DB_SUCCESS;
ulint foffs = 0;
ulint* offsets;
+ mrec_buf_t* buf;
- ut_ad(trx);
- ut_ad(index);
- ut_ad(table);
-
+ ut_ad(!srv_read_only_mode);
ut_ad(!(index->type & DICT_FTS));
-
- /* We use the insert query graph as the dummy graph
- needed in the row module call */
-
- trx->op_info = "inserting index entries";
-
- graph_heap = mem_heap_create(500 + sizeof(mrec_buf_t));
- node = ins_node_create(INS_DIRECT, table, graph_heap);
-
- thr = pars_complete_graph_for_exec(node, trx, graph_heap);
-
- que_thr_move_to_run_state_for_mysql(thr, trx);
+ ut_ad(trx_id);
tuple_heap = mem_heap_create(1000);
{
ulint i = 1 + REC_OFFS_HEADER_SIZE
+ dict_index_get_n_fields(index);
-
+ heap = mem_heap_create(sizeof *buf + i * sizeof *offsets);
+ ins_heap = mem_heap_create(sizeof *buf + i * sizeof *offsets);
offsets = static_cast<ulint*>(
- mem_heap_alloc(graph_heap, i * sizeof *offsets));
-
+ mem_heap_alloc(heap, i * sizeof *offsets));
offsets[0] = i;
offsets[1] = dict_index_get_n_fields(index);
}
@@ -2098,15 +2234,17 @@ row_merge_insert_index_tuples(
if (!row_merge_read(fd, foffs, block)) {
error = DB_CORRUPTION;
} else {
- mrec_buf_t* buf;
-
buf = static_cast<mrec_buf_t*>(
- mem_heap_alloc(graph_heap, sizeof *buf));
+ mem_heap_alloc(heap, sizeof *buf));
for (;;) {
const mrec_t* mrec;
dtuple_t* dtuple;
ulint n_ext;
+ big_rec_t* big_rec;
+ rec_t* rec;
+ btr_cur_t cursor;
+ mtr_t mtr;
b = row_merge_read_rec(block, buf, b, index,
fd, &foffs, &mrec, offsets);
@@ -2118,55 +2256,164 @@ row_merge_insert_index_tuples(
break;
}
+ dict_index_t* old_index
+ = dict_table_get_first_index(old_table);
+
+ if (dict_index_is_clust(index)
+ && dict_index_is_online_ddl(old_index)) {
+ error = row_log_table_get_error(old_index);
+ if (error != DB_SUCCESS) {
+ break;
+ }
+ }
+
dtuple = row_rec_to_index_entry_low(
mrec, index, offsets, &n_ext, tuple_heap);
- if (UNIV_UNLIKELY(n_ext)) {
- row_merge_copy_blobs(mrec, offsets, zip_size,
- dtuple, tuple_heap);
- }
+ if (!n_ext) {
+ /* There are no externally stored columns. */
+ } else if (!dict_index_is_online_ddl(old_index)) {
+ ut_ad(dict_index_is_clust(index));
+ /* Modifications to the table are
+ blocked while we are not rebuilding it
+ or creating indexes. Off-page columns
+ can be fetched safely. */
+ row_merge_copy_blobs(
+ mrec, offsets,
+ dict_table_zip_size(old_table),
+ dtuple, tuple_heap);
+ } else {
+ ut_ad(dict_index_is_clust(index));
- node->row = dtuple;
- node->table = table;
- node->trx_id = trx->id;
+ ulint offset = index->trx_id_offset;
- ut_ad(dtuple_validate(dtuple));
+ if (!offset) {
+ offset = row_get_trx_id_offset(
+ index, offsets);
+ }
- do {
- thr->run_node = thr;
- thr->prev_node = thr->common.parent;
+ /* Copy the off-page columns while
+ holding old_index->lock, so
+ that they cannot be freed by
+ a rollback of a fresh insert. */
+ rw_lock_s_lock(&old_index->lock);
+
+ if (row_log_table_is_rollback(
+ old_index,
+ trx_read_trx_id(mrec + offset))) {
+ /* The row and BLOB could
+ already be freed. They
+ will be deleted by
+ row_undo_ins_remove_clust_rec
+ when rolling back a fresh
+ insert. So, no need to retrieve
+ the off-page column. */
+ row_merge_set_blob_empty(
+ dtuple);
+ } else {
+ row_merge_copy_blobs(
+ mrec, offsets,
+ dict_table_zip_size(old_table),
+ dtuple, tuple_heap);
+ }
- error = row_ins_index_entry(index, dtuple,
- 0, FALSE, thr);
+ rw_lock_s_unlock(&old_index->lock);
+ }
- if (UNIV_LIKELY(error == DB_SUCCESS)) {
+ ut_ad(dtuple_validate(dtuple));
+ log_free_check();
- goto next_rec;
- }
+ mtr_start(&mtr);
+ /* Insert after the last user record. */
+ btr_cur_open_at_index_side(
+ false, index, BTR_MODIFY_LEAF,
+ &cursor, 0, &mtr);
+ page_cur_position(
+ page_rec_get_prev(btr_cur_get_rec(&cursor)),
+ btr_cur_get_block(&cursor),
+ btr_cur_get_page_cur(&cursor));
+ cursor.flag = BTR_CUR_BINARY;
+#ifdef UNIV_DEBUG
+ /* Check that the records are inserted in order. */
+ rec = btr_cur_get_rec(&cursor);
+
+ if (!page_rec_is_infimum(rec)) {
+ ulint* rec_offsets = rec_get_offsets(
+ rec, index, offsets,
+ ULINT_UNDEFINED, &tuple_heap);
+ ut_ad(cmp_dtuple_rec(dtuple, rec, rec_offsets)
+ > 0);
+ }
+#endif /* UNIV_DEBUG */
+ ulint* ins_offsets = NULL;
+
+ error = btr_cur_optimistic_insert(
+ BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG
+ | BTR_KEEP_SYS_FLAG | BTR_CREATE_FLAG,
+ &cursor, &ins_offsets, &ins_heap,
+ dtuple, &rec, &big_rec, 0, NULL, &mtr);
+
+ if (error == DB_FAIL) {
+ ut_ad(!big_rec);
+ mtr_commit(&mtr);
+ mtr_start(&mtr);
+ btr_cur_open_at_index_side(
+ false, index, BTR_MODIFY_TREE,
+ &cursor, 0, &mtr);
+ page_cur_position(
+ page_rec_get_prev(btr_cur_get_rec(
+ &cursor)),
+ btr_cur_get_block(&cursor),
+ btr_cur_get_page_cur(&cursor));
+
+ error = btr_cur_pessimistic_insert(
+ BTR_NO_UNDO_LOG_FLAG
+ | BTR_NO_LOCKING_FLAG
+ | BTR_KEEP_SYS_FLAG | BTR_CREATE_FLAG,
+ &cursor, &ins_offsets, &ins_heap,
+ dtuple, &rec, &big_rec, 0, NULL, &mtr);
+ }
+
+ if (!dict_index_is_clust(index)) {
+ page_update_max_trx_id(
+ btr_cur_get_block(&cursor),
+ btr_cur_get_page_zip(&cursor),
+ trx_id, &mtr);
+ }
- thr->lock_state = QUE_THR_LOCK_ROW;
+ mtr_commit(&mtr);
- trx->error_state = static_cast<enum db_err>(
- error);
+ if (UNIV_LIKELY_NULL(big_rec)) {
+ /* If the system crashes at this
+ point, the clustered index record will
+ contain a null BLOB pointer. This
+ should not matter, because the copied
+ table will be dropped on crash
+ recovery anyway. */
+
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(error == DB_SUCCESS);
+ error = row_ins_index_entry_big_rec(
+ dtuple, big_rec,
+ ins_offsets, &ins_heap,
+ index, NULL, __FILE__, __LINE__);
+ dtuple_convert_back_big_rec(
+ index, dtuple, big_rec);
+ }
- que_thr_stop_for_mysql(thr);
- thr->lock_state = QUE_THR_LOCK_NOLOCK;
- } while (row_mysql_handle_errors(&error, trx,
- thr, NULL));
+ if (error != DB_SUCCESS) {
+ goto err_exit;
+ }
- goto err_exit;
-next_rec:
mem_heap_empty(tuple_heap);
+ mem_heap_empty(ins_heap);
}
}
- que_thr_stop_for_mysql_no_error(thr, trx);
err_exit:
- que_graph_free(thr->graph);
-
- trx->op_info = "";
-
mem_heap_free(tuple_heap);
+ mem_heap_free(ins_heap);
+ mem_heap_free(heap);
return(error);
}
@@ -2175,7 +2422,7 @@ err_exit:
Sets an exclusive lock on a table, for the duration of creating indexes.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
row_merge_lock_table(
/*=================*/
trx_t* trx, /*!< in/out: transaction */
@@ -2184,10 +2431,10 @@ row_merge_lock_table(
{
mem_heap_t* heap;
que_thr_t* thr;
- ulint err;
+ dberr_t err;
sel_node_t* node;
- ut_ad(trx);
+ ut_ad(!srv_read_only_mode);
ut_ad(mode == LOCK_X || mode == LOCK_S);
heap = mem_heap_create(512);
@@ -2213,7 +2460,7 @@ run_again:
err = lock_table(0, table, mode, thr);
- trx->error_state =static_cast<enum db_err>( err);
+ trx->error_state = err;
if (UNIV_LIKELY(err == DB_SUCCESS)) {
que_thr_stop_for_mysql_no_error(thr, trx);
@@ -2221,7 +2468,7 @@ run_again:
que_thr_stop_for_mysql(thr);
if (err != DB_QUE_THR_SUSPENDED) {
- ibool was_lock_wait;
+ bool was_lock_wait;
was_lock_wait = row_mysql_handle_errors(
&err, trx, thr, NULL);
@@ -2255,105 +2502,312 @@ run_again:
}
/*********************************************************************//**
-Drop an index from the InnoDB system tables. The data dictionary must
-have been locked exclusively by the caller, because the transaction
-will not be committed. */
-UNIV_INTERN
+Drop an index that was created before an error occurred.
+The data dictionary must have been locked exclusively by the caller,
+because the transaction will not be committed. */
+static
void
-row_merge_drop_index(
-/*=================*/
- dict_index_t* index, /*!< in: index to be removed */
- dict_table_t* table, /*!< in: table */
- trx_t* trx) /*!< in: transaction handle */
+row_merge_drop_index_dict(
+/*======================*/
+ trx_t* trx, /*!< in/out: dictionary transaction */
+ index_id_t index_id)/*!< in: index identifier */
{
- db_err err;
- pars_info_t* info = pars_info_create();
-
- /* We use the private SQL parser of Innobase to generate the
- query graphs needed in deleting the dictionary data from system
- tables in Innobase. Deleting a row from SYS_INDEXES table also
- frees the file segments of the B-tree associated with the index. */
-
static const char sql[] =
"PROCEDURE DROP_INDEX_PROC () IS\n"
"BEGIN\n"
- /* Rename the index, so that it will be dropped by
- row_merge_drop_temp_indexes() at crash recovery
- if the server crashes before this trx is committed. */
- "UPDATE SYS_INDEXES SET NAME=CONCAT('"
- TEMP_INDEX_PREFIX_STR "', NAME) WHERE ID = :indexid;\n"
- "COMMIT WORK;\n"
- /* Drop the field definitions of the index. */
- "DELETE FROM SYS_FIELDS WHERE INDEX_ID = :indexid;\n"
- /* Drop the index definition and the B-tree. */
- "DELETE FROM SYS_INDEXES WHERE ID = :indexid;\n"
+ "DELETE FROM SYS_FIELDS WHERE INDEX_ID=:indexid;\n"
+ "DELETE FROM SYS_INDEXES WHERE ID=:indexid;\n"
"END;\n";
+ dberr_t error;
+ pars_info_t* info;
- ut_ad(index && table && trx);
+ ut_ad(!srv_read_only_mode);
+ ut_ad(mutex_own(&dict_sys->mutex));
+ ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
+ ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
- pars_info_add_ull_literal(info, "indexid", index->id);
+ info = pars_info_create();
+ pars_info_add_ull_literal(info, "indexid", index_id);
+ trx->op_info = "dropping index from dictionary";
+ error = que_eval_sql(info, sql, FALSE, trx);
- trx_start_if_not_started_xa(trx);
- trx->op_info = "dropping index";
+ if (error != DB_SUCCESS) {
+ /* Even though we ensure that DDL transactions are WAIT
+ and DEADLOCK free, we could encounter other errors e.g.,
+ DB_TOO_MANY_CONCURRENT_TRXS. */
+ trx->error_state = DB_SUCCESS;
- ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
+ ut_print_timestamp(stderr);
+ fprintf(stderr, " InnoDB: Error: row_merge_drop_index_dict "
+ "failed with error code: %u.\n", (unsigned) error);
+ }
- err = static_cast<db_err>(que_eval_sql(info, sql, FALSE, trx));
+ trx->op_info = "";
+}
- DBUG_EXECUTE_IF(
- "ib_drop_index_too_many_concurrent_trxs",
- err = DB_TOO_MANY_CONCURRENT_TRXS;
- trx->error_state = err;);
+/*********************************************************************//**
+Drop indexes that were created before an error occurred.
+The data dictionary must have been locked exclusively by the caller,
+because the transaction will not be committed. */
+UNIV_INTERN
+void
+row_merge_drop_indexes_dict(
+/*========================*/
+ trx_t* trx, /*!< in/out: dictionary transaction */
+ table_id_t table_id)/*!< in: table identifier */
+{
+ static const char sql[] =
+ "PROCEDURE DROP_INDEXES_PROC () IS\n"
+ "ixid CHAR;\n"
+ "found INT;\n"
- if (err == DB_SUCCESS) {
+ "DECLARE CURSOR index_cur IS\n"
+ " SELECT ID FROM SYS_INDEXES\n"
+ " WHERE TABLE_ID=:tableid AND\n"
+ " SUBSTR(NAME,0,1)='" TEMP_INDEX_PREFIX_STR "'\n"
+ "FOR UPDATE;\n"
- /* If it is FTS index, drop from table->fts and also drop
- its auxiliary tables */
- if (index->type & DICT_FTS) {
- ut_a(table->fts);
- fts_drop_index(table, index, trx);
- }
+ "BEGIN\n"
+ "found := 1;\n"
+ "OPEN index_cur;\n"
+ "WHILE found = 1 LOOP\n"
+ " FETCH index_cur INTO ixid;\n"
+ " IF (SQL % NOTFOUND) THEN\n"
+ " found := 0;\n"
+ " ELSE\n"
+ " DELETE FROM SYS_FIELDS WHERE INDEX_ID=ixid;\n"
+ " DELETE FROM SYS_INDEXES WHERE CURRENT OF index_cur;\n"
+ " END IF;\n"
+ "END LOOP;\n"
+ "CLOSE index_cur;\n"
- /* Replace this index with another equivalent index for all
- foreign key constraints on this table where this index is
- used */
+ "END;\n";
+ dberr_t error;
+ pars_info_t* info;
- dict_table_replace_index_in_foreign_list(table, index, trx);
- dict_index_remove_from_cache(table, index);
+ ut_ad(!srv_read_only_mode);
+ ut_ad(mutex_own(&dict_sys->mutex));
+ ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
+ ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
- } else {
+ /* It is possible that table->n_ref_count > 1 when
+ locked=TRUE. In this case, all code that should have an open
+ handle to the table be waiting for the next statement to execute,
+ or waiting for a meta-data lock.
+
+ A concurrent purge will be prevented by dict_operation_lock. */
+
+ info = pars_info_create();
+ pars_info_add_ull_literal(info, "tableid", table_id);
+ trx->op_info = "dropping indexes";
+ error = que_eval_sql(info, sql, FALSE, trx);
+
+ if (error != DB_SUCCESS) {
/* Even though we ensure that DDL transactions are WAIT
and DEADLOCK free, we could encounter other errors e.g.,
- DB_TOO_MANY_TRANSACTIONS. */
+ DB_TOO_MANY_CONCURRENT_TRXS. */
trx->error_state = DB_SUCCESS;
ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: row_merge_drop_index failed "
- "with error code: %lu.\n", (ulint) err);
+ fprintf(stderr, " InnoDB: Error: row_merge_drop_indexes_dict "
+ "failed with error code: %u.\n", (unsigned) error);
}
trx->op_info = "";
}
/*********************************************************************//**
-Drop those indexes which were created before an error occurred when
-building an index. The data dictionary must have been locked
-exclusively by the caller, because the transaction will not be
-committed. */
+Drop indexes that were created before an error occurred.
+The data dictionary must have been locked exclusively by the caller,
+because the transaction will not be committed. */
UNIV_INTERN
void
row_merge_drop_indexes(
/*===================*/
- trx_t* trx, /*!< in: transaction */
- dict_table_t* table, /*!< in: table containing the indexes */
- dict_index_t** index, /*!< in: indexes to drop */
- ulint num_created) /*!< in: number of elements in index[] */
+ trx_t* trx, /*!< in/out: dictionary transaction */
+ dict_table_t* table, /*!< in/out: table containing the indexes */
+ ibool locked) /*!< in: TRUE=table locked,
+ FALSE=may need to do a lazy drop */
{
- ulint key_num;
+ dict_index_t* index;
+ dict_index_t* next_index;
+
+ ut_ad(!srv_read_only_mode);
+ ut_ad(mutex_own(&dict_sys->mutex));
+ ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
+ ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+ index = dict_table_get_first_index(table);
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(dict_index_get_online_status(index) == ONLINE_INDEX_COMPLETE);
+
+ /* the caller should have an open handle to the table */
+ ut_ad(table->n_ref_count >= 1);
+
+ /* It is possible that table->n_ref_count > 1 when
+ locked=TRUE. In this case, all code that should have an open
+ handle to the table be waiting for the next statement to execute,
+ or waiting for a meta-data lock.
+
+ A concurrent purge will be prevented by dict_operation_lock. */
+
+ if (!locked && table->n_ref_count > 1) {
+ /* We will have to drop the indexes later, when the
+ table is guaranteed to be no longer in use. Mark the
+ indexes as incomplete and corrupted, so that other
+ threads will stop using them. Let dict_table_close()
+ or crash recovery or the next invocation of
+ prepare_inplace_alter_table() take care of dropping
+ the indexes. */
+
+ while ((index = dict_table_get_next_index(index)) != NULL) {
+ ut_ad(!dict_index_is_clust(index));
+
+ switch (dict_index_get_online_status(index)) {
+ case ONLINE_INDEX_ABORTED_DROPPED:
+ continue;
+ case ONLINE_INDEX_COMPLETE:
+ if (*index->name != TEMP_INDEX_PREFIX) {
+ /* Do nothing to already
+ published indexes. */
+ } else if (index->type & DICT_FTS) {
+ /* Drop a completed FULLTEXT
+ index, due to a timeout during
+ MDL upgrade for
+ commit_inplace_alter_table().
+ Because only concurrent reads
+ are allowed (and they are not
+ seeing this index yet) we
+ are safe to drop the index. */
+ dict_index_t* prev = UT_LIST_GET_PREV(
+ indexes, index);
+ /* At least there should be
+ the clustered index before
+ this one. */
+ ut_ad(prev);
+ ut_a(table->fts);
+ fts_drop_index(table, index, trx);
+ /* Since
+ INNOBASE_SHARE::idx_trans_tbl
+ is shared between all open
+ ha_innobase handles to this
+ table, no thread should be
+ accessing this dict_index_t
+ object. Also, we should be
+ holding LOCK=SHARED MDL on the
+ table even after the MDL
+ upgrade timeout. */
+
+ /* We can remove a DICT_FTS
+ index from the cache, because
+ we do not allow ADD FULLTEXT INDEX
+ with LOCK=NONE. If we allowed that,
+ we should exclude FTS entries from
+ prebuilt->ins_node->entry_list
+ in ins_node_create_entry_list(). */
+ dict_index_remove_from_cache(
+ table, index);
+ index = prev;
+ } else {
+ rw_lock_x_lock(
+ dict_index_get_lock(index));
+ dict_index_set_online_status(
+ index, ONLINE_INDEX_ABORTED);
+ index->type |= DICT_CORRUPT;
+ table->drop_aborted = TRUE;
+ goto drop_aborted;
+ }
+ continue;
+ case ONLINE_INDEX_CREATION:
+ rw_lock_x_lock(dict_index_get_lock(index));
+ ut_ad(*index->name == TEMP_INDEX_PREFIX);
+ row_log_abort_sec(index);
+ drop_aborted:
+ rw_lock_x_unlock(dict_index_get_lock(index));
+
+ DEBUG_SYNC_C("merge_drop_index_after_abort");
+ /* covered by dict_sys->mutex */
+ MONITOR_INC(MONITOR_BACKGROUND_DROP_INDEX);
+ /* fall through */
+ case ONLINE_INDEX_ABORTED:
+ /* Drop the index tree from the
+ data dictionary and free it from
+ the tablespace, but keep the object
+ in the data dictionary cache. */
+ row_merge_drop_index_dict(trx, index->id);
+ rw_lock_x_lock(dict_index_get_lock(index));
+ dict_index_set_online_status(
+ index, ONLINE_INDEX_ABORTED_DROPPED);
+ rw_lock_x_unlock(dict_index_get_lock(index));
+ table->drop_aborted = TRUE;
+ continue;
+ }
+ ut_error;
+ }
- for (key_num = 0; key_num < num_created; key_num++) {
- row_merge_drop_index(index[key_num], table, trx);
+ return;
}
+
+ row_merge_drop_indexes_dict(trx, table->id);
+
+ /* Invalidate all row_prebuilt_t::ins_graph that are referring
+ to this table. That is, force row_get_prebuilt_insert_row() to
+ rebuild prebuilt->ins_node->entry_list). */
+ ut_ad(table->def_trx_id <= trx->id);
+ table->def_trx_id = trx->id;
+
+ next_index = dict_table_get_next_index(index);
+
+ while ((index = next_index) != NULL) {
+ /* read the next pointer before freeing the index */
+ next_index = dict_table_get_next_index(index);
+
+ ut_ad(!dict_index_is_clust(index));
+
+ if (*index->name == TEMP_INDEX_PREFIX) {
+ /* If it is FTS index, drop from table->fts
+ and also drop its auxiliary tables */
+ if (index->type & DICT_FTS) {
+ ut_a(table->fts);
+ fts_drop_index(table, index, trx);
+ }
+
+ switch (dict_index_get_online_status(index)) {
+ case ONLINE_INDEX_CREATION:
+ /* This state should only be possible
+ when prepare_inplace_alter_table() fails
+ after invoking row_merge_create_index().
+ In inplace_alter_table(),
+ row_merge_build_indexes()
+ should never leave the index in this state.
+ It would invoke row_log_abort_sec() on
+ failure. */
+ case ONLINE_INDEX_COMPLETE:
+ /* In these cases, we are able to drop
+ the index straight. The DROP INDEX was
+ never deferred. */
+ break;
+ case ONLINE_INDEX_ABORTED:
+ case ONLINE_INDEX_ABORTED_DROPPED:
+ /* covered by dict_sys->mutex */
+ MONITOR_DEC(MONITOR_BACKGROUND_DROP_INDEX);
+ }
+
+ dict_index_remove_from_cache(table, index);
+ }
+ }
+
+ table->drop_aborted = FALSE;
+ ut_d(dict_table_check_for_dup_indexes(table, CHECK_ALL_COMPLETE));
}
/*********************************************************************//**
@@ -2363,9 +2817,32 @@ void
row_merge_drop_temp_indexes(void)
/*=============================*/
{
- trx_t* trx;
- btr_pcur_t pcur;
- mtr_t mtr;
+ static const char sql[] =
+ "PROCEDURE DROP_TEMP_INDEXES_PROC () IS\n"
+ "ixid CHAR;\n"
+ "found INT;\n"
+
+ "DECLARE CURSOR index_cur IS\n"
+ " SELECT ID FROM SYS_INDEXES\n"
+ " WHERE SUBSTR(NAME,0,1)='" TEMP_INDEX_PREFIX_STR "'\n"
+ "FOR UPDATE;\n"
+
+ "BEGIN\n"
+ "found := 1;\n"
+ "OPEN index_cur;\n"
+ "WHILE found = 1 LOOP\n"
+ " FETCH index_cur INTO ixid;\n"
+ " IF (SQL % NOTFOUND) THEN\n"
+ " found := 0;\n"
+ " ELSE\n"
+ " DELETE FROM SYS_FIELDS WHERE INDEX_ID=ixid;\n"
+ " DELETE FROM SYS_INDEXES WHERE CURRENT OF index_cur;\n"
+ " END IF;\n"
+ "END LOOP;\n"
+ "CLOSE index_cur;\n"
+ "END;\n";
+ trx_t* trx;
+ dberr_t error;
/* Load the table definitions that contain partially defined
indexes, so that the data dictionary information can be checked
@@ -2373,75 +2850,26 @@ row_merge_drop_temp_indexes(void)
trx = trx_allocate_for_background();
trx->op_info = "dropping partially created indexes";
row_mysql_lock_data_dictionary(trx);
+ /* Ensure that this transaction will be rolled back and locks
+ will be released, if the server gets killed before the commit
+ gets written to the redo log. */
+ trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
- mtr_start(&mtr);
-
- btr_pcur_open_at_index_side(
- TRUE,
- dict_table_get_first_index(dict_sys->sys_indexes),
- BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
-
- for (;;) {
- const rec_t* rec;
- const byte* field;
- ulint len;
- table_id_t table_id;
- dict_table_t* table;
-
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
-
- if (!btr_pcur_is_on_user_rec(&pcur)) {
- break;
- }
-
- rec = btr_pcur_get_rec(&pcur);
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_INDEXES__NAME, &len);
- if (len == UNIV_SQL_NULL || len == 0
- || (char) *field != TEMP_INDEX_PREFIX) {
- continue;
- }
-
- /* This is a temporary index. */
-
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_INDEXES__TABLE_ID, &len);
- if (len != 8) {
- /* Corrupted TABLE_ID */
- continue;
- }
-
- table_id = mach_read_from_8(field);
-
- btr_pcur_store_position(&pcur, &mtr);
- btr_pcur_commit_specify_mtr(&pcur, &mtr);
-
- table = dict_table_open_on_id(table_id, TRUE);
+ trx->op_info = "dropping indexes";
+ error = que_eval_sql(NULL, sql, FALSE, trx);
- if (table) {
- dict_index_t* index;
- dict_index_t* next_index;
-
- for (index = dict_table_get_first_index(table);
- index; index = next_index) {
-
- next_index = dict_table_get_next_index(index);
-
- if (*index->name == TEMP_INDEX_PREFIX) {
- row_merge_drop_index(index, table, trx);
- trx_commit_for_mysql(trx);
- }
- }
-
- dict_table_close(table, TRUE);
- }
+ if (error != DB_SUCCESS) {
+ /* Even though we ensure that DDL transactions are WAIT
+ and DEADLOCK free, we could encounter other errors e.g.,
+ DB_TOO_MANY_CONCURRENT_TRXS. */
+ trx->error_state = DB_SUCCESS;
- mtr_start(&mtr);
- btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr);
+ ut_print_timestamp(stderr);
+ fprintf(stderr, " InnoDB: Error: row_merge_drop_temp_indexes "
+ "failed with error code: %u.\n", (unsigned) error);
}
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
+ trx_commit_for_mysql(trx);
row_mysql_unlock_data_dictionary(trx);
trx_free_for_background(trx);
}
@@ -2449,8 +2877,8 @@ row_merge_drop_temp_indexes(void)
/*********************************************************************//**
Creates temporary merge files, and if UNIV_PFS_IO defined, register
the file descriptor with Performance Schema.
-@return File descriptor */
-UNIV_INLINE
+@return file descriptor, or -1 on failure */
+UNIV_INTERN
int
row_merge_file_create_low(void)
/*===========================*/
@@ -2469,31 +2897,43 @@ row_merge_file_create_low(void)
#endif
fd = innobase_mysql_tmpfile();
#ifdef UNIV_PFS_IO
- register_pfs_file_open_end(locker, fd);
+ register_pfs_file_open_end(locker, fd);
#endif
+
+ if (fd < 0) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot create temporary merge file");
+ return -1;
+ }
return(fd);
}
/*********************************************************************//**
-Create a merge file. */
+Create a merge file.
+@return file descriptor, or -1 on failure */
UNIV_INTERN
-void
+int
row_merge_file_create(
/*==================*/
merge_file_t* merge_file) /*!< out: merge file structure */
{
merge_file->fd = row_merge_file_create_low();
- if (srv_disable_sort_file_cache) {
- os_file_set_nocache(merge_file->fd, "row0merge.c", "sort");
- }
merge_file->offset = 0;
merge_file->n_rec = 0;
+
+ if (merge_file->fd >= 0) {
+ if (srv_disable_sort_file_cache) {
+ os_file_set_nocache(merge_file->fd,
+ "row0merge.cc", "sort");
+ }
+ }
+ return(merge_file->fd);
}
/*********************************************************************//**
Destroy a merge file. And de-register the file from Performance Schema
if UNIV_PFS_IO is defined. */
-UNIV_INLINE
+UNIV_INTERN
void
row_merge_file_destroy_low(
/*=======================*/
@@ -2506,7 +2946,9 @@ row_merge_file_destroy_low(
fd, 0, PSI_FILE_CLOSE,
__FILE__, __LINE__);
#endif
- close(fd);
+ if (fd >= 0) {
+ close(fd);
+ }
#ifdef UNIV_PFS_IO
register_pfs_file_io_end(locker, 0);
#endif
@@ -2517,8 +2959,10 @@ UNIV_INTERN
void
row_merge_file_destroy(
/*===================*/
- merge_file_t* merge_file) /*!< out: merge file structure */
+ merge_file_t* merge_file) /*!< in/out: merge file structure */
{
+ ut_ad(!srv_read_only_mode);
+
if (merge_file->fd != -1) {
row_merge_file_destroy_low(merge_file->fd);
merge_file->fd = -1;
@@ -2526,173 +2970,109 @@ row_merge_file_destroy(
}
/*********************************************************************//**
-Determine the precise type of a column that is added to a tem
-if a column must be constrained NOT NULL.
-@return col->prtype, possibly ORed with DATA_NOT_NULL */
-UNIV_INLINE
-ulint
-row_merge_col_prtype(
-/*=================*/
- const dict_col_t* col, /*!< in: column */
- const char* col_name, /*!< in: name of the column */
- const merge_index_def_t*index_def) /*!< in: the index definition
- of the primary key */
-{
- ulint prtype = col->prtype;
- ulint i;
-
- ut_ad(index_def->ind_type & DICT_CLUSTERED);
-
- if (prtype & DATA_NOT_NULL) {
-
- return(prtype);
- }
-
- /* All columns that are included
- in the PRIMARY KEY must be NOT NULL. */
-
- for (i = 0; i < index_def->n_fields; i++) {
- if (!strcmp(col_name, index_def->fields[i].field_name)) {
- return(prtype | DATA_NOT_NULL);
- }
- }
-
- return(prtype);
-}
-
-/*********************************************************************//**
-Create a temporary table for creating a primary key, using the definition
-of an existing table.
-@return table, or NULL on error */
+Rename an index in the dictionary that was created. The data
+dictionary must have been locked exclusively by the caller, because
+the transaction will not be committed.
+@return DB_SUCCESS if all OK */
UNIV_INTERN
-dict_table_t*
-row_merge_create_temporary_table(
-/*=============================*/
- const char* table_name, /*!< in: new table name */
- const merge_index_def_t*index_def, /*!< in: the index definition
- of the primary key */
- const dict_table_t* table, /*!< in: old table definition */
- trx_t* trx) /*!< in/out: transaction
- (sets error_state) */
+dberr_t
+row_merge_rename_index_to_add(
+/*==========================*/
+ trx_t* trx, /*!< in/out: transaction */
+ table_id_t table_id, /*!< in: table identifier */
+ index_id_t index_id) /*!< in: index identifier */
{
- ulint i;
- dict_table_t* new_table = NULL;
- ulint n_cols = dict_table_get_n_user_cols(table);
- ulint error;
- mem_heap_t* heap = mem_heap_create(1000);
- ulint num_col;
-
- ut_ad(table_name);
- ut_ad(index_def);
- ut_ad(table);
- ut_ad(mutex_own(&dict_sys->mutex));
-
- num_col = DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)
- ? n_cols + 1
- : n_cols;
-
- new_table = dict_mem_table_create(
- table_name, 0, num_col, table->flags, table->flags2);
-
- for (i = 0; i < n_cols; i++) {
- const dict_col_t* col;
- const char* col_name;
+ dberr_t err = DB_SUCCESS;
+ pars_info_t* info = pars_info_create();
- col = dict_table_get_nth_col(table, i);
- col_name = dict_table_get_col_name(table, i);
+ /* We use the private SQL parser of Innobase to generate the
+ query graphs needed in renaming indexes. */
- dict_mem_table_add_col(new_table, heap, col_name, col->mtype,
- row_merge_col_prtype(col, col_name,
- index_def),
- col->len);
- }
+ static const char rename_index[] =
+ "PROCEDURE RENAME_INDEX_PROC () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_INDEXES SET NAME=SUBSTR(NAME,1,LENGTH(NAME)-1)\n"
+ "WHERE TABLE_ID = :tableid AND ID = :indexid;\n"
+ "END;\n";
- /* Add the FTS doc_id hidden column */
- if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) {
- fts_add_doc_id_column(new_table);
- new_table->fts->doc_col = n_cols;
- }
+ ut_ad(trx);
+ ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
+ ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
- error = row_create_table_for_mysql(new_table, trx);
- mem_heap_free(heap);
+ trx->op_info = "renaming index to add";
- if (error != DB_SUCCESS) {
- trx->error_state = static_cast<enum db_err>(error);
- new_table = NULL;
- } else {
- dict_table_t* temp_table;
+ pars_info_add_ull_literal(info, "tableid", table_id);
+ pars_info_add_ull_literal(info, "indexid", index_id);
- /* We need to bump up the table ref count and before we can
- use it we need to open the table. */
+ err = que_eval_sql(info, rename_index, FALSE, trx);
- temp_table = dict_table_open_on_name_no_stats(
- new_table->name, TRUE, DICT_ERR_IGNORE_NONE);
+ if (err != DB_SUCCESS) {
+ /* Even though we ensure that DDL transactions are WAIT
+ and DEADLOCK free, we could encounter other errors e.g.,
+ DB_TOO_MANY_CONCURRENT_TRXS. */
+ trx->error_state = DB_SUCCESS;
- ut_a(new_table == temp_table);
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Error: row_merge_rename_index_to_add "
+ "failed with error code: %u.\n", (unsigned) err);
}
- return(new_table);
+ trx->op_info = "";
+
+ return(err);
}
/*********************************************************************//**
-Rename the temporary indexes in the dictionary to permanent ones. The
-data dictionary must have been locked exclusively by the caller,
-because the transaction will not be committed.
+Rename an index in the dictionary that is to be dropped. The data
+dictionary must have been locked exclusively by the caller, because
+the transaction will not be committed.
@return DB_SUCCESS if all OK */
UNIV_INTERN
-ulint
-row_merge_rename_indexes(
-/*=====================*/
+dberr_t
+row_merge_rename_index_to_drop(
+/*===========================*/
trx_t* trx, /*!< in/out: transaction */
- dict_table_t* table) /*!< in/out: table with new indexes */
+ table_id_t table_id, /*!< in: table identifier */
+ index_id_t index_id) /*!< in: index identifier */
{
- db_err err = DB_SUCCESS;
+ dberr_t err;
pars_info_t* info = pars_info_create();
+ ut_ad(!srv_read_only_mode);
+
/* We use the private SQL parser of Innobase to generate the
query graphs needed in renaming indexes. */
- static const char* sql =
- "PROCEDURE RENAME_INDEXES_PROC () IS\n"
+ static const char rename_index[] =
+ "PROCEDURE RENAME_INDEX_PROC () IS\n"
"BEGIN\n"
- "UPDATE SYS_INDEXES SET NAME=SUBSTR(NAME,1,LENGTH(NAME)-1)\n"
- "WHERE TABLE_ID = :tableid AND SUBSTR(NAME,0,1)='"
- TEMP_INDEX_PREFIX_STR "';\n"
+ "UPDATE SYS_INDEXES SET NAME=CONCAT('"
+ TEMP_INDEX_PREFIX_STR "',NAME)\n"
+ "WHERE TABLE_ID = :tableid AND ID = :indexid;\n"
"END;\n";
- ut_ad(table);
ut_ad(trx);
ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
+ ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
- trx->op_info = "renaming indexes";
+ trx->op_info = "renaming index to drop";
- pars_info_add_ull_literal(info, "tableid", table->id);
+ pars_info_add_ull_literal(info, "tableid", table_id);
+ pars_info_add_ull_literal(info, "indexid", index_id);
- err = static_cast<db_err>(que_eval_sql(info, sql, FALSE, trx));
+ err = que_eval_sql(info, rename_index, FALSE, trx);
- DBUG_EXECUTE_IF(
- "ib_rename_indexes_too_many_concurrent_trxs",
- err = DB_TOO_MANY_CONCURRENT_TRXS;
- trx->error_state = static_cast<db_err>(err););
-
- if (err == DB_SUCCESS) {
- dict_index_t* index = dict_table_get_first_index(table);
- do {
- if (*index->name == TEMP_INDEX_PREFIX) {
- index->name++;
- }
- index = dict_table_get_next_index(index);
- } while (index);
- } else {
+ if (err != DB_SUCCESS) {
/* Even though we ensure that DDL transactions are WAIT
and DEADLOCK free, we could encounter other errors e.g.,
- DB_TOO_MANY_TRANSACTIONS. */
-
+ DB_TOO_MANY_CONCURRENT_TRXS. */
trx->error_state = DB_SUCCESS;
ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: row_merge_rename_indexes "
- "failed with error code: %lu.\n", (ulint) err);
+ fprintf(stderr,
+ " InnoDB: Error: row_merge_rename_index_to_drop "
+ "failed with error code: %u.\n", (unsigned) err);
}
trx->op_info = "";
@@ -2701,12 +3081,39 @@ row_merge_rename_indexes(
}
/*********************************************************************//**
+Provide a new pathname for a table that is being renamed if it belongs to
+a file-per-table tablespace. The caller is responsible for freeing the
+memory allocated for the return value.
+@return new pathname of tablespace file, or NULL if space = 0 */
+UNIV_INTERN
+char*
+row_make_new_pathname(
+/*==================*/
+ dict_table_t* table, /*!< in: table to be renamed */
+ const char* new_name) /*!< in: new name */
+{
+ char* new_path;
+ char* old_path;
+
+ ut_ad(table->space != TRX_SYS_SPACE);
+
+ old_path = fil_space_get_first_path(table->space);
+ ut_a(old_path);
+
+ new_path = os_file_make_new_pathname(old_path, new_name);
+
+ mem_free(old_path);
+
+ return(new_path);
+}
+
+/*********************************************************************//**
Rename the tables in the data dictionary. The data dictionary must
have been locked exclusively by the caller, because the transaction
will not be committed.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
row_merge_rename_tables(
/*====================*/
dict_table_t* old_table, /*!< in/out: old table, renamed to
@@ -2716,28 +3123,32 @@ row_merge_rename_tables(
const char* tmp_name, /*!< in: new name for old_table */
trx_t* trx) /*!< in: transaction handle */
{
- ulint err = DB_ERROR;
+ dberr_t err = DB_ERROR;
pars_info_t* info;
char old_name[MAX_FULL_NAME_LEN + 1];
+ ut_ad(!srv_read_only_mode);
ut_ad(old_table != new_table);
ut_ad(mutex_own(&dict_sys->mutex));
-
ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
+ ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_TABLE);
/* store the old/current name to an automatic variable */
if (strlen(old_table->name) + 1 <= sizeof(old_name)) {
memcpy(old_name, old_table->name, strlen(old_table->name) + 1);
} else {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: too long table name: '%s', "
- "max length is %d\n", old_table->name,
- MAX_FULL_NAME_LEN);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Too long table name: '%s', max length is %d",
+ old_table->name, MAX_FULL_NAME_LEN);
ut_error;
}
trx->op_info = "renaming tables";
+ DBUG_EXECUTE_IF(
+ "ib_rebuild_cannot_rename",
+ err = DB_ERROR; goto err_exit;);
+
/* We use the private SQL parser of Innobase to generate the query
graphs needed in updating the dictionary data in system tables. */
@@ -2756,21 +3167,124 @@ row_merge_rename_tables(
" WHERE NAME = :new_name;\n"
"END;\n", FALSE, trx);
- if (err != DB_SUCCESS) {
+ /* Update SYS_TABLESPACES and SYS_DATAFILES if the old
+ table is in a non-system tablespace where space > 0. */
+ if (err == DB_SUCCESS
+ && old_table->space != TRX_SYS_SPACE
+ && !old_table->ibd_file_missing) {
+ /* Make pathname to update SYS_DATAFILES. */
+ char* tmp_path = row_make_new_pathname(old_table, tmp_name);
+
+ info = pars_info_create();
+
+ pars_info_add_str_literal(info, "tmp_name", tmp_name);
+ pars_info_add_str_literal(info, "tmp_path", tmp_path);
+ pars_info_add_int4_literal(info, "old_space",
+ (lint) old_table->space);
+
+ err = que_eval_sql(info,
+ "PROCEDURE RENAME_OLD_SPACE () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_TABLESPACES"
+ " SET NAME = :tmp_name\n"
+ " WHERE SPACE = :old_space;\n"
+ "UPDATE SYS_DATAFILES"
+ " SET PATH = :tmp_path\n"
+ " WHERE SPACE = :old_space;\n"
+ "END;\n", FALSE, trx);
+
+ mem_free(tmp_path);
+ }
+
+ /* Update SYS_TABLESPACES and SYS_DATAFILES if the new
+ table is in a non-system tablespace where space > 0. */
+ if (err == DB_SUCCESS && new_table->space != TRX_SYS_SPACE) {
+ /* Make pathname to update SYS_DATAFILES. */
+ char* old_path = row_make_new_pathname(new_table, old_name);
+
+ info = pars_info_create();
+
+ pars_info_add_str_literal(info, "old_name", old_name);
+ pars_info_add_str_literal(info, "old_path", old_path);
+ pars_info_add_int4_literal(info, "new_space",
+ (lint) new_table->space);
+
+ err = que_eval_sql(info,
+ "PROCEDURE RENAME_NEW_SPACE () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_TABLESPACES"
+ " SET NAME = :old_name\n"
+ " WHERE SPACE = :new_space;\n"
+ "UPDATE SYS_DATAFILES"
+ " SET PATH = :old_path\n"
+ " WHERE SPACE = :new_space;\n"
+ "END;\n", FALSE, trx);
+
+ mem_free(old_path);
+ }
+ if (err != DB_SUCCESS) {
goto err_exit;
}
+ /* Generate the redo logs for file operations */
+ fil_mtr_rename_log(old_table->space, old_name,
+ new_table->space, new_table->name, tmp_name);
+
+ /* What if the redo logs are flushed to disk here? This is
+ tested with following crash point */
+ DBUG_EXECUTE_IF("bug14669848_precommit", log_buffer_flush_to_disk();
+ DBUG_SUICIDE(););
+
+ /* File operations cannot be rolled back. So, before proceeding
+ with file operations, commit the dictionary changes.*/
+ trx_commit_for_mysql(trx);
+
+ /* If server crashes here, the dictionary in InnoDB and MySQL
+ will differ. The .ibd files and the .frm files must be swapped
+ manually by the administrator. No loss of data. */
+ DBUG_EXECUTE_IF("bug14669848", DBUG_SUICIDE(););
+
+ /* Ensure that the redo logs are flushed to disk. The config
+ innodb_flush_log_at_trx_commit must not affect this. */
+ log_buffer_flush_to_disk();
+
/* The following calls will also rename the .ibd data files if
the tables are stored in a single-table tablespace */
- if (!dict_table_rename_in_cache(old_table, tmp_name, FALSE)
- || !dict_table_rename_in_cache(new_table, old_name, FALSE)) {
+ err = dict_table_rename_in_cache(old_table, tmp_name, FALSE);
- err = DB_ERROR;
- goto err_exit;
+ if (err == DB_SUCCESS) {
+
+ ut_ad(dict_table_is_discarded(old_table)
+ == dict_table_is_discarded(new_table));
+
+ err = dict_table_rename_in_cache(new_table, old_name, FALSE);
+
+ if (err != DB_SUCCESS) {
+
+ if (dict_table_rename_in_cache(
+ old_table, old_name, FALSE)
+ != DB_SUCCESS) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot undo the rename in cache "
+ "from %s to %s", old_name, tmp_name);
+ }
+
+ goto err_exit;
+ }
+
+ if (dict_table_is_discarded(new_table)) {
+
+ err = row_import_update_discarded_flag(
+ trx, new_table->id, true, true);
+ }
}
+ DBUG_EXECUTE_IF("ib_rebuild_cannot_load_fk",
+ err = DB_ERROR; goto err_exit;);
+
err = dict_load_foreigns(old_name, FALSE, TRUE);
if (err != DB_SUCCESS) {
@@ -2788,8 +3302,8 @@ err_exit:
/*********************************************************************//**
Create and execute a query graph for creating an index.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_merge_create_index_graph(
/*=========================*/
trx_t* trx, /*!< in: trx */
@@ -2799,7 +3313,7 @@ row_merge_create_index_graph(
ind_node_t* node; /*!< Index creation node */
mem_heap_t* heap; /*!< Memory heap */
que_thr_t* thr; /*!< Query thread */
- ulint err;
+ dberr_t err;
ut_ad(trx);
ut_ad(table);
@@ -2808,7 +3322,7 @@ row_merge_create_index_graph(
heap = mem_heap_create(512);
index->table = table;
- node = ind_create_graph_create(index, heap);
+ node = ind_create_graph_create(index, heap, false);
thr = pars_complete_graph_for_exec(node, trx, heap);
ut_a(thr == que_fork_start_command(
@@ -2832,14 +3346,16 @@ row_merge_create_index(
/*===================*/
trx_t* trx, /*!< in/out: trx (sets error_state) */
dict_table_t* table, /*!< in: the index is on this table */
- const merge_index_def_t*index_def)
+ const index_def_t* index_def)
/*!< in: the index definition */
{
dict_index_t* index;
- ulint err;
+ dberr_t err;
ulint n_fields = index_def->n_fields;
ulint i;
+ ut_ad(!srv_read_only_mode);
+
/* Create the index prototype, using the passed in def, this is not
a persistent operation. We pass 0 as the space id, and determine at
a lower level the space id where to store the table. */
@@ -2850,10 +3366,11 @@ row_merge_create_index(
ut_a(index);
for (i = 0; i < n_fields; i++) {
- merge_index_field_t* ifield = &index_def->fields[i];
+ index_field_t* ifield = &index_def->fields[i];
- dict_mem_index_add_field(index, ifield->field_name,
- ifield->prefix_len);
+ dict_mem_index_add_field(
+ index, dict_table_get_col_name(table, ifield->col_no),
+ ifield->prefix_len);
}
/* Add the index to SYS_INDEXES, using the index prototype. */
@@ -2861,15 +3378,14 @@ row_merge_create_index(
if (err == DB_SUCCESS) {
- index = row_merge_dict_table_get_index(
- table, index_def);
+ index = dict_table_get_index_on_name(table, index_def->name);
ut_a(index);
/* Note the id of the transaction that created this
index, we use it to restrict readers from accessing
this index, to ensure read consistency. */
- index->trx_id = trx->id;
+ ut_ad(index->trx_id == trx->id);
} else {
index = NULL;
}
@@ -2886,35 +3402,46 @@ row_merge_is_index_usable(
const trx_t* trx, /*!< in: transaction */
const dict_index_t* index) /*!< in: index to check */
{
+ if (!dict_index_is_clust(index)
+ && dict_index_is_online_ddl(index)) {
+ /* Indexes that are being created are not useable. */
+ return(FALSE);
+ }
+
return(!dict_index_is_corrupted(index)
- && (!trx->read_view
- || read_view_sees_trx_id(trx->read_view, index->trx_id)));
+ && (dict_table_is_temporary(index->table)
+ || !trx->read_view
+ || read_view_sees_trx_id(trx->read_view, index->trx_id)));
}
/*********************************************************************//**
-Drop the old table.
+Drop a table. The caller must have ensured that the background stats
+thread is not processing the table. This can be done by calling
+dict_stats_wait_bg_to_stop_using_tables() after locking the dictionary and
+before calling this function.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
row_merge_drop_table(
/*=================*/
trx_t* trx, /*!< in: transaction */
dict_table_t* table) /*!< in: table to drop */
{
+ ut_ad(!srv_read_only_mode);
+
/* There must be no open transactions on the table. */
ut_a(table->n_ref_count == 0);
- return(row_drop_table_for_mysql(table->name, trx, FALSE));
+ return(row_drop_table_for_mysql(table->name, trx, false, false));
}
-
/*********************************************************************//**
Build indexes on a table by reading a clustered index,
creating a temporary file containing index entries, merge sorting
these index entries and inserting sorted index entries to indexes.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
row_merge_build_indexes(
/*====================*/
trx_t* trx, /*!< in: transaction */
@@ -2923,45 +3450,62 @@ row_merge_build_indexes(
dict_table_t* new_table, /*!< in: table where indexes are
created; identical to old_table
unless creating a PRIMARY KEY */
+ bool online, /*!< in: true if creating indexes
+ online */
dict_index_t** indexes, /*!< in: indexes to be created */
+ const ulint* key_numbers, /*!< in: MySQL key numbers */
ulint n_indexes, /*!< in: size of indexes[] */
- struct TABLE* table) /*!< in/out: MySQL table, for
+ struct TABLE* table, /*!< in/out: MySQL table, for
reporting erroneous key value
if applicable */
+ const dtuple_t* add_cols, /*!< in: default values of
+ added columns, or NULL */
+ const ulint* col_map, /*!< in: mapping of old column
+ numbers to new ones, or NULL
+ if old_table == new_table */
+ ulint add_autoinc, /*!< in: number of added
+ AUTO_INCREMENT column, or
+ ULINT_UNDEFINED if none is added */
+ ib_sequence_t& sequence) /*!< in: autoinc instance if
+ add_autoinc != ULINT_UNDEFINED */
{
merge_file_t* merge_files;
row_merge_block_t* block;
ulint block_size;
ulint i;
ulint j;
- ulint error;
+ dberr_t error;
int tmpfd;
dict_index_t* fts_sort_idx = NULL;
fts_psort_t* psort_info = NULL;
fts_psort_t* merge_info = NULL;
ib_int64_t sig_count = 0;
- ut_ad(trx);
- ut_ad(old_table);
- ut_ad(new_table);
- ut_ad(indexes);
- ut_ad(n_indexes);
-
- trx_start_if_not_started_xa(trx);
+ ut_ad(!srv_read_only_mode);
+ ut_ad((old_table == new_table) == !col_map);
+ ut_ad(!add_cols || col_map);
/* Allocate memory for merge file data structure and initialize
fields */
- merge_files = static_cast<merge_file_t*>(
- mem_alloc(n_indexes * sizeof *merge_files));
-
block_size = 3 * srv_sort_buf_size;
block = static_cast<row_merge_block_t*>(
os_mem_alloc_large(&block_size));
- for (i = 0; i < n_indexes; i++) {
+ if (block == NULL) {
+ return(DB_OUT_OF_MEMORY);
+ }
+
+ trx_start_if_not_started_xa(trx);
- row_merge_file_create(&merge_files[i]);
+ merge_files = static_cast<merge_file_t*>(
+ mem_alloc(n_indexes * sizeof *merge_files));
+
+ for (i = 0; i < n_indexes; i++) {
+ if (row_merge_file_create(&merge_files[i]) < 0) {
+ error = DB_OUT_OF_MEMORY;
+ goto func_exit;
+ }
if (indexes[i]->type & DICT_FTS) {
ibool opt_doc_id_size = FALSE;
@@ -2971,17 +3515,28 @@ row_merge_build_indexes(
we need to build a "fts sort index" indexing
on above three 'fields' */
fts_sort_idx = row_merge_create_fts_sort_index(
- indexes[i], old_table,
- &opt_doc_id_size);
-
- row_fts_psort_info_init(trx, table, new_table,
- fts_sort_idx, opt_doc_id_size,
- &psort_info, &merge_info);
+ indexes[i], old_table, &opt_doc_id_size);
+
+ row_merge_dup_t* dup = static_cast<row_merge_dup_t*>(
+ ut_malloc(sizeof *dup));
+ dup->index = fts_sort_idx;
+ dup->table = table;
+ dup->col_map = col_map;
+ dup->n_dup = 0;
+
+ row_fts_psort_info_init(
+ trx, dup, new_table, opt_doc_id_size,
+ &psort_info, &merge_info);
}
}
tmpfd = row_merge_file_create_low();
+ if (tmpfd < 0) {
+ error = DB_OUT_OF_MEMORY;
+ goto func_exit;
+ }
+
/* Reset the MySQL row buffer that is used when reporting
duplicate keys. */
innobase_rec_reset(table);
@@ -2990,31 +3545,61 @@ row_merge_build_indexes(
secondary index entries for merge sort */
error = row_merge_read_clustered_index(
- trx, table, old_table, new_table, indexes,
- fts_sort_idx, psort_info, merge_files, n_indexes, block);
+ trx, table, old_table, new_table, online, indexes,
+ fts_sort_idx, psort_info, merge_files, key_numbers,
+ n_indexes, add_cols, col_map,
+ add_autoinc, sequence, block);
if (error != DB_SUCCESS) {
goto func_exit;
}
+ DEBUG_SYNC_C("row_merge_after_scan");
+
/* Now we have files containing index entries ready for
sorting and inserting. */
for (i = 0; i < n_indexes; i++) {
- dict_index_t* sort_idx;
-
- sort_idx = (indexes[i]->type & DICT_FTS)
- ? fts_sort_idx
- : indexes[i];
+ dict_index_t* sort_idx = indexes[i];
if (indexes[i]->type & DICT_FTS) {
os_event_t fts_parallel_merge_event;
+ bool all_exit = false;
+ ulint trial_count = 0;
+
+ sort_idx = fts_sort_idx;
+
+ /* Now all children should complete, wait
+ a bit until they all finish using event */
+ while (!all_exit && trial_count < 10000) {
+ all_exit = true;
+
+ for (j = 0; j < fts_sort_pll_degree;
+ j++) {
+ if (psort_info[j].child_status
+ != FTS_CHILD_EXITING) {
+ all_exit = false;
+ os_thread_sleep(1000);
+ break;
+ }
+ }
+ trial_count++;
+ }
+
+ if (!all_exit) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Not all child sort threads exited"
+ " when creating FTS index '%s'",
+ indexes[i]->name);
+ }
fts_parallel_merge_event
- = merge_info[0].psort_common->sort_event;
+ = merge_info[0].psort_common->merge_event;
if (FTS_PLL_MERGE) {
+ trial_count = 0;
+ all_exit = false;
os_event_reset(fts_parallel_merge_event);
row_fts_start_parallel_merge(merge_info);
wait_again:
@@ -3024,33 +3609,64 @@ wait_again:
for (j = 0; j < FTS_NUM_AUX_INDEX; j++) {
if (merge_info[j].child_status
- != FTS_CHILD_COMPLETE) {
+ != FTS_CHILD_COMPLETE
+ && merge_info[j].child_status
+ != FTS_CHILD_EXITING) {
sig_count = os_event_reset(
fts_parallel_merge_event);
goto wait_again;
}
}
+
+ /* Now all children should complete, wait
+ a bit until they all finish using event */
+ while (!all_exit && trial_count < 10000) {
+ all_exit = true;
+
+ for (j = 0; j < FTS_NUM_AUX_INDEX;
+ j++) {
+ if (merge_info[j].child_status
+ != FTS_CHILD_EXITING) {
+ all_exit = false;
+ os_thread_sleep(1000);
+ break;
+ }
+ }
+ trial_count++;
+ }
+
+ if (!all_exit) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Not all child merge threads"
+ " exited when creating FTS"
+ " index '%s'",
+ indexes[i]->name);
+ }
} else {
+ /* This cannot report duplicates; an
+ assertion would fail in that case. */
error = row_fts_merge_insert(
sort_idx, new_table,
psort_info, 0);
}
+#ifdef FTS_INTERNAL_DIAG_PRINT
+ DEBUG_FTS_SORT_PRINT("FTS_SORT: Complete Insert\n");
+#endif
} else {
- error = row_merge_sort(trx, sort_idx, &merge_files[i],
- block, &tmpfd, table);
+ row_merge_dup_t dup = {
+ sort_idx, table, col_map, 0};
+
+ error = row_merge_sort(
+ trx, &dup, &merge_files[i],
+ block, &tmpfd);
if (error == DB_SUCCESS) {
error = row_merge_insert_index_tuples(
- trx, sort_idx, new_table,
- dict_table_zip_size(old_table),
+ trx->id, sort_idx, old_table,
merge_files[i].fd, block);
}
-
-#ifdef FTS_INTERNAL_DIAG_PRINT
- DEBUG_FTS_SORT_PRINT("FTS_SORT: Complete Insert\n");
-#endif
}
/* Close the temporary file to free up space. */
@@ -3058,10 +3674,20 @@ wait_again:
if (indexes[i]->type & DICT_FTS) {
row_fts_psort_info_destroy(psort_info, merge_info);
+ } else if (error != DB_SUCCESS || !online) {
+ /* Do not apply any online log. */
+ } else if (old_table != new_table) {
+ ut_ad(!sort_idx->online_log);
+ ut_ad(sort_idx->online_status
+ == ONLINE_INDEX_COMPLETE);
+ } else {
+ DEBUG_SYNC_C("row_log_apply_before");
+ error = row_log_apply(trx, sort_idx, table);
+ DEBUG_SYNC_C("row_log_apply_after");
}
if (error != DB_SUCCESS) {
- trx->error_key_num = i;
+ trx->error_key_num = key_numbers[i];
goto func_exit;
}
@@ -3082,7 +3708,7 @@ func_exit:
DBUG_EXECUTE_IF(
"ib_build_indexes_too_many_concurrent_trxs",
error = DB_TOO_MANY_CONCURRENT_TRXS;
- trx->error_state = static_cast<db_err>(error););
+ trx->error_state = error;);
row_merge_file_destroy_low(tmpfd);
@@ -3097,5 +3723,45 @@ func_exit:
mem_free(merge_files);
os_mem_free_large(block, block_size);
+ DICT_TF2_FLAG_UNSET(new_table, DICT_TF2_FTS_ADD_DOC_ID);
+
+ if (online && old_table == new_table && error != DB_SUCCESS) {
+ /* On error, flag all online secondary index creation
+ as aborted. */
+ for (i = 0; i < n_indexes; i++) {
+ ut_ad(!(indexes[i]->type & DICT_FTS));
+ ut_ad(*indexes[i]->name == TEMP_INDEX_PREFIX);
+ ut_ad(!dict_index_is_clust(indexes[i]));
+
+ /* Completed indexes should be dropped as
+ well, and indexes whose creation was aborted
+ should be dropped from the persistent
+ storage. However, at this point we can only
+ set some flags in the not-yet-published
+ indexes. These indexes will be dropped later
+ in row_merge_drop_indexes(), called by
+ rollback_inplace_alter_table(). */
+
+ switch (dict_index_get_online_status(indexes[i])) {
+ case ONLINE_INDEX_COMPLETE:
+ break;
+ case ONLINE_INDEX_CREATION:
+ rw_lock_x_lock(
+ dict_index_get_lock(indexes[i]));
+ row_log_abort_sec(indexes[i]);
+ indexes[i]->type |= DICT_CORRUPT;
+ rw_lock_x_unlock(
+ dict_index_get_lock(indexes[i]));
+ new_table->drop_aborted = TRUE;
+ /* fall through */
+ case ONLINE_INDEX_ABORTED_DROPPED:
+ case ONLINE_INDEX_ABORTED:
+ MONITOR_MUTEX_INC(
+ &dict_sys->mutex,
+ MONITOR_BACKGROUND_DROP_INDEX);
+ }
+ }
+ }
+
return(error);
}
diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc
index f1811a664c2..f748bb4f60f 100644
--- a/storage/innobase/row/row0mysql.cc
+++ b/storage/innobase/row/row0mysql.cc
@@ -30,6 +30,9 @@ Created 9/17/2000 Heikki Tuuri
#include "row0mysql.ic"
#endif
+#include <debug_sync.h>
+#include <my_dbug.h>
+
#include "row0ins.h"
#include "row0merge.h"
#include "row0sel.h"
@@ -42,6 +45,7 @@ Created 9/17/2000 Heikki Tuuri
#include "dict0load.h"
#include "dict0boot.h"
#include "dict0stats.h"
+#include "dict0stats_bg.h"
#include "trx0roll.h"
#include "trx0purge.h"
#include "trx0rec.h"
@@ -54,16 +58,16 @@ Created 9/17/2000 Heikki Tuuri
#include "ibuf0ibuf.h"
#include "fts0fts.h"
#include "fts0types.h"
-#include "srv0mon.h"
+#include "srv0start.h"
+#include "row0import.h"
+#include "m_string.h"
+#include "my_sys.h"
/** Provide optional 4.x backwards compatibility for 5.0 and above */
UNIV_INTERN ibool row_rollback_on_timeout = FALSE;
/** Chain node of the list of tables to drop in the background. */
-typedef struct row_mysql_drop_struct row_mysql_drop_t;
-
-/** Chain node of the list of tables to drop in the background. */
-struct row_mysql_drop_struct{
+struct row_mysql_drop_t{
char* table_name; /*!< table name */
UT_LIST_NODE_T(row_mysql_drop_t)row_mysql_drop_list;
/*!< list chain node */
@@ -82,7 +86,7 @@ more. Protected by row_drop_list_mutex. */
static UT_LIST_BASE_NODE_T(row_mysql_drop_t) row_mysql_drop_list;
/** Mutex protecting the background table drop list. */
-static mutex_t row_drop_list_mutex;
+static ib_mutex_t row_drop_list_mutex;
/** Flag: has row_mysql_drop_list been initialized? */
static ibool row_mysql_drop_list_inited = FALSE;
@@ -570,21 +574,21 @@ next_column:
/****************************************************************//**
Handles user errors and lock waits detected by the database engine.
-@return TRUE if it was a lock wait and we should continue running the
+@return true if it was a lock wait and we should continue running the
query thread and in that case the thr is ALREADY in the running state. */
UNIV_INTERN
-ibool
+bool
row_mysql_handle_errors(
/*====================*/
- ulint* new_err,/*!< out: possible new error encountered in
+ dberr_t* new_err,/*!< out: possible new error encountered in
lock wait, or if no new error, the value
of trx->error_state at the entry of this
function */
trx_t* trx, /*!< in: transaction */
- que_thr_t* thr, /*!< in: query thread */
- trx_savept_t* savept) /*!< in: savepoint or NULL */
+ que_thr_t* thr, /*!< in: query thread, or NULL */
+ trx_savept_t* savept) /*!< in: savepoint, or NULL */
{
- ulint err;
+ dberr_t err;
handle_new_error:
err = trx->error_state;
@@ -612,6 +616,7 @@ handle_new_error:
case DB_READ_ONLY:
case DB_FTS_INVALID_DOCID:
case DB_INTERRUPTED:
+ case DB_DICT_CHANGED:
if (savept) {
/* Roll back the latest, possibly incomplete
insertion or update */
@@ -631,7 +636,7 @@ handle_new_error:
*new_err = err;
- return(TRUE);
+ return(true);
case DB_DEADLOCK:
case DB_LOCK_TABLE_FULL:
@@ -648,6 +653,7 @@ handle_new_error:
" a new data file to\n"
"InnoDB: my.cnf and restart the database.\n", stderr);
+ ut_ad(0);
exit(1);
case DB_CORRUPTION:
@@ -686,7 +692,7 @@ handle_new_error:
trx->error_state = DB_SUCCESS;
- return(FALSE);
+ return(false);
}
/********************************************************************//**
@@ -774,7 +780,7 @@ row_create_prebuilt(
prebuilt->clust_ref = ref;
- prebuilt->autoinc_error = 0;
+ prebuilt->autoinc_error = DB_SUCCESS;
prebuilt->autoinc_offset = 0;
/* Default to 1, we will set the actual value later in
@@ -883,7 +889,7 @@ row_prebuilt_free(
mem_free(base);
}
- dict_table_close(prebuilt->table, dict_locked);
+ dict_table_close(prebuilt->table, dict_locked, TRUE);
mem_heap_free(prebuilt->heap);
}
@@ -950,44 +956,62 @@ row_get_prebuilt_insert_row(
row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL
handle */
{
- ins_node_t* node;
- dtuple_t* row;
- dict_table_t* table = prebuilt->table;
+ dict_table_t* table = prebuilt->table;
ut_ad(prebuilt && table && prebuilt->trx);
- if (prebuilt->ins_node == NULL) {
-
- /* Not called before for this handle: create an insert node
- and query graph to the prebuilt struct */
+ if (prebuilt->ins_node != 0) {
- node = ins_node_create(INS_DIRECT, table, prebuilt->heap);
+ /* Check if indexes have been dropped or added and we
+ may need to rebuild the row insert template. */
- prebuilt->ins_node = node;
+ if (prebuilt->trx_id == table->def_trx_id
+ && UT_LIST_GET_LEN(prebuilt->ins_node->entry_list)
+ == UT_LIST_GET_LEN(table->indexes)) {
- if (prebuilt->ins_upd_rec_buff == NULL) {
- prebuilt->ins_upd_rec_buff = static_cast<byte*>(
- mem_heap_alloc(
- prebuilt->heap,
- prebuilt->mysql_row_len));
+ return(prebuilt->ins_node->row);
}
- row = dtuple_create(prebuilt->heap,
- dict_table_get_n_cols(table));
+ ut_ad(prebuilt->trx_id < table->def_trx_id);
- dict_table_copy_types(row, table);
+ que_graph_free_recursive(prebuilt->ins_graph);
- ins_node_set_new_row(node, row);
+ prebuilt->ins_graph = 0;
+ }
- prebuilt->ins_graph = static_cast<que_fork_t*>(
- que_node_get_parent(
- pars_complete_graph_for_exec(
- node,
- prebuilt->trx, prebuilt->heap)));
+ /* Create an insert node and query graph to the prebuilt struct */
- prebuilt->ins_graph->state = QUE_FORK_ACTIVE;
+ ins_node_t* node;
+
+ node = ins_node_create(INS_DIRECT, table, prebuilt->heap);
+
+ prebuilt->ins_node = node;
+
+ if (prebuilt->ins_upd_rec_buff == 0) {
+ prebuilt->ins_upd_rec_buff = static_cast<byte*>(
+ mem_heap_alloc(
+ prebuilt->heap,
+ prebuilt->mysql_row_len));
}
+ dtuple_t* row;
+
+ row = dtuple_create(prebuilt->heap, dict_table_get_n_cols(table));
+
+ dict_table_copy_types(row, table);
+
+ ins_node_set_new_row(node, row);
+
+ prebuilt->ins_graph = static_cast<que_fork_t*>(
+ que_node_get_parent(
+ pars_complete_graph_for_exec(
+ node,
+ prebuilt->trx, prebuilt->heap)));
+
+ prebuilt->ins_graph->state = QUE_FORK_ACTIVE;
+
+ prebuilt->trx_id = table->def_trx_id;
+
return(prebuilt->ins_node->row);
}
@@ -1000,23 +1024,41 @@ row_update_statistics_if_needed(
/*============================*/
dict_table_t* table) /*!< in: table */
{
- ulint counter;
+ ib_uint64_t counter;
+ ib_uint64_t n_rows;
+
+ if (!table->stat_initialized) {
+ DBUG_EXECUTE_IF(
+ "test_upd_stats_if_needed_not_inited",
+ fprintf(stderr, "test_upd_stats_if_needed_not_inited "
+ "was executed\n");
+ );
+ return;
+ }
- counter = table->stat_modified_counter;
+ counter = table->stat_modified_counter++;
+ n_rows = dict_table_get_n_rows(table);
- table->stat_modified_counter = counter + 1;
+ if (dict_stats_is_persistent_enabled(table)) {
+ if (counter > n_rows / 10 /* 10% */
+ && dict_stats_auto_recalc_is_enabled(table)) {
+
+ dict_stats_recalc_pool_add(table);
+ table->stat_modified_counter = 0;
+ }
+ return;
+ }
/* Calculate new statistics if 1 / 16 of table has been modified
- since the last time a statistics batch was run, or if
- stat_modified_counter > 2 000 000 000 (to avoid wrap-around).
+ since the last time a statistics batch was run.
We calculate statistics at most every 16th round, since we may have
a counter table which is very small and updated very often. */
- if (counter > 2000000000
- || ((ib_int64_t) counter > 16 + table->stat_n_rows / 16)) {
+ if (counter > 16 + n_rows / 16 /* 6.25% */) {
ut_ad(!mutex_own(&dict_sys->mutex));
- dict_stats_update(table, DICT_STATS_FETCH, FALSE);
+ /* this will reset table->stat_modified_counter to 0 */
+ dict_stats_update(table, DICT_STATS_RECALC_TRANSIENT);
}
}
@@ -1028,7 +1070,7 @@ It is not compatible with another AUTO_INC or exclusive lock on the
table.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_lock_table_autoinc_for_mysql(
/*=============================*/
row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in the MySQL
@@ -1038,7 +1080,7 @@ row_lock_table_autoinc_for_mysql(
ins_node_t* node = prebuilt->ins_node;
const dict_table_t* table = prebuilt->table;
que_thr_t* thr;
- ulint err;
+ dberr_t err;
ibool was_lock_wait;
ut_ad(trx);
@@ -1053,10 +1095,8 @@ row_lock_table_autoinc_for_mysql(
trx->op_info = "setting auto-inc lock";
- if (node == NULL) {
- row_get_prebuilt_insert_row(prebuilt);
- node = prebuilt->ins_node;
- }
+ row_get_prebuilt_insert_row(prebuilt);
+ node = prebuilt->ins_node;
/* We use the insert query graph as the dummy graph needed
in the lock module call */
@@ -1076,7 +1116,7 @@ run_again:
err = lock_table(0, prebuilt->table, LOCK_AUTO_INC, thr);
- trx->error_state = static_cast<enum db_err>(err);
+ trx->error_state = err;
if (err != DB_SUCCESS) {
que_thr_stop_for_mysql(thr);
@@ -1089,21 +1129,21 @@ run_again:
trx->op_info = "";
- return((int) err);
+ return(err);
}
que_thr_stop_for_mysql_no_error(thr, trx);
trx->op_info = "";
- return((int) err);
+ return(err);
}
/*********************************************************************//**
Sets a table lock on the table mentioned in prebuilt.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_lock_table_for_mysql(
/*=====================*/
row_prebuilt_t* prebuilt, /*!< in: prebuilt struct in the MySQL
@@ -1117,7 +1157,7 @@ row_lock_table_for_mysql(
{
trx_t* trx = prebuilt->trx;
que_thr_t* thr;
- ulint err;
+ dberr_t err;
ibool was_lock_wait;
ut_ad(trx);
@@ -1157,7 +1197,7 @@ run_again:
thr);
}
- trx->error_state = static_cast<enum db_err>(err);
+ trx->error_state = err;
if (err != DB_SUCCESS) {
que_thr_stop_for_mysql(thr);
@@ -1170,21 +1210,21 @@ run_again:
trx->op_info = "";
- return((int) err);
+ return(err);
}
que_thr_stop_for_mysql_no_error(thr, trx);
trx->op_info = "";
- return((int) err);
+ return(err);
}
/*********************************************************************//**
Does an insert for MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_insert_for_mysql(
/*=================*/
byte* mysql_rec, /*!< in: row in the MySQL format */
@@ -1193,7 +1233,7 @@ row_insert_for_mysql(
{
trx_savept_t savept;
que_thr_t* thr;
- ulint err;
+ dberr_t err;
ibool was_lock_wait;
trx_t* trx = prebuilt->trx;
ins_node_t* node = prebuilt->ins_node;
@@ -1201,24 +1241,23 @@ row_insert_for_mysql(
ut_ad(trx);
- if (table->ibd_file_missing) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error:\n"
- "InnoDB: MySQL is trying to use a table handle"
- " but the .ibd file for\n"
- "InnoDB: table %s does not exist.\n"
- "InnoDB: Have you deleted the .ibd file"
- " from the database directory under\n"
- "InnoDB: the MySQL datadir, or have you"
- " used DISCARD TABLESPACE?\n"
- "InnoDB: Look from\n"
- "InnoDB: " REFMAN "innodb-troubleshooting.html\n"
- "InnoDB: how you can resolve the problem.\n",
+ if (dict_table_is_discarded(prebuilt->table)) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "The table %s doesn't have a corresponding "
+ "tablespace, it was discarded.",
prebuilt->table->name);
- return(DB_ERROR);
- }
- if (UNIV_UNLIKELY(prebuilt->magic_n != ROW_PREBUILT_ALLOCATED)) {
+ return(DB_TABLESPACE_DELETED);
+
+ } else if (prebuilt->table->ibd_file_missing) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ ".ibd file is missing for table %s",
+ prebuilt->table->name);
+
+ return(DB_TABLESPACE_NOT_FOUND);
+
+ } else if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED) {
fprintf(stderr,
"InnoDB: Error: trying to free a corrupt\n"
"InnoDB: table handle. Magic n %lu, table name ",
@@ -1229,9 +1268,7 @@ row_insert_for_mysql(
mem_analyze_corruption(prebuilt);
ut_error;
- }
-
- if (UNIV_UNLIKELY(srv_created_new_raw || srv_force_recovery)) {
+ } else if (srv_created_new_raw || srv_force_recovery) {
fputs("InnoDB: A new raw disk partition was initialized or\n"
"InnoDB: innodb_force_recovery is on: we do not allow\n"
"InnoDB: database modifications by the user. Shut down\n"
@@ -1249,10 +1286,8 @@ row_insert_for_mysql(
trx_start_if_not_started_xa(trx);
- if (node == NULL) {
- row_get_prebuilt_insert_row(prebuilt);
- node = prebuilt->ins_node;
- }
+ row_get_prebuilt_insert_row(prebuilt);
+ node = prebuilt->ins_node;
row_mysql_convert_row_to_innobase(node->row, prebuilt, mysql_rec);
@@ -1290,12 +1325,14 @@ error_exit:
thr->lock_state = QUE_THR_LOCK_NOLOCK;
if (was_lock_wait) {
+ ut_ad(node->state == INS_NODE_INSERT_ENTRIES
+ || node->state == INS_NODE_ALLOC_ROW_ID);
goto run_again;
}
trx->op_info = "";
- return((int) err);
+ return(err);
}
if (dict_table_has_fts_index(table)) {
@@ -1353,19 +1390,18 @@ error_exit:
que_thr_stop_for_mysql_no_error(thr, trx);
- table->stat_n_rows++;
+ srv_stats.n_rows_inserted.add((size_t)trx->id, 1);
- srv_n_rows_inserted++;
-
- if (prebuilt->table->stat_n_rows == 0) {
- /* Avoid wrap-over */
- table->stat_n_rows--;
- }
+ /* Not protected by dict_table_stats_lock() for performance
+ reasons, we would rather get garbage in stat_n_rows (which is
+ just an estimate anyway) than protecting the following code
+ with a latch. */
+ dict_table_n_rows_inc(table);
row_update_statistics_if_needed(table);
trx->op_info = "";
- return((int) err);
+ return(err);
}
/*********************************************************************//**
@@ -1490,7 +1526,7 @@ row_fts_do_update(
Handles FTS matters for an update or a delete.
NOTE: should not be called if the table does not have an FTS index. .*/
static
-ulint
+dberr_t
row_fts_update_or_delete(
/*=====================*/
row_prebuilt_t* prebuilt) /* in: prebuilt struct in MySQL
@@ -1530,16 +1566,18 @@ void
init_fts_doc_id_for_ref(
/*====================*/
dict_table_t* table, /*!< in: table */
- ulint depth) /*!< in: recusive call depth */
+ ulint* depth) /*!< in: recusive call depth */
{
dict_foreign_t* foreign;
foreign = UT_LIST_GET_FIRST(table->referenced_list);
- depth++;
+ table->fk_max_recusive_level = 0;
+
+ (*depth)++;
/* Limit on tables involved in cascading delete/update */
- if (depth > FK_MAX_CASCADE_DEL) {
+ if (*depth > FK_MAX_CASCADE_DEL) {
return;
}
@@ -1563,7 +1601,7 @@ init_fts_doc_id_for_ref(
Does an update or delete of a row for MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_update_for_mysql(
/*=================*/
byte* mysql_rec, /*!< in: the row to be updated, in
@@ -1572,7 +1610,7 @@ row_update_for_mysql(
handle */
{
trx_savept_t savept;
- ulint err;
+ dberr_t err;
que_thr_t* thr;
ibool was_lock_wait;
dict_index_t* clust_index;
@@ -1580,6 +1618,7 @@ row_update_for_mysql(
upd_node_t* node;
dict_table_t* table = prebuilt->table;
trx_t* trx = prebuilt->trx;
+ ulint fk_depth = 0;
ut_ad(prebuilt && trx);
UT_NOT_USED(mysql_rec);
@@ -1626,14 +1665,26 @@ row_update_for_mysql(
return(DB_ERROR);
}
+ DEBUG_SYNC_C("innodb_row_update_for_mysql_begin");
+
trx->op_info = "updating or deleting";
row_mysql_delay_if_needed();
- init_fts_doc_id_for_ref(table, 0);
-
trx_start_if_not_started_xa(trx);
+ if (dict_table_is_referenced_by_foreign_key(table)) {
+ /* Share lock the data dictionary to prevent any
+ table dictionary (for foreign constraint) change.
+ This is similar to row_ins_check_foreign_constraint
+ check protect by the dictionary lock as well.
+ In the future, this can be removed once the Foreign
+ key MDL is implemented */
+ row_mysql_freeze_data_dictionary(trx);
+ init_fts_doc_id_for_ref(table, &fk_depth);
+ row_mysql_unfreeze_data_dictionary(trx);
+ }
+
node = prebuilt->upd_node;
clust_index = dict_table_get_first_index(table);
@@ -1683,10 +1734,13 @@ run_again:
trx->error_state = DB_SUCCESS;
trx->op_info = "";
- return((int) err);
+ return(err);
}
thr->lock_state= QUE_THR_LOCK_ROW;
+
+ DEBUG_SYNC(trx->mysql_thd, "row_update_for_mysql_error");
+
was_lock_wait = row_mysql_handle_errors(&err, trx, thr,
&savept);
thr->lock_state= QUE_THR_LOCK_NOLOCK;
@@ -1697,7 +1751,7 @@ run_again:
trx->op_info = "";
- return((int) err);
+ return(err);
}
que_thr_stop_for_mysql_no_error(thr, trx);
@@ -1707,18 +1761,20 @@ run_again:
err = row_fts_update_or_delete(prebuilt);
if (err != DB_SUCCESS) {
trx->op_info = "";
- return((int) err);
+ return(err);
}
}
if (node->is_delete) {
- if (prebuilt->table->stat_n_rows > 0) {
- prebuilt->table->stat_n_rows--;
- }
+ /* Not protected by dict_table_stats_lock() for performance
+ reasons, we would rather get garbage in stat_n_rows (which is
+ just an estimate anyway) than protecting the following code
+ with a latch. */
+ dict_table_n_rows_dec(prebuilt->table);
- srv_n_rows_deleted++;
+ srv_stats.n_rows_deleted.add((size_t)trx->id, 1);
} else {
- srv_n_rows_updated++;
+ srv_stats.n_rows_updated.add((size_t)trx->id, 1);
}
/* We update table statistics only if it is a DELETE or UPDATE
@@ -1730,7 +1786,7 @@ run_again:
trx->op_info = "";
- return((int) err);
+ return(err);
}
/*********************************************************************//**
@@ -1744,7 +1800,7 @@ prebuilt->clust_pcur. Thus, this implements a 'mini-rollback' that
releases the latest clustered index record lock we set.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+void
row_unlock_for_mysql(
/*=================*/
row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct in MySQL
@@ -1770,8 +1826,7 @@ row_unlock_for_mysql(
"InnoDB: innodb_locks_unsafe_for_binlog is FALSE and\n"
"InnoDB: this session is not using"
" READ COMMITTED isolation level.\n");
-
- return(DB_SUCCESS);
+ return;
}
trx->op_info = "unlock_row";
@@ -1863,15 +1918,13 @@ no_unlock:
}
trx->op_info = "";
-
- return(DB_SUCCESS);
}
/**********************************************************************//**
Does a cascaded delete or set null in a foreign key operation.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
row_update_cascade_for_mysql(
/*=========================*/
que_thr_t* thr, /*!< in: query thread */
@@ -1879,7 +1932,7 @@ row_update_cascade_for_mysql(
or set null operation */
dict_table_t* table) /*!< in: table where we do the operation */
{
- ulint err;
+ dberr_t err;
trx_t* trx;
trx = thr_get_trx(thr);
@@ -1890,12 +1943,14 @@ row_update_cascade_for_mysql(
thr->fk_cascade_depth++;
if (thr->fk_cascade_depth > FK_MAX_CASCADE_DEL) {
- return (DB_FOREIGN_EXCEED_MAX_CASCADE);
+ return(DB_FOREIGN_EXCEED_MAX_CASCADE);
}
run_again:
thr->run_node = node;
thr->prev_node = node;
+ DEBUG_SYNC_C("foreign_constraint_update_cascade");
+
row_upd_step(thr);
/* The recursive call for cascading update/delete happens
@@ -1937,13 +1992,15 @@ run_again:
}
if (node->is_delete) {
- if (table->stat_n_rows > 0) {
- table->stat_n_rows--;
- }
+ /* Not protected by dict_table_stats_lock() for performance
+ reasons, we would rather get garbage in stat_n_rows (which is
+ just an estimate anyway) than protecting the following code
+ with a latch. */
+ dict_table_n_rows_dec(table);
- srv_n_rows_deleted++;
+ srv_stats.n_rows_deleted.add((size_t)trx->id, 1);
} else {
- srv_n_rows_updated++;
+ srv_stats.n_rows_updated.add((size_t)trx->id, 1);
}
row_update_statistics_if_needed(table);
@@ -1981,7 +2038,7 @@ row_mysql_freeze_data_dictionary_func(
{
ut_a(trx->dict_operation_lock_mode == 0);
- rw_lock_s_lock_func(&dict_operation_lock, 0, file, line);
+ rw_lock_s_lock_inline(&dict_operation_lock, 0, file, line);
trx->dict_operation_lock_mode = RW_S_LATCH;
}
@@ -1994,6 +2051,8 @@ row_mysql_unfreeze_data_dictionary(
/*===============================*/
trx_t* trx) /*!< in/out: transaction */
{
+ ut_ad(lock_trx_has_sys_table_locks(trx) == NULL);
+
ut_a(trx->dict_operation_lock_mode == RW_S_LATCH);
rw_lock_s_unlock(&dict_operation_lock);
@@ -2018,7 +2077,7 @@ row_mysql_lock_data_dictionary_func(
/* Serialize data dictionary operations with dictionary mutex:
no deadlocks or lock waits can occur then in these operations */
- rw_lock_x_lock_func(&dict_operation_lock, 0, file, line);
+ rw_lock_x_lock_inline(&dict_operation_lock, 0, file, line);
trx->dict_operation_lock_mode = RW_X_LATCH;
mutex_enter(&(dict_sys->mutex));
@@ -2032,6 +2091,8 @@ row_mysql_unlock_data_dictionary(
/*=============================*/
trx_t* trx) /*!< in/out: transaction */
{
+ ut_ad(lock_trx_has_sys_table_locks(trx) == NULL);
+
ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
/* Serialize data dictionary operations with dictionary mutex:
@@ -2052,19 +2113,21 @@ InnoDB will try to invoke mem_validate(). On failure the transaction will
be rolled back and the 'table' object will be freed.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_create_table_for_mysql(
/*=======================*/
dict_table_t* table, /*!< in, own: table definition
- (will be freed) */
- trx_t* trx) /*!< in: transaction handle */
+ (will be freed, or on DB_SUCCESS
+ added to the data dictionary cache) */
+ trx_t* trx, /*!< in/out: transaction */
+ bool commit) /*!< in: if true, commit the transaction */
{
tab_node_t* node;
mem_heap_t* heap;
que_thr_t* thr;
const char* table_name;
ulint table_name_len;
- ulint err;
+ dberr_t err;
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
@@ -2072,6 +2135,11 @@ row_create_table_for_mysql(
ut_ad(mutex_own(&(dict_sys->mutex)));
ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
+ DBUG_EXECUTE_IF(
+ "ib_create_table_fail_at_start_of_row_create_table_for_mysql",
+ goto err_exit;
+ );
+
if (srv_created_new_raw) {
fputs("InnoDB: A new raw disk partition was initialized:\n"
"InnoDB: we do not allow database modifications"
@@ -2080,7 +2148,10 @@ row_create_table_for_mysql(
" is replaced with raw.\n", stderr);
err_exit:
dict_mem_table_free(table);
- trx_commit_for_mysql(trx);
+
+ if (commit) {
+ trx_commit_for_mysql(trx);
+ }
return(DB_ERROR);
}
@@ -2117,23 +2188,23 @@ err_exit:
/* The lock timeout monitor thread also takes care
of InnoDB monitor prints */
- os_event_set(srv_timeout_event);
+ os_event_set(lock_sys->timeout_event);
} else if (STR_EQ(table_name, table_name_len,
S_innodb_lock_monitor)) {
srv_print_innodb_monitor = TRUE;
srv_print_innodb_lock_monitor = TRUE;
- os_event_set(srv_timeout_event);
+ os_event_set(lock_sys->timeout_event);
} else if (STR_EQ(table_name, table_name_len,
S_innodb_tablespace_monitor)) {
srv_print_innodb_tablespace_monitor = TRUE;
- os_event_set(srv_timeout_event);
+ os_event_set(lock_sys->timeout_event);
} else if (STR_EQ(table_name, table_name_len,
S_innodb_table_monitor)) {
srv_print_innodb_table_monitor = TRUE;
- os_event_set(srv_timeout_event);
+ os_event_set(lock_sys->timeout_event);
#ifdef UNIV_MEM_DEBUG
} else if (STR_EQ(table_name, table_name_len,
S_innodb_mem_validate)) {
@@ -2152,12 +2223,21 @@ err_exit:
#endif /* UNIV_MEM_DEBUG */
}
-
heap = mem_heap_create(512);
- trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+ switch (trx_get_dict_operation(trx)) {
+ case TRX_DICT_OP_NONE:
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+ case TRX_DICT_OP_TABLE:
+ break;
+ case TRX_DICT_OP_INDEX:
+ /* If the transaction was previously flagged as
+ TRX_DICT_OP_INDEX, we should be creating auxiliary
+ tables for full-text indexes. */
+ ut_ad(strstr(table->name, "/FTS_") != NULL);
+ }
- node = tab_create_graph_create(table, heap);
+ node = tab_create_graph_create(table, heap, commit);
thr = pars_complete_graph_for_exec(node, trx, heap);
@@ -2168,6 +2248,29 @@ err_exit:
err = trx->error_state;
+ if (table->space != TRX_SYS_SPACE) {
+ ut_a(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_USE_TABLESPACE));
+
+ /* Update SYS_TABLESPACES and SYS_DATAFILES if a new
+ tablespace was created. */
+ if (err == DB_SUCCESS) {
+ char* path;
+ path = fil_space_get_first_path(table->space);
+
+ err = dict_create_add_tablespace_to_dictionary(
+ table->space, table->name,
+ fil_space_get_flags(table->space),
+ path, trx, commit);
+
+ mem_free(path);
+ }
+
+ if (err != DB_SUCCESS) {
+ /* We must delete the link file. */
+ fil_delete_link_file(table->name);
+ }
+ }
+
switch (err) {
case DB_SUCCESS:
break;
@@ -2181,8 +2284,8 @@ err_exit:
ut_print_name(stderr, trx, TRUE, table->name);
fputs(" because tablespace full\n", stderr);
- if (dict_table_open_on_name_no_stats(
- table->name, FALSE, DICT_ERR_IGNORE_NONE)) {
+ if (dict_table_open_on_name(table->name, TRUE, FALSE,
+ DICT_ERR_IGNORE_NONE)) {
/* Make things easy for the drop table code. */
@@ -2190,10 +2293,13 @@ err_exit:
dict_table_move_from_lru_to_non_lru(table);
}
- dict_table_close(table, FALSE);
+ dict_table_close(table, TRUE, FALSE);
row_drop_table_for_mysql(table->name, trx, FALSE);
- trx_commit_for_mysql(trx);
+
+ if (commit) {
+ trx_commit_for_mysql(trx);
+ }
} else {
dict_mem_table_free(table);
}
@@ -2203,7 +2309,12 @@ err_exit:
case DB_TOO_MANY_CONCURRENT_TRXS:
/* We already have .ibd file here. it should be deleted. */
- if (table->space && !fil_delete_tablespace(table->space)) {
+ if (table->space
+ && fil_delete_tablespace(
+ table->space,
+ BUF_REMOVE_FLUSH_NO_WRITE)
+ != DB_SUCCESS) {
+
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Error: not able to"
@@ -2215,10 +2326,8 @@ err_exit:
/* fall through */
case DB_DUPLICATE_KEY:
+ case DB_TABLESPACE_EXISTS:
default:
- /* We may also get err == DB_ERROR if the .ibd file for the
- table already exists */
-
trx->error_state = DB_SUCCESS;
trx_rollback_to_savepoint(trx, NULL);
dict_mem_table_free(table);
@@ -2229,7 +2338,7 @@ err_exit:
trx->op_info = "";
- return((int) err);
+ return(err);
}
/*********************************************************************//**
@@ -2238,7 +2347,7 @@ to create an index results in dropping the whole table! This is no problem
currently as all indexes must be created at the same time as the table.
@return error number or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_create_index_for_mysql(
/*=======================*/
dict_index_t* index, /*!< in, own: index definition
@@ -2254,13 +2363,13 @@ row_create_index_for_mysql(
ind_node_t* node;
mem_heap_t* heap;
que_thr_t* thr;
- ulint err;
+ dberr_t err;
ulint i;
ulint len;
char* table_name;
char* index_name;
dict_table_t* table;
- ibool is_fts = FALSE;
+ ibool is_fts;
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
@@ -2277,8 +2386,8 @@ row_create_index_for_mysql(
is_fts = (index->type == DICT_FTS);
- table = dict_table_open_on_name_no_stats(table_name, TRUE,
- DICT_ERR_IGNORE_NONE);
+ table = dict_table_open_on_name(table_name, TRUE, TRUE,
+ DICT_ERR_IGNORE_NONE);
trx_start_if_not_started_xa(trx);
@@ -2292,6 +2401,11 @@ row_create_index_for_mysql(
len = ut_max(len, field_lengths[i]);
}
+ DBUG_EXECUTE_IF(
+ "ib_create_table_fail_at_create_index",
+ len = DICT_MAX_FIELD_LEN_BY_FORMAT(table) + 1;
+ );
+
/* Column or prefix length exceeds maximum column length */
if (len > (ulint) DICT_MAX_FIELD_LEN_BY_FORMAT(table)) {
err = DB_TOO_BIG_INDEX_COL;
@@ -2308,7 +2422,7 @@ row_create_index_for_mysql(
/* Note that the space id where we store the index is inherited from
the table in dict_build_index_def_step() in dict0crea.cc. */
- node = ind_create_graph_create(index, heap);
+ node = ind_create_graph_create(index, heap, true);
thr = pars_complete_graph_for_exec(node, trx, heap);
@@ -2332,7 +2446,7 @@ row_create_index_for_mysql(
}
error_handling:
- dict_table_close(table, TRUE);
+ dict_table_close(table, TRUE, FALSE);
if (err != DB_SUCCESS) {
/* We have special error handling here */
@@ -2353,7 +2467,7 @@ error_handling:
mem_free(table_name);
mem_free(index_name);
- return((int) err);
+ return(err);
}
/*********************************************************************//**
@@ -2366,7 +2480,7 @@ fields than mentioned in the constraint. Check also that foreign key
constraints which reference this table are ok.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_table_add_foreign_constraints(
/*==============================*/
trx_t* trx, /*!< in: transaction */
@@ -2383,7 +2497,7 @@ row_table_add_foreign_constraints(
code DB_CANNOT_ADD_CONSTRAINT if
any foreign keys are found. */
{
- ulint err;
+ dberr_t err;
ut_ad(mutex_own(&(dict_sys->mutex)));
#ifdef UNIV_SYNC_DEBUG
@@ -2399,6 +2513,12 @@ row_table_add_foreign_constraints(
err = dict_create_foreign_constraints(trx, sql_string, sql_length,
name, reject_fks);
+
+ DBUG_EXECUTE_IF("ib_table_add_foreign_fail",
+ err = DB_DUPLICATE_KEY;);
+
+ DEBUG_SYNC_C("table_add_foreign_constraints");
+
if (err == DB_SUCCESS) {
/* Check that also referencing constraints are ok */
err = dict_load_foreigns(name, FALSE, TRUE);
@@ -2418,7 +2538,7 @@ row_table_add_foreign_constraints(
trx->error_state = DB_SUCCESS;
}
- return((int) err);
+ return(err);
}
/*********************************************************************//**
@@ -2430,12 +2550,12 @@ as a background operation, which is taken care of by the master thread
in srv0srv.cc.
@return error code or DB_SUCCESS */
static
-int
+dberr_t
row_drop_table_for_mysql_in_background(
/*===================================*/
const char* name) /*!< in: table name */
{
- ulint error;
+ dberr_t error;
trx_t* trx;
trx = trx_allocate_for_background();
@@ -2464,7 +2584,7 @@ row_drop_table_for_mysql_in_background(
trx_free_for_background(trx);
- return((int) error);
+ return(error);
}
/*********************************************************************//**
@@ -2498,8 +2618,8 @@ loop:
return(n_tables + n_tables_dropped);
}
- table = dict_table_open_on_name_no_stats(drop->table_name, FALSE,
- DICT_ERR_IGNORE_NONE);
+ table = dict_table_open_on_name(drop->table_name, FALSE, FALSE,
+ DICT_ERR_IGNORE_NONE);
if (table == NULL) {
/* If for some reason the table has already been dropped
@@ -2510,7 +2630,7 @@ loop:
ut_a(!table->can_be_evicted);
- dict_table_close(table, FALSE);
+ dict_table_close(table, FALSE, FALSE);
if (DB_SUCCESS != row_drop_table_for_mysql_in_background(
drop->table_name)) {
@@ -2617,356 +2737,429 @@ row_add_table_to_background_drop_list(
}
/*********************************************************************//**
-Discards the tablespace of a table which stored in an .ibd file. Discarding
-means that this function deletes the .ibd file and assigns a new table id for
-the table. Also the flag table->ibd_file_missing is set TRUE.
+Reassigns the table identifier of a table.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
-row_discard_tablespace_for_mysql(
-/*=============================*/
- const char* name, /*!< in: table name */
- trx_t* trx) /*!< in: transaction handle */
+dberr_t
+row_mysql_table_id_reassign(
+/*========================*/
+ dict_table_t* table, /*!< in/out: table */
+ trx_t* trx, /*!< in/out: transaction */
+ table_id_t* new_id) /*!< out: new table id */
{
- dict_foreign_t* foreign;
- table_id_t new_id;
- dict_table_t* table;
- ibool success;
- ulint err;
- pars_info_t* info = NULL;
+ dberr_t err;
+ pars_info_t* info = pars_info_create();
- /* How do we prevent crashes caused by ongoing operations on
- the table? Old operations could try to access non-existent
- pages.
+ dict_hdr_get_new_id(new_id, NULL, NULL);
- 1) SQL queries, INSERT, SELECT, ...: we must get an exclusive
- MySQL table lock on the table before we can do DISCARD
- TABLESPACE. Then there are no running queries on the table.
+ /* Remove all locks except the table-level S and X locks. */
+ lock_remove_all_on_table(table, FALSE);
- 2) Purge and rollback: we assign a new table id for the
- table. Since purge and rollback look for the table based on
- the table id, they see the table as 'dropped' and discard
- their operations.
+ pars_info_add_ull_literal(info, "old_id", table->id);
+ pars_info_add_ull_literal(info, "new_id", *new_id);
+
+ err = que_eval_sql(
+ info,
+ "PROCEDURE RENUMBER_TABLE_PROC () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_TABLES SET ID = :new_id\n"
+ " WHERE ID = :old_id;\n"
+ "UPDATE SYS_COLUMNS SET TABLE_ID = :new_id\n"
+ " WHERE TABLE_ID = :old_id;\n"
+ "UPDATE SYS_INDEXES SET TABLE_ID = :new_id\n"
+ " WHERE TABLE_ID = :old_id;\n"
+ "END;\n", FALSE, trx);
- 3) Insert buffer: we remove all entries for the tablespace in
- the insert buffer tree; as long as the tablespace mem object
- does not exist, ongoing insert buffer page merges are
- discarded in buf0rea.cc. If we recreate the tablespace mem
- object with IMPORT TABLESPACE later, then the tablespace will
- have the same id, but the tablespace_version field in the mem
- object is different, and ongoing old insert buffer page merges
- get discarded.
+ return(err);
+}
- 4) Linear readahead and random readahead: we use the same
- method as in 3) to discard ongoing operations.
+/*********************************************************************//**
+Setup the pre-requisites for DISCARD TABLESPACE. It will start the transaction,
+acquire the data dictionary lock in X mode and open the table.
+@return table instance or 0 if not found. */
+static
+dict_table_t*
+row_discard_tablespace_begin(
+/*=========================*/
+ const char* name, /*!< in: table name */
+ trx_t* trx) /*!< in: transaction handle */
+{
+ trx->op_info = "discarding tablespace";
- 5) FOREIGN KEY operations: if
- table->n_foreign_key_checks_running > 0, we do not allow the
- discard. We also reserve the data dictionary latch. */
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
- trx->op_info = "discarding tablespace";
trx_start_if_not_started_xa(trx);
/* Serialize data dictionary operations with dictionary mutex:
- no deadlocks can occur then in these operations */
+ this is to avoid deadlocks during data dictionary operations */
row_mysql_lock_data_dictionary(trx);
- table = dict_table_open_on_name_no_stats(name, TRUE,
- DICT_ERR_IGNORE_NONE);
-
- if (!table) {
- err = DB_TABLE_NOT_FOUND;
-
- goto funct_exit;
- }
+ dict_table_t* table;
- if (table->space == 0) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fputs("\n"
- "InnoDB: is in the system tablespace 0"
- " which cannot be discarded\n", stderr);
- err = DB_ERROR;
+ table = dict_table_open_on_name(
+ name, TRUE, FALSE, DICT_ERR_IGNORE_NONE);
- goto funct_exit;
+ if (table) {
+ dict_stats_wait_bg_to_stop_using_tables(table, NULL, trx);
+ ut_a(table->space != TRX_SYS_SPACE);
+ ut_a(table->n_foreign_key_checks_running == 0);
}
- if (table->n_foreign_key_checks_running > 0) {
-
- ut_print_timestamp(stderr);
- fputs(" InnoDB: You are trying to DISCARD table ", stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs("\n"
- "InnoDB: though there is a foreign key check"
- " running on it.\n"
- "InnoDB: Cannot discard the table.\n",
- stderr);
-
- err = DB_ERROR;
+ return(table);
+}
- goto funct_exit;
- }
+/*********************************************************************//**
+Do the foreign key constraint checks.
+@return DB_SUCCESS or error code. */
+static
+dberr_t
+row_discard_tablespace_foreign_key_checks(
+/*======================================*/
+ const trx_t* trx, /*!< in: transaction handle */
+ const dict_table_t* table) /*!< in: table to be discarded */
+{
+ const dict_foreign_t* foreign;
/* Check if the table is referenced by foreign key constraints from
some other table (not the table itself) */
- foreign = UT_LIST_GET_FIRST(table->referenced_list);
+ for (foreign = UT_LIST_GET_FIRST(table->referenced_list);
+ foreign && foreign->foreign_table == table;
+ foreign = UT_LIST_GET_NEXT(referenced_list, foreign)) {
- while (foreign && foreign->foreign_table == table) {
- foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
}
- if (foreign && trx->check_foreigns) {
+ if (!srv_read_only_mode && foreign && trx->check_foreigns) {
FILE* ef = dict_foreign_err_file;
/* We only allow discarding a referenced table if
FOREIGN_KEY_CHECKS is set to 0 */
- err = DB_CANNOT_DROP_CONSTRAINT;
-
mutex_enter(&dict_foreign_err_mutex);
+
rewind(ef);
+
ut_print_timestamp(ef);
fputs(" Cannot DISCARD table ", ef);
- ut_print_name(stderr, trx, TRUE, name);
+ ut_print_name(stderr, trx, TRUE, table->name);
fputs("\n"
"because it is referenced by ", ef);
ut_print_name(stderr, trx, TRUE, foreign->foreign_table_name);
putc('\n', ef);
+
mutex_exit(&dict_foreign_err_mutex);
- goto funct_exit;
+ return(DB_CANNOT_DROP_CONSTRAINT);
}
- dict_hdr_get_new_id(&new_id, NULL, NULL);
+ return(DB_SUCCESS);
+}
- /* Remove all locks except the table-level S and X locks. */
- lock_remove_all_on_table(table, FALSE);
+/*********************************************************************//**
+Cleanup after the DISCARD TABLESPACE operation.
+@return error code. */
+static
+dberr_t
+row_discard_tablespace_end(
+/*=======================*/
+ trx_t* trx, /*!< in/out: transaction handle */
+ dict_table_t* table, /*!< in/out: table to be discarded */
+ dberr_t err) /*!< in: error code */
+{
+ if (table != 0) {
+ dict_table_close(table, TRUE, FALSE);
+ }
- info = pars_info_create();
+ DBUG_EXECUTE_IF("ib_discard_before_commit_crash",
+ log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
+ DBUG_SUICIDE(););
- pars_info_add_str_literal(info, "table_name", name);
- pars_info_add_ull_literal(info, "new_id", new_id);
+ trx_commit_for_mysql(trx);
- err = que_eval_sql(info,
- "PROCEDURE DISCARD_TABLESPACE_PROC () IS\n"
- "old_id CHAR;\n"
- "BEGIN\n"
- "SELECT ID INTO old_id\n"
- "FROM SYS_TABLES\n"
- "WHERE NAME = :table_name\n"
- "LOCK IN SHARE MODE;\n"
- "IF (SQL % NOTFOUND) THEN\n"
- " COMMIT WORK;\n"
- " RETURN;\n"
- "END IF;\n"
- "UPDATE SYS_TABLES SET ID = :new_id\n"
- " WHERE ID = old_id;\n"
- "UPDATE SYS_COLUMNS SET TABLE_ID = :new_id\n"
- " WHERE TABLE_ID = old_id;\n"
- "UPDATE SYS_INDEXES SET TABLE_ID = :new_id\n"
- " WHERE TABLE_ID = old_id;\n"
- "COMMIT WORK;\n"
- "END;\n"
- , FALSE, trx);
+ DBUG_EXECUTE_IF("ib_discard_after_commit_crash",
+ log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
+ DBUG_SUICIDE(););
+
+ row_mysql_unlock_data_dictionary(trx);
+
+ trx->op_info = "";
+
+ return(err);
+}
+
+/*********************************************************************//**
+Do the DISCARD TABLESPACE operation.
+@return DB_SUCCESS or error code. */
+static
+dberr_t
+row_discard_tablespace(
+/*===================*/
+ trx_t* trx, /*!< in/out: transaction handle */
+ dict_table_t* table) /*!< in/out: table to be discarded */
+{
+ dberr_t err;
+
+ /* How do we prevent crashes caused by ongoing operations on
+ the table? Old operations could try to access non-existent
+ pages. MySQL will block all DML on the table using MDL and a
+ DISCARD will not start unless all existing operations on the
+ table to be discarded are completed.
+
+ 1) Acquire the data dictionary latch in X mode. To prevent any
+ internal operations that MySQL is not aware off and also for
+ the internal SQL parser.
+
+ 2) Purge and rollback: we assign a new table id for the
+ table. Since purge and rollback look for the table based on
+ the table id, they see the table as 'dropped' and discard
+ their operations.
+
+ 3) Insert buffer: we remove all entries for the tablespace in
+ the insert buffer tree.
+
+ 4) FOREIGN KEY operations: if table->n_foreign_key_checks_running > 0,
+ we do not allow the discard. */
+
+ /* Play safe and remove all insert buffer entries, though we should
+ have removed them already when DISCARD TABLESPACE was called */
+
+ ibuf_delete_for_discarded_space(table->space);
+
+ table_id_t new_id;
+
+ /* Set the TABLESPACE DISCARD flag in the table definition on disk. */
+
+ err = row_import_update_discarded_flag(trx, table->id, true, true);
if (err != DB_SUCCESS) {
- trx->error_state = DB_SUCCESS;
- trx_rollback_to_savepoint(trx, NULL);
- trx->error_state = DB_SUCCESS;
- } else {
- dict_table_change_id_in_cache(table, new_id);
+ return(err);
+ }
- success = fil_discard_tablespace(table->space);
+ /* Update the index root pages in the system tables, on disk */
- if (!success) {
- trx->error_state = DB_SUCCESS;
- trx_rollback_to_savepoint(trx, NULL);
- trx->error_state = DB_SUCCESS;
+ err = row_import_update_index_root(trx, table, true, true);
- err = DB_ERROR;
- } else {
- /* Set the flag which tells that now it is legal to
- IMPORT a tablespace for this table */
- table->tablespace_discarded = TRUE;
- table->ibd_file_missing = TRUE;
- }
+ if (err != DB_SUCCESS) {
+ return(err);
}
-funct_exit:
+ /* Drop all the FTS auxiliary tables. */
+ if (dict_table_has_fts_index(table)
+ || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
- if (table != NULL) {
- dict_table_close(table, TRUE);
+ fts_drop_tables(trx, table);
}
- trx_commit_for_mysql(trx);
+ /* Assign a new space ID to the table definition so that purge
+ can ignore the changes. Update the system table on disk. */
- row_mysql_unlock_data_dictionary(trx);
+ err = row_mysql_table_id_reassign(table, trx, &new_id);
- trx->op_info = "";
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
- return((int) err);
+ /* Discard the physical file that is used for the tablespace. */
+
+ err = fil_discard_tablespace(table->space);
+
+ switch(err) {
+ case DB_SUCCESS:
+ case DB_IO_ERROR:
+ case DB_TABLESPACE_NOT_FOUND:
+ /* All persistent operations successful, update the
+ data dictionary memory cache. */
+
+ table->ibd_file_missing = TRUE;
+
+ table->flags2 |= DICT_TF2_DISCARDED;
+
+ dict_table_change_id_in_cache(table, new_id);
+
+ /* Reset the root page numbers. */
+
+ for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
+ index != 0;
+ index = UT_LIST_GET_NEXT(indexes, index)) {
+
+ index->page = FIL_NULL;
+ index->space = FIL_NULL;
+ }
+
+ /* If the tablespace did not already exist or we couldn't
+ write to it, we treat that as a successful DISCARD. It is
+ unusable anyway. */
+
+ err = DB_SUCCESS;
+ break;
+
+ default:
+ /* We need to rollback the disk changes, something failed. */
+
+ trx->error_state = DB_SUCCESS;
+
+ trx_rollback_to_savepoint(trx, NULL);
+
+ trx->error_state = DB_SUCCESS;
+ }
+
+ return(err);
}
-/*****************************************************************//**
-Imports a tablespace. The space id in the .ibd file must match the space id
-of the table in the data dictionary.
+/*********************************************************************//**
+Discards the tablespace of a table which stored in an .ibd file. Discarding
+means that this function renames the .ibd file and assigns a new table id for
+the table. Also the flag table->ibd_file_missing is set to TRUE.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
-row_import_tablespace_for_mysql(
-/*============================*/
+dberr_t
+row_discard_tablespace_for_mysql(
+/*=============================*/
const char* name, /*!< in: table name */
trx_t* trx) /*!< in: transaction handle */
{
+ dberr_t err;
dict_table_t* table;
- ibool success;
- lsn_t current_lsn;
- ulint err = DB_SUCCESS;
- trx_start_if_not_started_xa(trx);
+ /* Open the table and start the transaction if not started. */
- trx->op_info = "importing tablespace";
+ table = row_discard_tablespace_begin(name, trx);
- current_lsn = log_get_lsn();
+ if (table == 0) {
+ err = DB_TABLE_NOT_FOUND;
+ } else if (table->space == TRX_SYS_SPACE) {
+ char table_name[MAX_FULL_NAME_LEN + 1];
- /* It is possible, though very improbable, that the lsn's in the
- tablespace to be imported have risen above the current system lsn, if
- a lengthy purge, ibuf merge, or rollback was performed on a backup
- taken with ibbackup. If that is the case, reset page lsn's in the
- file. We assume that mysqld was shut down after it performed these
- cleanup operations on the .ibd file, so that it stamped the latest lsn
- to the FIL_PAGE_FILE_FLUSH_LSN in the first page of the .ibd file.
+ innobase_format_name(
+ table_name, sizeof(table_name), table->name, FALSE);
- TODO: reset also the trx id's in clustered index records and write
- a new space id to each data page. That would allow us to import clean
- .ibd files from another MySQL installation. */
+ ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_TABLE_IN_SYSTEM_TABLESPACE, table_name);
- success = fil_reset_too_high_lsns(name, current_lsn);
+ err = DB_ERROR;
- if (!success) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: cannot reset lsn's in table ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fputs("\n"
- "InnoDB: in ALTER TABLE ... IMPORT TABLESPACE\n",
- stderr);
+ } else if (table->n_foreign_key_checks_running > 0) {
+ char table_name[MAX_FULL_NAME_LEN + 1];
+
+ innobase_format_name(
+ table_name, sizeof(table_name), table->name, FALSE);
+
+ ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+ ER_DISCARD_FK_CHECKS_RUNNING, table_name);
err = DB_ERROR;
- row_mysql_lock_data_dictionary(trx);
- table = NULL;
+ } else {
+ /* Do foreign key constraint checks. */
- goto funct_exit;
- }
+ err = row_discard_tablespace_foreign_key_checks(trx, table);
- /* Serialize data dictionary operations with dictionary mutex:
- no deadlocks can occur then in these operations */
+ if (err == DB_SUCCESS) {
+ err = row_discard_tablespace(trx, table);
+ }
+ }
- row_mysql_lock_data_dictionary(trx);
+ return(row_discard_tablespace_end(trx, table, err));
+}
- table = dict_table_open_on_name_no_stats(name, TRUE,
- DICT_ERR_IGNORE_NONE);
+/*********************************************************************//**
+Sets an exclusive lock on a table.
+@return error code or DB_SUCCESS */
+UNIV_INTERN
+dberr_t
+row_mysql_lock_table(
+/*=================*/
+ trx_t* trx, /*!< in/out: transaction */
+ dict_table_t* table, /*!< in: table to lock */
+ enum lock_mode mode, /*!< in: LOCK_X or LOCK_S */
+ const char* op_info) /*!< in: string for trx->op_info */
+{
+ mem_heap_t* heap;
+ que_thr_t* thr;
+ dberr_t err;
+ sel_node_t* node;
- if (!table) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: table ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fputs("\n"
- "InnoDB: does not exist in the InnoDB data dictionary\n"
- "InnoDB: in ALTER TABLE ... IMPORT TABLESPACE\n",
- stderr);
+ ut_ad(trx);
+ ut_ad(mode == LOCK_X || mode == LOCK_S);
- err = DB_TABLE_NOT_FOUND;
+ heap = mem_heap_create(512);
- goto funct_exit;
- }
+ trx->op_info = op_info;
- if (table->space == 0) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fputs("\n"
- "InnoDB: is in the system tablespace 0"
- " which cannot be imported\n", stderr);
- err = DB_ERROR;
+ node = sel_node_create(heap);
+ thr = pars_complete_graph_for_exec(node, trx, heap);
+ thr->graph->state = QUE_FORK_ACTIVE;
- goto funct_exit;
- }
+ /* We use the select query graph as the dummy graph needed
+ in the lock module call */
- if (!table->tablespace_discarded) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: you are trying to"
- " IMPORT a tablespace\n"
- "InnoDB: ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fputs(", though you have not called DISCARD on it yet\n"
- "InnoDB: during the lifetime of the mysqld process!\n",
- stderr);
+ thr = que_fork_get_first_thr(
+ static_cast<que_fork_t*>(que_node_get_parent(thr)));
- err = DB_ERROR;
+ que_thr_move_to_run_state_for_mysql(thr, trx);
- goto funct_exit;
- }
+run_again:
+ thr->run_node = thr;
+ thr->prev_node = thr->common.parent;
- /* Play safe and remove all insert buffer entries, though we should
- have removed them already when DISCARD TABLESPACE was called */
+ err = lock_table(0, table, mode, thr);
- ibuf_delete_for_discarded_space(table->space);
+ trx->error_state = err;
- success = fil_open_single_table_tablespace(
- TRUE, table->space,
- dict_tf_to_fsp_flags(table->flags),
- table->name);
- if (success) {
- table->ibd_file_missing = FALSE;
- table->tablespace_discarded = FALSE;
+ if (err == DB_SUCCESS) {
+ que_thr_stop_for_mysql_no_error(thr, trx);
} else {
- if (table->ibd_file_missing) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: cannot find or open in the"
- " database directory the .ibd file of\n"
- "InnoDB: table ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fputs("\n"
- "InnoDB: in ALTER TABLE ... IMPORT TABLESPACE\n",
- stderr);
- }
+ que_thr_stop_for_mysql(thr);
- err = DB_ERROR;
- }
+ if (err != DB_QUE_THR_SUSPENDED) {
+ ibool was_lock_wait;
-funct_exit:
+ was_lock_wait = row_mysql_handle_errors(
+ &err, trx, thr, NULL);
- if (table != NULL) {
- dict_table_close(table, TRUE);
- }
+ if (was_lock_wait) {
+ goto run_again;
+ }
+ } else {
+ que_thr_t* run_thr;
+ que_node_t* parent;
- trx_commit_for_mysql(trx);
+ parent = que_node_get_parent(thr);
- row_mysql_unlock_data_dictionary(trx);
+ run_thr = que_fork_start_command(
+ static_cast<que_fork_t*>(parent));
+
+ ut_a(run_thr == thr);
+
+ /* There was a lock wait but the thread was not
+ in a ready to run or running state. */
+ trx->error_state = DB_LOCK_WAIT;
+ goto run_again;
+ }
+ }
+
+ que_graph_free(thr->graph);
trx->op_info = "";
- return((int) err);
+ return(err);
}
/*********************************************************************//**
Truncates a table for MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_truncate_table_for_mysql(
/*=========================*/
dict_table_t* table, /*!< in: table handle */
trx_t* trx) /*!< in: transaction handle */
{
dict_foreign_t* foreign;
- ulint err;
+ dberr_t err;
mem_heap_t* heap;
byte* buf;
dtuple_t* tuple;
@@ -2978,17 +3171,15 @@ row_truncate_table_for_mysql(
ulint recreate_space = 0;
pars_info_t* info = NULL;
ibool has_internal_doc_id;
+ ulint old_space = table->space;
/* How do we prevent crashes caused by ongoing operations on
the table? Old operations could try to access non-existent
pages.
1) SQL queries, INSERT, SELECT, ...: we must get an exclusive
- MySQL table lock on the table before we can do TRUNCATE
- TABLE. Then there are no running queries on the table. This is
- guaranteed, because in ha_innobase::store_lock(), we do not
- weaken the TL_WRITE lock requested by MySQL when executing
- SQLCOM_TRUNCATE.
+ InnoDB table lock on the table before we can do TRUNCATE
+ TABLE. Then there are no running queries on the table.
2) Purge and rollback: we assign a new table id for the
table. Since purge and rollback look for the table based on
@@ -3031,9 +3222,15 @@ row_truncate_table_for_mysql(
return(DB_ERROR);
}
- trx->op_info = "truncating table";
+ if (dict_table_is_discarded(table)) {
+ return(DB_TABLESPACE_DELETED);
+ } else if (table->ibd_file_missing) {
+ return(DB_TABLESPACE_NOT_FOUND);
+ }
- trx_start_if_not_started_xa(trx);
+ trx_start_for_ddl(trx, TRX_DICT_OP_TABLE);
+
+ trx->op_info = "truncating table";
/* Serialize data dictionary operations with dictionary mutex:
no deadlocks can occur then in these operations */
@@ -3049,16 +3246,22 @@ row_truncate_table_for_mysql(
ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
+ dict_stats_wait_bg_to_stop_using_tables(table, NULL, trx);
+
/* Check if the table is referenced by foreign key constraints from
some other table (not the table itself) */
- foreign = UT_LIST_GET_FIRST(table->referenced_list);
+ for (foreign = UT_LIST_GET_FIRST(table->referenced_list);
+ foreign != 0 && foreign->foreign_table == table;
+ foreign = UT_LIST_GET_NEXT(referenced_list, foreign)) {
- while (foreign && foreign->foreign_table == table) {
- foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
+ /* Do nothing. */
}
- if (foreign && trx->check_foreigns) {
+ if (!srv_read_only_mode
+ && foreign
+ && trx->check_foreigns) {
+
FILE* ef = dict_foreign_err_file;
/* We only allow truncating a referenced table if
@@ -3099,19 +3302,41 @@ row_truncate_table_for_mysql(
goto funct_exit;
}
- /* Remove all locks except the table-level S and X locks. */
+ /* Remove all locks except the table-level X lock. */
lock_remove_all_on_table(table, FALSE);
+ /* Ensure that the table will be dropped by
+ trx_rollback_active() in case of a crash. */
+
trx->table_id = table->id;
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+
+ /* Assign an undo segment for the transaction, so that the
+ transaction will be recovered after a crash. */
+
+ mutex_enter(&trx->undo_mutex);
+
+ err = trx_undo_assign_undo(trx, TRX_UNDO_UPDATE);
+
+ mutex_exit(&trx->undo_mutex);
+
+ if (err != DB_SUCCESS) {
+
+ goto funct_exit;
+ }
if (table->space && !table->dir_path_of_temp_table) {
/* Discard and create the single-table tablespace. */
ulint space = table->space;
ulint flags = fil_space_get_flags(space);
+ ut_a(!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY));
+
+ dict_get_and_save_data_dir_path(table, true);
+
if (flags != ULINT_UNDEFINED
- && fil_discard_tablespace(space)) {
+ && fil_discard_tablespace(space) == DB_SUCCESS) {
dict_index_t* index;
@@ -3124,15 +3349,18 @@ row_truncate_table_for_mysql(
if (space == ULINT_UNDEFINED
|| fil_create_new_single_table_tablespace(
- space, table->name, FALSE,
+ space, table->name,
+ table->data_dir_path,
flags, table->flags2,
- FIL_IBD_FILE_INITIAL_SIZE) != DB_SUCCESS) {
+ FIL_IBD_FILE_INITIAL_SIZE)
+ != DB_SUCCESS) {
dict_table_x_unlock_indexes(table);
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: TRUNCATE TABLE %s failed to"
- " create a new tablespace\n",
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "TRUNCATE TABLE %s failed to "
+ "create a new tablespace",
table->name);
+
table->ibd_file_missing = 1;
err = DB_ERROR;
goto funct_exit;
@@ -3240,7 +3468,6 @@ next_rec:
mtr_commit(&mtr);
mem_heap_free(heap);
-
/* Done with index truncation, release index tree locks,
subsequent work relates to table level metadata change */
dict_table_x_unlock_indexes(table);
@@ -3259,21 +3486,21 @@ next_rec:
fts_table.name = table->name;
fts_table.id = new_id;
- err = fts_create_common_tables(trx, &fts_table, table->name,
- TRUE);
+ err = fts_create_common_tables(
+ trx, &fts_table, table->name, TRUE);
- if (err == DB_SUCCESS) {
- for (i = 0; i < ib_vector_size(table->fts->indexes);
- i++) {
- dict_index_t* fts_index;
+ for (i = 0;
+ i < ib_vector_size(table->fts->indexes)
+ && err == DB_SUCCESS;
+ i++) {
- fts_index = static_cast<dict_index_t*>(
- ib_vector_getp(
- table->fts->indexes, i));
+ dict_index_t* fts_index;
- fts_create_index_tables_low(
- trx, fts_index, table->name, new_id);
- }
+ fts_index = static_cast<dict_index_t*>(
+ ib_vector_getp(table->fts->indexes, i));
+
+ err = fts_create_index_tables_low(
+ trx, fts_index, table->name, new_id);
}
if (err != DB_SUCCESS) {
@@ -3287,34 +3514,64 @@ next_rec:
fputs("\n", stderr);
goto funct_exit;
+ } else {
+ ut_ad(trx->state != TRX_STATE_NOT_STARTED);
}
}
info = pars_info_create();
- pars_info_add_int4_literal(info, "space", (lint) table->space);
+ pars_info_add_int4_literal(info, "new_space", (lint) table->space);
pars_info_add_ull_literal(info, "old_id", table->id);
pars_info_add_ull_literal(info, "new_id", new_id);
err = que_eval_sql(info,
- "PROCEDURE RENUMBER_TABLESPACE_PROC () IS\n"
+ "PROCEDURE RENUMBER_TABLE_ID_PROC () IS\n"
"BEGIN\n"
"UPDATE SYS_TABLES"
- " SET ID = :new_id, SPACE = :space\n"
+ " SET ID = :new_id, SPACE = :new_space\n"
" WHERE ID = :old_id;\n"
"UPDATE SYS_COLUMNS SET TABLE_ID = :new_id\n"
" WHERE TABLE_ID = :old_id;\n"
"UPDATE SYS_INDEXES"
- " SET TABLE_ID = :new_id, SPACE = :space\n"
+ " SET TABLE_ID = :new_id, SPACE = :new_space\n"
" WHERE TABLE_ID = :old_id;\n"
- "COMMIT WORK;\n"
"END;\n"
, FALSE, trx);
+ if (err == DB_SUCCESS && old_space != table->space) {
+ info = pars_info_create();
+
+ pars_info_add_int4_literal(info, "old_space", (lint) old_space);
+
+ pars_info_add_int4_literal(
+ info, "new_space", (lint) table->space);
+
+ err = que_eval_sql(info,
+ "PROCEDURE RENUMBER_TABLESPACE_PROC () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_TABLESPACES"
+ " SET SPACE = :new_space\n"
+ " WHERE SPACE = :old_space;\n"
+ "UPDATE SYS_DATAFILES"
+ " SET SPACE = :new_space"
+ " WHERE SPACE = :old_space;\n"
+ "END;\n"
+ , FALSE, trx);
+ }
+ DBUG_EXECUTE_IF("ib_ddl_crash_before_fts_truncate", err = DB_ERROR;);
+
if (err != DB_SUCCESS) {
trx->error_state = DB_SUCCESS;
trx_rollback_to_savepoint(trx, NULL);
trx->error_state = DB_SUCCESS;
+
+ /* Update system table failed. Table in memory metadata
+ could be in an inconsistent state, mark the in-memory
+ table->corrupted to be true. In the long run, this should
+ be fixed by atomic truncate table */
+ table->corrupted = true;
+
ut_print_timestamp(stderr);
fputs(" InnoDB: Unable to assign a new identifier to table ",
stderr);
@@ -3323,30 +3580,40 @@ next_rec:
"InnoDB: after truncating it. Background processes"
" may corrupt the table!\n", stderr);
- /* Fail to update the table id, so drop the new
+ /* Failed to update the table id, so drop the new
FTS auxiliary tables */
if (has_internal_doc_id) {
- dict_table_t fts_table;
+ ut_ad(trx->state == TRX_STATE_NOT_STARTED);
+
+ table_id_t id = table->id;
- fts_table.name = table->name;
- fts_table.id = new_id;
+ table->id = new_id;
- fts_drop_tables(trx, &fts_table);
+ fts_drop_tables(trx, table);
+
+ table->id = id;
+
+ ut_ad(trx->state != TRX_STATE_NOT_STARTED);
}
err = DB_ERROR;
} else {
/* Drop the old FTS index */
if (has_internal_doc_id) {
+ ut_ad(trx->state != TRX_STATE_NOT_STARTED);
fts_drop_tables(trx, table);
+ ut_ad(trx->state != TRX_STATE_NOT_STARTED);
}
+ DBUG_EXECUTE_IF("ib_truncate_crash_after_fts_drop",
+ DBUG_SUICIDE(););
+
dict_table_change_id_in_cache(table, new_id);
/* Reset the Doc ID in cache to 0 */
if (has_internal_doc_id && table->fts->cache) {
table->fts->fts_status |= TABLE_DICT_LOCKED;
- fts_update_next_doc_id(table, NULL, 0);
+ fts_update_next_doc_id(trx, table, NULL, 0);
fts_cache_clear(table->fts->cache, TRUE);
fts_cache_init(table->fts->cache);
table->fts->fts_status &= ~TABLE_DICT_LOCKED;
@@ -3364,16 +3631,13 @@ funct_exit:
row_mysql_unlock_data_dictionary(trx);
- /* We are supposed to recalc and save the stats only
- on ANALYZE, but it also makes sense to do so on TRUNCATE */
- dict_stats_update(table, DICT_STATS_RECALC_PERSISTENT_SILENT,
- FALSE);
+ dict_stats_update(table, DICT_STATS_EMPTY_TABLE);
trx->op_info = "";
srv_wake_master_thread();
- return((int) err);
+ return(err);
}
/*********************************************************************//**
@@ -3385,23 +3649,29 @@ by the transaction, the transaction will be committed. Otherwise, the
data dictionary will remain locked.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_drop_table_for_mysql(
/*=====================*/
const char* name, /*!< in: table name */
trx_t* trx, /*!< in: transaction handle */
- ibool drop_db)/*!< in: TRUE=dropping whole database */
+ bool drop_db,/*!< in: true=dropping whole database */
+ bool nonatomic)
+ /*!< in: whether it is permitted
+ to release and reacquire dict_operation_lock */
{
+ dberr_t err;
dict_foreign_t* foreign;
dict_table_t* table;
- dict_index_t* index;
+ ibool print_msg;
ulint space_id;
- ulint err;
- const char* table_name;
+ char* filepath = NULL;
+ const char* tablename_minus_db;
+ char* tablename = NULL;
+ bool ibd_file_missing;
ulint namelen;
- ibool locked_dictionary = FALSE;
- ibool fts_bg_thread_exited = FALSE;
+ bool locked_dictionary = false;
pars_info_t* info = NULL;
+ mem_heap_t* heap = NULL;
ut_a(name != NULL);
@@ -3419,19 +3689,19 @@ row_drop_table_for_mysql(
Certain table names starting with 'innodb_' have their special
meaning regardless of the database name. Thus, we need to
ignore the database name prefix in the comparisons. */
- table_name = strchr(name, '/');
+ tablename_minus_db = strchr(name, '/');
- if (table_name) {
- table_name++;
+ if (tablename_minus_db) {
+ tablename_minus_db++;
} else {
/* Ancillary FTS tables don't have '/' characters. */
- table_name = name;
+ tablename_minus_db = name;
}
- namelen = strlen(table_name) + 1;
+ namelen = strlen(tablename_minus_db) + 1;
if (namelen == sizeof S_innodb_monitor
- && !memcmp(table_name, S_innodb_monitor,
+ && !memcmp(tablename_minus_db, S_innodb_monitor,
sizeof S_innodb_monitor)) {
/* Table name equals "innodb_monitor":
@@ -3440,17 +3710,17 @@ row_drop_table_for_mysql(
srv_print_innodb_monitor = FALSE;
srv_print_innodb_lock_monitor = FALSE;
} else if (namelen == sizeof S_innodb_lock_monitor
- && !memcmp(table_name, S_innodb_lock_monitor,
+ && !memcmp(tablename_minus_db, S_innodb_lock_monitor,
sizeof S_innodb_lock_monitor)) {
srv_print_innodb_monitor = FALSE;
srv_print_innodb_lock_monitor = FALSE;
} else if (namelen == sizeof S_innodb_tablespace_monitor
- && !memcmp(table_name, S_innodb_tablespace_monitor,
+ && !memcmp(tablename_minus_db, S_innodb_tablespace_monitor,
sizeof S_innodb_tablespace_monitor)) {
srv_print_innodb_tablespace_monitor = FALSE;
} else if (namelen == sizeof S_innodb_table_monitor
- && !memcmp(table_name, S_innodb_table_monitor,
+ && !memcmp(tablename_minus_db, S_innodb_table_monitor,
sizeof S_innodb_table_monitor)) {
srv_print_innodb_table_monitor = FALSE;
@@ -3461,7 +3731,10 @@ row_drop_table_for_mysql(
trx->op_info = "dropping table";
- trx_start_if_not_started(trx);
+ /* This function is called recursively via fts_drop_tables(). */
+ if (trx->state == TRX_STATE_NOT_STARTED) {
+ trx_start_for_ddl(trx, TRX_DICT_OP_TABLE);
+ }
if (trx->dict_operation_lock_mode != RW_X_LATCH) {
/* Prevent foreign key checks etc. while we are dropping the
@@ -3469,17 +3742,17 @@ row_drop_table_for_mysql(
row_mysql_lock_data_dictionary(trx);
- locked_dictionary = TRUE;
+ locked_dictionary = true;
+ nonatomic = true;
}
-retry:
ut_ad(mutex_own(&(dict_sys->mutex)));
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
- table = dict_table_open_on_name_no_stats(
- name, TRUE,
+ table = dict_table_open_on_name(
+ name, TRUE, FALSE,
static_cast<dict_err_ignore_t>(
DICT_ERR_IGNORE_INDEX_ROOT | DICT_ERR_IGNORE_CORRUPT));
@@ -3502,34 +3775,53 @@ retry:
goto funct_exit;
}
- if (table->fts) {
- fts_t* fts = table->fts;
+ /* Turn on this drop bit before we could release the dictionary
+ latch */
+ table->to_be_dropped = true;
- /* It is possible that background 'Add' thread fts_add_thread()
- just gets called and the fts_optimize_thread()
- is processing deleted records. There could be undetected
- deadlock between threads synchronization and dict_sys_mutex
- since fts_parse_sql() requires dict_sys->mutex. Ask the
- background thread to exit before proceeds to drop table to
- avoid undetected deadlocks */
- row_mysql_unlock_data_dictionary(trx);
+ if (nonatomic) {
+ /* This trx did not acquire any locks on dictionary
+ table records yet. Thus it is safe to release and
+ reacquire the data dictionary latches. */
+ if (table->fts) {
+ ut_ad(!table->fts->add_wq);
+ ut_ad(lock_trx_has_sys_table_locks(trx) == 0);
- if (fts->add_wq && (!fts_bg_thread_exited)) {
- /* Wait for any background threads accessing the table
- to exit. */
- mutex_enter(&fts->bg_threads_mutex);
- fts->fts_status |= BG_THREAD_STOP;
+ row_mysql_unlock_data_dictionary(trx);
+ fts_optimize_remove_table(table);
+ row_mysql_lock_data_dictionary(trx);
+ }
- dict_table_wait_for_bg_threads_to_exit(table, 250000);
+ /* Do not bother to deal with persistent stats for temp
+ tables since we know temp tables do not use persistent
+ stats. */
+ if (!dict_table_is_temporary(table)) {
+ dict_stats_wait_bg_to_stop_using_tables(
+ table, NULL, trx);
+ }
+ }
- mutex_exit(&fts->bg_threads_mutex);
+ /* make sure background stats thread is not running on the table */
+ ut_ad(!(table->stats_bg_flag & BG_STAT_IN_PROGRESS));
- row_mysql_lock_data_dictionary(trx);
- fts_bg_thread_exited = TRUE;
- goto retry;
- } else {
- fts_optimize_remove_table(table);
- row_mysql_lock_data_dictionary(trx);
+ /* Delete the link file if used. */
+ if (DICT_TF_HAS_DATA_DIR(table->flags)) {
+ fil_delete_link_file(name);
+ }
+
+ if (!dict_table_is_temporary(table)) {
+
+ dict_stats_recalc_pool_del(table);
+
+ /* Remove stats for this table and all of its indexes from the
+ persistent storage if it exists and if there are stats for this
+ table in there. This function creates its own trx and commits
+ it. */
+ char errstr[1024];
+ err = dict_stats_drop_table(name, errstr, sizeof(errstr));
+
+ if (err != DB_SUCCESS) {
+ ib_logf(IB_LOG_LEVEL_WARN, "%s", errstr);
}
}
@@ -3540,7 +3832,7 @@ retry:
dict_table_move_from_lru_to_non_lru(table);
}
- dict_table_close(table, TRUE);
+ dict_table_close(table, TRUE, FALSE);
/* Check if the table is referenced by foreign key constraints from
some other table (not the table itself) */
@@ -3552,7 +3844,9 @@ check_next_foreign:
foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
}
- if (foreign && trx->check_foreigns
+ if (!srv_read_only_mode
+ && foreign
+ && trx->check_foreigns
&& !(drop_db && dict_tables_have_same_db(
name, foreign->foreign_table_name_lookup))) {
FILE* ef = dict_foreign_err_file;
@@ -3589,16 +3883,16 @@ check_next_foreign:
if (table->n_foreign_key_checks_running > 0) {
- const char* table_name = table->name;
+ const char* save_tablename = table->name;
ibool added;
- added = row_add_table_to_background_drop_list(table_name);
+ added = row_add_table_to_background_drop_list(save_tablename);
if (added) {
ut_print_timestamp(stderr);
fputs(" InnoDB: You are trying to drop table ",
stderr);
- ut_print_name(stderr, trx, TRUE, table_name);
+ ut_print_name(stderr, trx, TRUE, save_tablename);
fputs("\n"
"InnoDB: though there is a"
" foreign key check running on it.\n"
@@ -3663,23 +3957,54 @@ check_next_foreign:
goto funct_exit;
}
+ /* The "to_be_dropped" marks table that is to be dropped, but
+ has not been dropped, instead, was put in the background drop
+ list due to being used by concurrent DML operations. Clear it
+ here since there are no longer any concurrent activities on it,
+ and it is free to be dropped */
+ table->to_be_dropped = false;
+
/* If we get this far then the table to be dropped must not have
any table or record locks on it. */
ut_a(!lock_table_has_locks(table));
- trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
- trx->table_id = table->id;
+ switch (trx_get_dict_operation(trx)) {
+ case TRX_DICT_OP_NONE:
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+ trx->table_id = table->id;
+ case TRX_DICT_OP_TABLE:
+ break;
+ case TRX_DICT_OP_INDEX:
+ /* If the transaction was previously flagged as
+ TRX_DICT_OP_INDEX, we should be dropping auxiliary
+ tables for full-text indexes. */
+ ut_ad(strstr(table->name, "/FTS_") != NULL);
+ }
/* Mark all indexes unavailable in the data dictionary cache
before starting to drop the table. */
- for (index = dict_table_get_first_index(table);
+ unsigned* page_no;
+ unsigned* page_nos;
+ heap = mem_heap_create(
+ 200 + UT_LIST_GET_LEN(table->indexes) * sizeof *page_nos);
+ tablename = mem_heap_strdup(heap, name);
+
+ page_no = page_nos = static_cast<unsigned*>(
+ mem_heap_alloc(
+ heap,
+ UT_LIST_GET_LEN(table->indexes) * sizeof *page_no));
+
+ for (dict_index_t* index = dict_table_get_first_index(table);
index != NULL;
index = dict_table_get_next_index(index)) {
rw_lock_x_lock(dict_index_get_lock(index));
- ut_ad(!index->to_be_dropped);
- index->to_be_dropped = TRUE;
+ /* Save the page numbers so that we can restore them
+ if the operation fails. */
+ *page_no++ = index->page;
+ /* Mark the index unusable. */
+ index->page = FIL_NULL;
rw_lock_x_unlock(dict_index_get_lock(index));
}
@@ -3698,6 +4023,7 @@ check_next_foreign:
"table_id CHAR;\n"
"index_id CHAR;\n"
"foreign_id CHAR;\n"
+ "space_id INT;\n"
"found INT;\n"
"DECLARE CURSOR cur_fk IS\n"
@@ -3720,6 +4046,12 @@ check_next_foreign:
"IF (SQL % NOTFOUND) THEN\n"
" RETURN;\n"
"END IF;\n"
+ "SELECT SPACE INTO space_id\n"
+ "FROM SYS_TABLES\n"
+ "WHERE NAME = :table_name;\n"
+ "IF (SQL % NOTFOUND) THEN\n"
+ " RETURN;\n"
+ "END IF;\n"
"found := 1;\n"
"SELECT ID INTO sys_foreign_id\n"
"FROM SYS_TABLES\n"
@@ -3762,56 +4094,90 @@ check_next_foreign:
" END IF;\n"
"END LOOP;\n"
"CLOSE cur_idx;\n"
+ "DELETE FROM SYS_TABLESPACES\n"
+ "WHERE SPACE = space_id;\n"
+ "DELETE FROM SYS_DATAFILES\n"
+ "WHERE SPACE = space_id;\n"
"DELETE FROM SYS_COLUMNS\n"
"WHERE TABLE_ID = table_id;\n"
"DELETE FROM SYS_TABLES\n"
- "WHERE ID = table_id;\n"
+ "WHERE NAME = :table_name;\n"
"END;\n"
, FALSE, trx);
switch (err) {
- ibool is_temp;
- mem_heap_t* heap;
+ ibool is_temp;
case DB_SUCCESS:
-
- heap = mem_heap_create(200);
-
/* Clone the name, in case it has been allocated
from table->heap, which will be freed by
dict_table_remove_from_cache(table) below. */
- name = mem_heap_strdup(heap, name);
space_id = table->space;
+ ibd_file_missing = table->ibd_file_missing;
- is_temp = table->flags2 & DICT_TF2_TEMPORARY;
+ is_temp = DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY);
+
+ /* If there is a temp path then the temp flag is set.
+ However, during recovery, we might have a temp flag but
+ not know the temp path */
ut_a(table->dir_path_of_temp_table == NULL || is_temp);
+ if (dict_table_is_discarded(table)
+ || table->ibd_file_missing) {
+ /* Do not attempt to drop known-to-be-missing
+ tablespaces. */
+ space_id = 0;
+ }
+
+ /* We do not allow temporary tables with a remote path. */
+ ut_a(!(is_temp && DICT_TF_HAS_DATA_DIR(table->flags)));
+
+ if (space_id && DICT_TF_HAS_DATA_DIR(table->flags)) {
+ dict_get_and_save_data_dir_path(table, true);
+ ut_a(table->data_dir_path);
+
+ filepath = os_file_make_remote_pathname(
+ table->data_dir_path, table->name, "ibd");
+ } else if (table->dir_path_of_temp_table) {
+ filepath = fil_make_ibd_name(
+ table->dir_path_of_temp_table, true);
+ } else {
+ filepath = fil_make_ibd_name(tablename, false);
+ }
if (dict_table_has_fts_index(table)
|| DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
ut_ad(table->n_ref_count == 0);
+ ut_ad(trx->state != TRX_STATE_NOT_STARTED);
err = fts_drop_tables(trx, table);
if (err != DB_SUCCESS) {
ut_print_timestamp(stderr);
- fprintf(stderr," InnoDB: Error: (%lu) not "
+ fprintf(stderr," InnoDB: Error: (%s) not "
"able to remove ancillary FTS tables "
- "for table ", err);
- ut_print_name(stderr, trx, TRUE, name);
+ "for table ", ut_strerr(err));
+ ut_print_name(stderr, trx, TRUE, tablename);
fputs("\n", stderr);
goto funct_exit;
}
+ }
+ /* The table->fts flag can be set on the table for which
+ the cluster index is being rebuilt. Such table might not have
+ DICT_TF2_FTS flag set. So keep this out of above
+ dict_table_has_fts_index condition */
+ if (table->fts) {
fts_free(table);
}
dict_table_remove_from_cache(table);
- if (dict_load_table(name, TRUE, DICT_ERR_IGNORE_NONE) != NULL) {
+ if (dict_load_table(tablename, TRUE,
+ DICT_ERR_IGNORE_NONE) != NULL) {
ut_print_timestamp(stderr);
fputs(" InnoDB: Error: not able to remove table ",
stderr);
- ut_print_name(stderr, trx, TRUE, name);
+ ut_print_name(stderr, trx, TRUE, tablename);
fputs(" from the dictionary cache!\n", stderr);
err = DB_ERROR;
}
@@ -3819,23 +4185,46 @@ check_next_foreign:
/* Do not drop possible .ibd tablespace if something went
wrong: we do not want to delete valuable data of the user */
- if (err == DB_SUCCESS && space_id > 0) {
- if (!fil_space_for_table_exists_in_mem(
- space_id, name, FALSE, !is_temp)) {
+ /* Don't spam the log if we can't find the tablespace of
+ a temp table or if the tablespace has been discarded. */
+ print_msg = !(is_temp || ibd_file_missing);
+
+ if (err == DB_SUCCESS && space_id > TRX_SYS_SPACE) {
+ if (!is_temp
+ && !fil_space_for_table_exists_in_mem(
+ space_id, tablename, FALSE,
+ print_msg, false, NULL, 0)) {
+ /* This might happen if we are dropping a
+ discarded tablespace */
err = DB_SUCCESS;
+ if (print_msg) {
+ char msg_tablename[MAX_FULL_NAME_LEN + 1];
+
+ innobase_format_name(
+ msg_tablename, sizeof(tablename),
+ tablename, FALSE);
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Removed the table %s from "
+ "InnoDB's data dictionary",
+ msg_tablename);
+ }
+
+ /* Force a delete of any discarded
+ or temporary files. */
+
+ fil_delete_file(filepath);
+
+ } else if (fil_delete_tablespace(
+ space_id,
+ BUF_REMOVE_FLUSH_NO_WRITE)
+ != DB_SUCCESS) {
fprintf(stderr,
"InnoDB: We removed now the InnoDB"
" internal data dictionary entry\n"
"InnoDB: of table ");
- ut_print_name(stderr, trx, TRUE, name);
- fprintf(stderr, ".\n");
- } else if (!fil_delete_tablespace(space_id)) {
- fprintf(stderr,
- "InnoDB: We removed now the InnoDB"
- " internal data dictionary entry\n"
- "InnoDB: of table ");
- ut_print_name(stderr, trx, TRUE, name);
+ ut_print_name(stderr, trx, TRUE, tablename);
fprintf(stderr, ".\n");
ut_print_timestamp(stderr);
@@ -3843,13 +4232,12 @@ check_next_foreign:
" InnoDB: Error: not able to"
" delete tablespace %lu of table ",
(ulong) space_id);
- ut_print_name(stderr, trx, TRUE, name);
+ ut_print_name(stderr, trx, TRUE, tablename);
fputs("!\n", stderr);
err = DB_ERROR;
}
}
- mem_heap_free(heap);
break;
case DB_OUT_OF_FILE_SPACE:
@@ -3874,7 +4262,7 @@ check_next_foreign:
fprintf(stderr, "InnoDB: unknown error code %lu"
" while dropping table:", (ulong) err);
- ut_print_name(stderr, trx, TRUE, name);
+ ut_print_name(stderr, trx, TRUE, tablename);
fprintf(stderr, ".\n");
trx->error_state = DB_SUCCESS;
@@ -3884,16 +4272,25 @@ check_next_foreign:
/* Mark all indexes available in the data dictionary
cache again. */
- for (index = dict_table_get_first_index(table);
+ page_no = page_nos;
+
+ for (dict_index_t* index = dict_table_get_first_index(table);
index != NULL;
index = dict_table_get_next_index(index)) {
rw_lock_x_lock(dict_index_get_lock(index));
- index->to_be_dropped = FALSE;
+ ut_a(index->page == FIL_NULL);
+ index->page = *page_no++;
rw_lock_x_unlock(dict_index_get_lock(index));
}
}
funct_exit:
+ if (heap) {
+ mem_heap_free(heap);
+ }
+ if (filepath) {
+ mem_free(filepath);
+ }
if (locked_dictionary) {
trx_commit_for_mysql(trx);
@@ -3905,7 +4302,7 @@ funct_exit:
srv_wake_master_thread();
- return((int) err);
+ return(err);
}
/*********************************************************************//**
@@ -3929,9 +4326,9 @@ row_mysql_drop_temp_tables(void)
mtr_start(&mtr);
btr_pcur_open_at_index_side(
- TRUE,
+ true,
dict_table_get_first_index(dict_sys->sys_tables),
- BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
+ BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
for (;;) {
const rec_t* rec;
@@ -3950,6 +4347,8 @@ row_mysql_drop_temp_tables(void)
ROW_FORMAT=REDUNDANT. */
rec = btr_pcur_get_rec(&pcur);
field = rec_get_nth_field_old(
+ rec, DICT_FLD__SYS_TABLES__NAME, &len);
+ field = rec_get_nth_field_old(
rec, DICT_FLD__SYS_TABLES__N_COLS, &len);
if (len != 4
|| !(mach_read_from_4(field) & DICT_N_COLS_COMPACT)) {
@@ -4003,15 +4402,15 @@ row_mysql_drop_temp_tables(void)
Drop all foreign keys in a database, see Bug#18942.
Called at the end of row_drop_database_for_mysql().
@return error code or DB_SUCCESS */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
drop_all_foreign_keys_in_db(
/*========================*/
const char* name, /*!< in: database name which ends to '/' */
trx_t* trx) /*!< in: transaction handle */
{
pars_info_t* pinfo;
- ulint err;
+ dberr_t err;
ut_a(name[strlen(name) - 1] == '/');
@@ -4063,22 +4462,24 @@ drop_all_foreign_keys_in_db(
Drops a database for MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
row_drop_database_for_mysql(
/*========================*/
const char* name, /*!< in: database name which ends to '/' */
trx_t* trx) /*!< in: transaction handle */
{
- dict_table_t* table;
- char* table_name;
- int err = DB_SUCCESS;
- ulint namelen = strlen(name);
+ dict_table_t* table;
+ char* table_name;
+ dberr_t err = DB_SUCCESS;
+ ulint namelen = strlen(name);
ut_a(name != NULL);
ut_a(name[namelen - 1] == '/');
trx->op_info = "dropping database";
+ trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
+
trx_start_if_not_started_xa(trx);
loop:
row_mysql_lock_data_dictionary(trx);
@@ -4086,11 +4487,29 @@ loop:
while ((table_name = dict_get_first_table_name_in_db(name))) {
ut_a(memcmp(table_name, name, namelen) == 0);
- table = dict_table_open_on_name_no_stats(table_name, TRUE,
- DICT_ERR_IGNORE_NONE);
+ table = dict_table_open_on_name(
+ table_name, TRUE, FALSE, static_cast<dict_err_ignore_t>(
+ DICT_ERR_IGNORE_INDEX_ROOT
+ | DICT_ERR_IGNORE_CORRUPT));
- ut_a(table);
- ut_a(!table->can_be_evicted);
+ if (!table) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot load table %s from InnoDB internal "
+ "data dictionary during drop database",
+ table_name);
+ mem_free(table_name);
+ err = DB_TABLE_NOT_FOUND;
+ break;
+
+ }
+
+ if (row_is_mysql_tmp_table_name(table->name)) {
+ /* There could be an orphan temp table left from
+ interrupted alter table rebuild operation */
+ dict_table_close(table, TRUE, FALSE);
+ } else {
+ ut_a(!table->can_be_evicted || table->ibd_file_missing);
+ }
/* Wait until MySQL does not have any queries running on
the table */
@@ -4121,8 +4540,8 @@ loop:
if (err != DB_SUCCESS) {
fputs("InnoDB: DROP DATABASE ", stderr);
ut_print_name(stderr, trx, TRUE, name);
- fprintf(stderr, " failed with error %lu for table ",
- (ulint) err);
+ fprintf(stderr, " failed with error (%s) for table ",
+ ut_strerr(err));
ut_print_name(stderr, trx, TRUE, table_name);
putc('\n', stderr);
mem_free(table_name);
@@ -4135,7 +4554,7 @@ loop:
if (err == DB_SUCCESS) {
/* after dropping all tables try to drop all leftover
foreign keys in case orphaned ones exist */
- err = (int) drop_all_foreign_keys_in_db(name, trx);
+ err = drop_all_foreign_keys_in_db(name, trx);
if (err != DB_SUCCESS) {
fputs("InnoDB: DROP DATABASE ", stderr);
@@ -4157,9 +4576,9 @@ loop:
/*********************************************************************//**
Checks if a table name contains the string "/#sql" which denotes temporary
tables in MySQL.
-@return TRUE if temporary table */
-static
-ibool
+@return true if temporary table */
+UNIV_INTERN __attribute__((warn_unused_result))
+bool
row_is_mysql_tmp_table_name(
/*========================*/
const char* name) /*!< in: table name in the form
@@ -4172,8 +4591,8 @@ row_is_mysql_tmp_table_name(
/****************************************************************//**
Delete a single constraint.
@return error code or DB_SUCCESS */
-static
-int
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_delete_constraint_low(
/*======================*/
const char* id, /*!< in: constraint id */
@@ -4183,7 +4602,7 @@ row_delete_constraint_low(
pars_info_add_str_literal(info, "id", id);
- return((int) que_eval_sql(info,
+ return(que_eval_sql(info,
"PROCEDURE DELETE_CONSTRAINT () IS\n"
"BEGIN\n"
"DELETE FROM SYS_FOREIGN_COLS WHERE ID = :id;\n"
@@ -4195,8 +4614,8 @@ row_delete_constraint_low(
/****************************************************************//**
Delete a single constraint.
@return error code or DB_SUCCESS */
-static
-int
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_delete_constraint(
/*==================*/
const char* id, /*!< in: constraint id */
@@ -4205,7 +4624,7 @@ row_delete_constraint(
mem_heap_t* heap, /*!< in: memory heap */
trx_t* trx) /*!< in: transaction handle */
{
- ulint err;
+ dberr_t err;
/* New format constraints have ids <databasename>/<constraintname>. */
err = row_delete_constraint_low(
@@ -4222,29 +4641,30 @@ row_delete_constraint(
err = row_delete_constraint_low(id, trx);
}
- return((int) err);
+ return(err);
}
/*********************************************************************//**
Renames a table for MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
row_rename_table_for_mysql(
/*=======================*/
const char* old_name, /*!< in: old table name */
const char* new_name, /*!< in: new table name */
- trx_t* trx, /*!< in: transaction handle */
- ibool commit) /*!< in: if TRUE then commit trx */
+ trx_t* trx, /*!< in/out: transaction */
+ bool commit) /*!< in: whether to commit trx */
{
dict_table_t* table = NULL;
ibool dict_locked = FALSE;
- ulint err = DB_ERROR;
+ dberr_t err = DB_ERROR;
mem_heap_t* heap = NULL;
const char** constraints_to_drop = NULL;
ulint n_constraints_to_drop = 0;
ibool old_is_tmp, new_is_tmp;
pars_info_t* info = NULL;
+ int retry;
ut_a(old_name != NULL);
ut_a(new_name != NULL);
@@ -4279,8 +4699,8 @@ row_rename_table_for_mysql(
dict_locked = trx->dict_operation_lock_mode == RW_X_LATCH;
- table = dict_table_open_on_name_no_stats(old_name, dict_locked,
- DICT_ERR_IGNORE_NONE);
+ table = dict_table_open_on_name(old_name, dict_locked, FALSE,
+ DICT_ERR_IGNORE_NONE);
if (!table) {
err = DB_TABLE_NOT_FOUND;
@@ -4299,18 +4719,19 @@ row_rename_table_for_mysql(
"InnoDB: " REFMAN "innodb-troubleshooting.html\n",
stderr);
goto funct_exit;
- } else if (table->ibd_file_missing) {
+
+ } else if (table->ibd_file_missing
+ && !dict_table_is_discarded(table)) {
+
err = DB_TABLE_NOT_FOUND;
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_name(stderr, trx, TRUE, old_name);
- fputs(" does not have an .ibd file"
- " in the database directory.\n"
- "InnoDB: You can look for further help from\n"
- "InnoDB: " REFMAN "innodb-troubleshooting.html\n",
- stderr);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Table %s does not have an .ibd file in the database "
+ "directory. See " REFMAN "innodb-troubleshooting.html",
+ old_name);
+
goto funct_exit;
+
} else if (new_is_tmp) {
/* MySQL is doing an ALTER TABLE command and it renames the
original table to a temporary table name. We want to preserve
@@ -4329,27 +4750,75 @@ row_rename_table_for_mysql(
}
}
+ /* Is a foreign key check running on this table? */
+ for (retry = 0; retry < 100
+ && table->n_foreign_key_checks_running > 0; ++retry) {
+ row_mysql_unlock_data_dictionary(trx);
+ os_thread_yield();
+ row_mysql_lock_data_dictionary(trx);
+ }
+
+ if (table->n_foreign_key_checks_running > 0) {
+ ut_print_timestamp(stderr);
+ fputs(" InnoDB: Error: in ALTER TABLE ", stderr);
+ ut_print_name(stderr, trx, TRUE, old_name);
+ fprintf(stderr, "\n"
+ "InnoDB: a FOREIGN KEY check is running.\n"
+ "InnoDB: Cannot rename table.\n");
+ err = DB_TABLE_IN_FK_CHECK;
+ goto funct_exit;
+ }
+
/* We use the private SQL parser of Innobase to generate the query
graphs needed in updating the dictionary data from system tables. */
info = pars_info_create();
pars_info_add_str_literal(info, "new_table_name", new_name);
-
pars_info_add_str_literal(info, "old_table_name", old_name);
err = que_eval_sql(info,
"PROCEDURE RENAME_TABLE () IS\n"
"BEGIN\n"
- "UPDATE SYS_TABLES SET NAME = :new_table_name\n"
+ "UPDATE SYS_TABLES"
+ " SET NAME = :new_table_name\n"
" WHERE NAME = :old_table_name;\n"
"END;\n"
, FALSE, trx);
- if (err != DB_SUCCESS) {
+ /* SYS_TABLESPACES and SYS_DATAFILES track non-system tablespaces
+ which have space IDs > 0. */
+ if (err == DB_SUCCESS
+ && table->space != TRX_SYS_SPACE
+ && !table->ibd_file_missing) {
+ /* Make a new pathname to update SYS_DATAFILES. */
+ char* new_path = row_make_new_pathname(table, new_name);
+
+ info = pars_info_create();
+ pars_info_add_str_literal(info, "new_table_name", new_name);
+ pars_info_add_str_literal(info, "new_path_name", new_path);
+ pars_info_add_int4_literal(info, "space_id", table->space);
+
+ err = que_eval_sql(info,
+ "PROCEDURE RENAME_SPACE () IS\n"
+ "BEGIN\n"
+ "UPDATE SYS_TABLESPACES"
+ " SET NAME = :new_table_name\n"
+ " WHERE SPACE = :space_id;\n"
+ "UPDATE SYS_DATAFILES"
+ " SET PATH = :new_path_name\n"
+ " WHERE SPACE = :space_id;\n"
+ "END;\n"
+ , FALSE, trx);
+
+ mem_free(new_path);
+ }
+ if (err != DB_SUCCESS) {
goto end;
- } else if (!new_is_tmp) {
+ }
+
+ if (!new_is_tmp) {
/* Rename all constraints. */
info = pars_info_create();
@@ -4486,12 +4955,12 @@ end:
/* The following call will also rename the .ibd data file if
the table is stored in a single-table tablespace */
- if (!dict_table_rename_in_cache(table, new_name,
- !new_is_tmp)) {
+ err = dict_table_rename_in_cache(
+ table, new_name, !new_is_tmp);
+ if (err != DB_SUCCESS) {
trx->error_state = DB_SUCCESS;
trx_rollback_to_savepoint(trx, NULL);
trx->error_state = DB_SUCCESS;
- err = DB_ERROR;
goto funct_exit;
}
@@ -4527,8 +4996,8 @@ end:
stderr);
}
- ut_a(dict_table_rename_in_cache(table,
- old_name, FALSE));
+ ut_a(DB_SUCCESS == dict_table_rename_in_cache(
+ table, old_name, FALSE));
trx->error_state = DB_SUCCESS;
trx_rollback_to_savepoint(trx, NULL);
trx->error_state = DB_SUCCESS;
@@ -4538,7 +5007,7 @@ end:
funct_exit:
if (table != NULL) {
- dict_table_close(table, dict_locked);
+ dict_table_close(table, dict_locked, FALSE);
}
if (commit) {
@@ -4558,9 +5027,9 @@ funct_exit:
Checks that the index contains entries in an ascending order, unique
constraint is not broken, and calculates the number of index entries
in the read view of the current transaction.
-@return TRUE if ok */
+@return true if ok */
UNIV_INTERN
-ibool
+bool
row_check_index_for_mysql(
/*======================*/
row_prebuilt_t* prebuilt, /*!< in: prebuilt struct
@@ -4575,7 +5044,7 @@ row_check_index_for_mysql(
byte* buf;
ulint ret;
rec_t* rec;
- ibool is_ok = TRUE;
+ bool is_ok = true;
int cmp;
ibool contains_null;
ulint i;
@@ -4588,10 +5057,20 @@ row_check_index_for_mysql(
*n_rows = 0;
- /* Full Text index are implemented by auxiliary tables,
- not the B-tree */
- if (index->type & DICT_FTS) {
- return(TRUE);
+ if (dict_index_is_clust(index)) {
+ /* The clustered index of a table is always available.
+ During online ALTER TABLE that rebuilds the table, the
+ clustered index in the old table will have
+ index->online_log pointing to the new table. All
+ indexes of the old table will remain valid and the new
+ table will be unaccessible to MySQL until the
+ completion of the ALTER TABLE. */
+ } else if (dict_index_is_online_ddl(index)
+ || (index->type & DICT_FTS)) {
+ /* Full Text index are implemented by auxiliary tables,
+ not the B-tree. We also skip secondary indexes that are
+ being created online. */
+ return(true);
}
buf = static_cast<byte*>(mem_alloc(UNIV_PAGE_SIZE));
@@ -4672,7 +5151,7 @@ not_ok:
"InnoDB: record ", stderr);
rec_print_new(stderr, rec, offsets);
putc('\n', stderr);
- is_ok = FALSE;
+ is_ok = false;
} else if (dict_index_is_unique(index)
&& !contains_null
&& matched_fields
@@ -4702,9 +5181,8 @@ not_ok:
mem_heap_empty(heap);
- prev_entry = row_rec_to_index_entry(ROW_COPY_DATA, rec,
- index, offsets,
- &n_ext, heap);
+ prev_entry = row_rec_to_index_entry(
+ rec, index, offsets, &n_ext, heap);
if (UNIV_LIKELY_NULL(tmp_heap)) {
mem_heap_free(tmp_heap);
@@ -4718,9 +5196,9 @@ not_ok:
/*********************************************************************//**
Determines if a table is a magic monitor table.
-@return TRUE if monitor table */
+@return true if monitor table */
UNIV_INTERN
-ibool
+bool
row_is_magic_monitor_table(
/*=======================*/
const char* table_name) /*!< in: name of the table, in the
@@ -4751,7 +5229,7 @@ row_mysql_init(void)
{
mutex_create(
row_drop_list_mutex_key,
- &row_drop_list_mutex, SYNC_NO_ORDER_CHECK);
+ &row_drop_list_mutex, SYNC_NO_ORDER_CHECK);
UT_LIST_INIT(row_mysql_drop_list);
diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc
index ab28b396920..ee603be453a 100644
--- a/storage/innobase/row/row0purge.cc
+++ b/storage/innobase/row/row0purge.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -42,8 +42,10 @@ Created 3/14/1997 Heikki Tuuri
#include "row0upd.h"
#include "row0vers.h"
#include "row0mysql.h"
+#include "row0log.h"
#include "log0log.h"
#include "srv0mon.h"
+#include "srv0start.h"
/*************************************************************************
IMPORTANT NOTE: Any operation that generates redo MUST check that there
@@ -110,119 +112,134 @@ row_purge_reposition_pcur(
return(node->found_clust);
}
+/** Status of row_purge_remove_clust() */
+enum row_purge_status {
+ ROW_PURGE_DONE, /*!< The row has been removed. */
+ ROW_PURGE_FAIL, /*!< The purge was not successful. */
+ ROW_PURGE_SUSPEND/*!< Cannot purge now, due to online rebuild. */
+};
+
/***********************************************************//**
Removes a delete marked clustered index record if possible.
-@return TRUE if success, or if not found, or if modified after the
-delete marking */
-static
-ibool
+@retval ROW_PURGE_DONE if the row was not found, or it was successfully removed
+@retval ROW_PURGE_FAIL if the row was modified after the delete marking
+@retval ROW_PURGE_SUSPEND if the row refers to an off-page column and
+an online ALTER TABLE (table rebuild) is in progress. */
+static __attribute__((nonnull, warn_unused_result))
+enum row_purge_status
row_purge_remove_clust_if_poss_low(
/*===============================*/
- purge_node_t* node, /*!< in: row purge node */
+ purge_node_t* node, /*!< in/out: row purge node */
ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
{
- dict_index_t* index;
- btr_pcur_t* pcur;
- btr_cur_t* btr_cur;
- ibool success;
- ulint err;
- mtr_t mtr;
- rec_t* rec;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ dict_index_t* index;
+ enum row_purge_status status = ROW_PURGE_DONE;
+ mtr_t mtr;
+ rec_t* rec;
+ mem_heap_t* heap = NULL;
+ ulint* offsets;
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
rec_offs_init(offsets_);
- index = dict_table_get_first_index(node->table);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED));
+#endif /* UNIV_SYNC_DEBUG */
- pcur = &node->pcur;
- btr_cur = btr_pcur_get_btr_cur(pcur);
+ index = dict_table_get_first_index(node->table);
log_free_check();
mtr_start(&mtr);
- success = row_purge_reposition_pcur(mode, node, &mtr);
-
- if (!success) {
- /* The record is already removed */
-
- btr_pcur_commit_specify_mtr(pcur, &mtr);
-
- return(TRUE);
+ if (!row_purge_reposition_pcur(mode, node, &mtr)) {
+ /* The record was already removed. */
+ goto func_exit;
}
- rec = btr_pcur_get_rec(pcur);
+ rec = btr_pcur_get_rec(&node->pcur);
- if (node->roll_ptr != row_get_rec_roll_ptr(
- rec, index, rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap))) {
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- /* Someone else has modified the record later: do not remove */
- btr_pcur_commit_specify_mtr(pcur, &mtr);
+ offsets = rec_get_offsets(
+ rec, index, offsets_, ULINT_UNDEFINED, &heap);
- return(TRUE);
+ if (node->roll_ptr != row_get_rec_roll_ptr(rec, index, offsets)) {
+ /* Someone else has modified the record later: do not remove */
+ goto func_exit;
}
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
+ if (dict_index_get_online_status(index) == ONLINE_INDEX_CREATION
+ && rec_offs_any_extern(offsets)) {
+ status = ROW_PURGE_SUSPEND;
+ goto func_exit;
}
if (mode == BTR_MODIFY_LEAF) {
- success = btr_cur_optimistic_delete(btr_cur, &mtr);
+ status = btr_cur_optimistic_delete(
+ btr_pcur_get_btr_cur(&node->pcur), 0, &mtr)
+ ? ROW_PURGE_DONE : ROW_PURGE_FAIL;
} else {
+ dberr_t err;
ut_ad(mode == BTR_MODIFY_TREE);
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
- RB_NONE, &mtr);
+ btr_cur_pessimistic_delete(
+ &err, FALSE, btr_pcur_get_btr_cur(&node->pcur), 0,
+ RB_NONE, &mtr);
- if (err == DB_SUCCESS) {
- success = TRUE;
- } else if (err == DB_OUT_OF_FILE_SPACE) {
- success = FALSE;
- } else {
+ switch (err) {
+ case DB_SUCCESS:
+ break;
+ case DB_OUT_OF_FILE_SPACE:
+ status = ROW_PURGE_FAIL;
+ break;
+ default:
ut_error;
}
}
- btr_pcur_commit_specify_mtr(pcur, &mtr);
+func_exit:
+ if (heap) {
+ mem_heap_free(heap);
+ }
- return(success);
+ btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
+
+ return(status);
}
/***********************************************************//**
Removes a clustered index record if it has not been modified after the delete
-marking. */
-static
-void
+marking.
+@retval true if the row was not found, or it was successfully removed
+@retval false the purge needs to be suspended, either because of
+running out of file space or because the row refers to an off-page
+column and an online ALTER TABLE (table rebuild) is in progress. */
+static __attribute__((nonnull, warn_unused_result))
+bool
row_purge_remove_clust_if_poss(
/*===========================*/
- purge_node_t* node) /*!< in: row purge node */
+ purge_node_t* node) /*!< in/out: row purge node */
{
- ibool success;
- ulint n_tries = 0;
-
- /* fputs("Purge: Removing clustered record\n", stderr); */
-
- success = row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_LEAF);
- if (success) {
-
- return;
+ switch (row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_LEAF)) {
+ case ROW_PURGE_DONE:
+ return(true);
+ case ROW_PURGE_SUSPEND:
+ return(false);
+ case ROW_PURGE_FAIL:
+ break;
}
-retry:
- success = row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_TREE);
- /* The delete operation may fail if we have little
- file space left: TODO: easiest to crash the database
- and restart with more file space */
- if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
- n_tries++;
-
- os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
-
- goto retry;
+ for (ulint n_tries = 0;
+ n_tries < BTR_CUR_RETRY_DELETE_N_TIMES;
+ n_tries++) {
+ switch (row_purge_remove_clust_if_poss_low(
+ node, BTR_MODIFY_TREE)) {
+ case ROW_PURGE_DONE:
+ return(true);
+ case ROW_PURGE_SUSPEND:
+ return(false);
+ case ROW_PURGE_FAIL:
+ os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
+ }
}
- ut_a(success);
+ return(false);
}
/***********************************************************//**
@@ -234,21 +251,21 @@ is newer than the purge view.
NOTE: This function should only be called by the purge thread, only
while holding a latch on the leaf page of the secondary index entry
(or keeping the buffer pool watch on the page). It is possible that
-this function first returns TRUE and then FALSE, if a user transaction
+this function first returns true and then false, if a user transaction
inserts a record that the secondary index entry would refer to.
However, in that case, the user transaction would also re-insert the
secondary index entry after purge has removed it and released the leaf
page latch.
-@return TRUE if the secondary index record can be purged */
+@return true if the secondary index record can be purged */
UNIV_INTERN
-ibool
+bool
row_purge_poss_sec(
/*===============*/
purge_node_t* node, /*!< in/out: row purge node */
dict_index_t* index, /*!< in: secondary index */
const dtuple_t* entry) /*!< in: secondary index entry */
{
- ibool can_delete;
+ bool can_delete;
mtr_t mtr;
ut_ad(!dict_index_is_clust(index));
@@ -268,7 +285,7 @@ row_purge_poss_sec(
Removes a secondary index entry if possible, by modifying the
index tree. Does not try to buffer the delete.
@return TRUE if success or if not found */
-static
+static __attribute__((nonnull, warn_unused_result))
ibool
row_purge_remove_sec_if_poss_tree(
/*==============================*/
@@ -279,13 +296,35 @@ row_purge_remove_sec_if_poss_tree(
btr_pcur_t pcur;
btr_cur_t* btr_cur;
ibool success = TRUE;
- ulint err;
+ dberr_t err;
mtr_t mtr;
enum row_search_result search_result;
log_free_check();
mtr_start(&mtr);
+ if (*index->name == TEMP_INDEX_PREFIX) {
+ /* The index->online_status may change if the
+ index->name starts with TEMP_INDEX_PREFIX (meaning
+ that the index is or was being created online). It is
+ protected by index->lock. */
+ mtr_x_lock(dict_index_get_lock(index), &mtr);
+
+ if (dict_index_is_online_ddl(index)) {
+ /* Online secondary index creation will not
+ copy any delete-marked records. Therefore
+ there is nothing to be purged. We must also
+ skip the purge when a completed index is
+ dropped by rollback_inplace_alter_table(). */
+ goto func_exit_no_pcur;
+ }
+ } else {
+ /* For secondary indexes,
+ index->online_status==ONLINE_INDEX_CREATION unless
+ index->name starts with TEMP_INDEX_PREFIX. */
+ ut_ad(!dict_index_is_online_ddl(index));
+ }
+
search_result = row_search_index_entry(index, entry, BTR_MODIFY_TREE,
&pcur, &mtr);
@@ -327,7 +366,7 @@ row_purge_remove_sec_if_poss_tree(
& rec_get_info_bits(btr_cur_get_rec(btr_cur),
dict_table_is_comp(index->table)));
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
+ btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
RB_NONE, &mtr);
switch (UNIV_EXPECT(err, DB_SUCCESS)) {
case DB_SUCCESS:
@@ -342,6 +381,7 @@ row_purge_remove_sec_if_poss_tree(
func_exit:
btr_pcur_close(&pcur);
+func_exit_no_pcur:
mtr_commit(&mtr);
return(success);
@@ -350,9 +390,10 @@ func_exit:
/***************************************************************
Removes a secondary index entry without modifying the index tree,
if possible.
-@return TRUE if success or if not found */
-static
-ibool
+@retval true if success or if not found
+@retval false if row_purge_remove_sec_if_poss_tree() should be invoked */
+static __attribute__((nonnull, warn_unused_result))
+bool
row_purge_remove_sec_if_poss_leaf(
/*==============================*/
purge_node_t* node, /*!< in: row purge node */
@@ -361,12 +402,40 @@ row_purge_remove_sec_if_poss_leaf(
{
mtr_t mtr;
btr_pcur_t pcur;
+ ulint mode;
enum row_search_result search_result;
+ bool success = true;
log_free_check();
mtr_start(&mtr);
+ if (*index->name == TEMP_INDEX_PREFIX) {
+ /* The index->online_status may change if the
+ index->name starts with TEMP_INDEX_PREFIX (meaning
+ that the index is or was being created online). It is
+ protected by index->lock. */
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
+
+ if (dict_index_is_online_ddl(index)) {
+ /* Online secondary index creation will not
+ copy any delete-marked records. Therefore
+ there is nothing to be purged. We must also
+ skip the purge when a completed index is
+ dropped by rollback_inplace_alter_table(). */
+ goto func_exit_no_pcur;
+ }
+
+ mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED | BTR_DELETE;
+ } else {
+ /* For secondary indexes,
+ index->online_status==ONLINE_INDEX_CREATION unless
+ index->name starts with TEMP_INDEX_PREFIX. */
+ ut_ad(!dict_index_is_online_ddl(index));
+
+ mode = BTR_MODIFY_LEAF | BTR_DELETE;
+ }
+
/* Set the purge node for the call to row_purge_poss_sec(). */
pcur.btr_cur.purge_node = node;
/* Set the query thread, so that ibuf_insert_low() will be
@@ -374,10 +443,9 @@ row_purge_remove_sec_if_poss_leaf(
pcur.btr_cur.thr = static_cast<que_thr_t*>(que_node_get_parent(node));
search_result = row_search_index_entry(
- index, entry, BTR_MODIFY_LEAF | BTR_DELETE, &pcur, &mtr);
+ index, entry, mode, &pcur, &mtr);
switch (search_result) {
- ibool success;
case ROW_FOUND:
/* Before attempting to purge a record, check
if it is safe to do so. */
@@ -390,11 +458,10 @@ row_purge_remove_sec_if_poss_leaf(
btr_cur_get_rec(btr_cur),
dict_table_is_comp(index->table)));
- if (!btr_cur_optimistic_delete(btr_cur, &mtr)) {
+ if (!btr_cur_optimistic_delete(btr_cur, 0, &mtr)) {
/* The index entry could not be deleted. */
- success = FALSE;
- goto func_exit;
+ success = false;
}
}
/* fall through (the index entry is still needed,
@@ -405,9 +472,8 @@ row_purge_remove_sec_if_poss_leaf(
/* The deletion was buffered. */
case ROW_NOT_FOUND:
/* The index entry does not exist, nothing to do. */
- success = TRUE;
- func_exit:
btr_pcur_close(&pcur);
+ func_exit_no_pcur:
mtr_commit(&mtr);
return(success);
}
@@ -418,19 +484,26 @@ row_purge_remove_sec_if_poss_leaf(
/***********************************************************//**
Removes a secondary index entry if possible. */
-UNIV_INLINE
+UNIV_INLINE __attribute__((nonnull(1,2)))
void
row_purge_remove_sec_if_poss(
/*=========================*/
purge_node_t* node, /*!< in: row purge node */
dict_index_t* index, /*!< in: index */
- dtuple_t* entry) /*!< in: index entry */
+ const dtuple_t* entry) /*!< in: index entry */
{
ibool success;
ulint n_tries = 0;
/* fputs("Purge: Removing secondary record\n", stderr); */
+ if (!entry) {
+ /* The node->row must have lacked some fields of this
+ index. This is possible when the undo log record was
+ written before this index was created. */
+ return;
+ }
+
if (row_purge_remove_sec_if_poss_leaf(node, index, entry)) {
return;
@@ -454,18 +527,18 @@ retry:
}
/***********************************************************//**
-Purges a delete marking of a record. */
-static
-void
+Purges a delete marking of a record.
+@retval true if the row was not found, or it was successfully removed
+@retval false the purge needs to be suspended, either because of
+running out of file space or because the row refers to an off-page
+column and an online ALTER TABLE (table rebuild) is in progress. */
+static __attribute__((nonnull, warn_unused_result))
+bool
row_purge_del_mark(
/*===============*/
- purge_node_t* node) /*!< in: row purge node */
+ purge_node_t* node) /*!< in/out: row purge node */
{
mem_heap_t* heap;
- dtuple_t* entry;
- dict_index_t* index;
-
- ut_ad(node);
heap = mem_heap_create(1024);
@@ -477,13 +550,11 @@ row_purge_del_mark(
break;
}
- index = node->index;
-
if (node->index->type != DICT_FTS) {
- /* Build the index entry */
- entry = row_build_index_entry(node->row, NULL, index, heap);
- ut_a(entry);
- row_purge_remove_sec_if_poss(node, index, entry);
+ dtuple_t* entry = row_build_index_entry_low(
+ node->row, NULL, node->index, heap);
+ row_purge_remove_sec_if_poss(node, node->index, entry);
+ mem_heap_empty(heap);
}
node->index = dict_table_get_next_index(node->index);
@@ -491,14 +562,15 @@ row_purge_del_mark(
mem_heap_free(heap);
- row_purge_remove_clust_if_poss(node);
+ return(row_purge_remove_clust_if_poss(node));
}
/***********************************************************//**
Purges an update of an existing record. Also purges an update of a delete
-marked record if that record contained an externally stored field. */
-static
-void
+marked record if that record contained an externally stored field.
+@return true if purged, false if skipped */
+static __attribute__((nonnull, warn_unused_result))
+bool
row_purge_upd_exist_or_extern_func(
/*===============================*/
#ifdef UNIV_DEBUG
@@ -508,16 +580,24 @@ row_purge_upd_exist_or_extern_func(
trx_undo_rec_t* undo_rec) /*!< in: record to purge */
{
mem_heap_t* heap;
- dtuple_t* entry;
- dict_index_t* index;
- ibool is_insert;
- ulint rseg_id;
- ulint page_no;
- ulint offset;
- ulint i;
- mtr_t mtr;
- ut_ad(node);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED));
+#endif /* UNIV_SYNC_DEBUG */
+
+ if (dict_index_get_online_status(dict_table_get_first_index(
+ node->table))
+ == ONLINE_INDEX_CREATION) {
+ for (ulint i = 0; i < upd_get_n_fields(node->update); i++) {
+
+ const upd_field_t* ufield
+ = upd_get_nth_field(node->update, i);
+
+ if (dfield_is_ext(&ufield->new_val)) {
+ return(false);
+ }
+ }
+ }
if (node->rec_type == TRX_UNDO_UPD_DEL_REC
|| (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
@@ -534,15 +614,13 @@ row_purge_upd_exist_or_extern_func(
break;
}
- index = node->index;
-
if (row_upd_changes_ord_field_binary(node->index, node->update,
thr, NULL, NULL)) {
/* Build the older version of the index entry */
- entry = row_build_index_entry(node->row, NULL,
- index, heap);
- ut_a(entry);
- row_purge_remove_sec_if_poss(node, index, entry);
+ dtuple_t* entry = row_build_index_entry_low(
+ node->row, NULL, node->index, heap);
+ row_purge_remove_sec_if_poss(node, node->index, entry);
+ mem_heap_empty(heap);
}
node->index = dict_table_get_next_index(node->index);
@@ -552,7 +630,7 @@ row_purge_upd_exist_or_extern_func(
skip_secondaries:
/* Free possible externally stored fields */
- for (i = 0; i < upd_get_n_fields(node->update); i++) {
+ for (ulint i = 0; i < upd_get_n_fields(node->update); i++) {
const upd_field_t* ufield
= upd_get_nth_field(node->update, i);
@@ -562,6 +640,12 @@ skip_secondaries:
buf_block_t* block;
ulint internal_offset;
byte* data_field;
+ dict_index_t* index;
+ ibool is_insert;
+ ulint rseg_id;
+ ulint page_no;
+ ulint offset;
+ mtr_t mtr;
/* We use the fact that new_val points to
undo_rec and get thus the offset of
@@ -590,9 +674,17 @@ skip_secondaries:
index tree */
index = dict_table_get_first_index(node->table);
-
mtr_x_lock(dict_index_get_lock(index), &mtr);
-
+#ifdef UNIV_DEBUG
+ switch (dict_index_get_online_status(index)) {
+ case ONLINE_INDEX_CREATION:
+ case ONLINE_INDEX_ABORTED_DROPPED:
+ ut_ad(0);
+ case ONLINE_INDEX_COMPLETE:
+ case ONLINE_INDEX_ABORTED:
+ break;
+ }
+#endif /* UNIV_DEBUG */
/* NOTE: we must also acquire an X-latch to the
root page of the tree. We will need it when we
free pages from the tree. If the tree is of height 1,
@@ -622,6 +714,8 @@ skip_secondaries:
mtr_commit(&mtr);
}
}
+
+ return(true);
}
#ifdef UNIV_DEBUG
@@ -634,14 +728,14 @@ skip_secondaries:
/***********************************************************//**
Parses the row reference and other info in a modify undo log record.
-@return TRUE if purge operation required */
+@return true if purge operation required */
static
-ibool
+bool
row_purge_parse_undo_rec(
/*=====================*/
purge_node_t* node, /*!< in: row undo node */
trx_undo_rec_t* undo_rec, /*!< in: record to purge */
- ibool* updated_extern, /*!< out: TRUE if an externally
+ bool* updated_extern, /*!< out: true if an externally
stored field was updated */
que_thr_t* thr) /*!< in: query thread */
{
@@ -665,40 +759,29 @@ row_purge_parse_undo_rec(
if (type == TRX_UNDO_UPD_DEL_REC && !*updated_extern) {
- return(FALSE);
+ return(false);
}
ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
&info_bits);
node->table = NULL;
- if (type == TRX_UNDO_UPD_EXIST_REC
- && node->cmpl_info & UPD_NODE_NO_ORD_CHANGE
- && !(*updated_extern)) {
-
- /* Purge requires no changes to indexes: we may return */
-
- return(FALSE);
- }
-
/* Prevent DROP TABLE etc. from running when we are doing the purge
for this row */
- rw_lock_s_lock_func(&dict_operation_lock, 0, __FILE__, __LINE__);
+ rw_lock_s_lock_inline(&dict_operation_lock, 0, __FILE__, __LINE__);
- node->table = dict_table_open_on_id(table_id, FALSE);
+ node->table = dict_table_open_on_id(table_id, FALSE, FALSE);
if (node->table == NULL) {
-err_exit:
/* The table has been dropped: no need to do purge */
- rw_lock_s_unlock_gen(&dict_operation_lock, 0);
- return(FALSE);
+ goto err_exit;
}
if (node->table->ibd_file_missing) {
/* We skip purge of missing .ibd files */
- dict_table_close(node->table, FALSE);
+ dict_table_close(node->table, FALSE, FALSE);
node->table = NULL;
@@ -708,12 +791,22 @@ err_exit:
clust_index = dict_table_get_first_index(node->table);
if (clust_index == NULL) {
+ /* The table was corrupt in the data dictionary.
+ dict_set_corrupted() works on an index, and
+ we do not have an index to call it with. */
+close_exit:
+ dict_table_close(node->table, FALSE, FALSE);
+err_exit:
+ rw_lock_s_unlock(&dict_operation_lock);
+ return(false);
+ }
- dict_table_close(node->table, FALSE);
-
- /* The table was corrupt in the data dictionary */
+ if (type == TRX_UNDO_UPD_EXIST_REC
+ && (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)
+ && !*updated_extern) {
- goto err_exit;
+ /* Purge requires no changes to indexes: we may return */
+ goto close_exit;
}
ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
@@ -734,13 +827,14 @@ err_exit:
node->heap);
}
- return(TRUE);
+ return(true);
}
/***********************************************************//**
-Purges the parsed record. */
-static
-void
+Purges the parsed record.
+@return true if purged, false if skipped */
+static __attribute__((nonnull, warn_unused_result))
+bool
row_purge_record_func(
/*==================*/
purge_node_t* node, /*!< in: row purge node */
@@ -748,10 +842,11 @@ row_purge_record_func(
#ifdef UNIV_DEBUG
const que_thr_t*thr, /*!< in: query thread */
#endif /* UNIV_DEBUG */
- ibool updated_extern) /*!< in: TRUE if external columns
+ bool updated_extern) /*!< in: whether external columns
were updated */
{
dict_index_t* clust_index;
+ bool purged = true;
clust_index = dict_table_get_first_index(node->table);
@@ -759,7 +854,10 @@ row_purge_record_func(
switch (node->rec_type) {
case TRX_UNDO_DEL_MARK_REC:
- row_purge_del_mark(node);
+ purged = row_purge_del_mark(node);
+ if (!purged) {
+ break;
+ }
MONITOR_INC(MONITOR_N_DEL_ROW_PURGE);
break;
default:
@@ -768,20 +866,25 @@ row_purge_record_func(
}
/* fall through */
case TRX_UNDO_UPD_EXIST_REC:
- row_purge_upd_exist_or_extern(thr, node, undo_rec);
+ purged = row_purge_upd_exist_or_extern(thr, node, undo_rec);
+ if (!purged) {
+ break;
+ }
MONITOR_INC(MONITOR_N_UPD_EXIST_EXTERN);
break;
}
if (node->found_clust) {
btr_pcur_close(&node->pcur);
+ node->found_clust = FALSE;
}
if (node->table != NULL) {
- dict_table_close(node->table, FALSE);
+ dict_table_close(node->table, FALSE, FALSE);
node->table = NULL;
}
+ return(purged);
}
#ifdef UNIV_DEBUG
@@ -804,18 +907,24 @@ row_purge(
trx_undo_rec_t* undo_rec, /*!< in: record to purge */
que_thr_t* thr) /*!< in: query thread */
{
- ut_ad(node);
- ut_ad(thr);
-
if (undo_rec != &trx_purge_dummy_rec) {
- ibool updated_extern;
+ bool updated_extern;
- if (row_purge_parse_undo_rec(
- node, undo_rec, &updated_extern, thr)) {
+ while (row_purge_parse_undo_rec(
+ node, undo_rec, &updated_extern, thr)) {
- row_purge_record(node, undo_rec, thr, updated_extern);
+ bool purged = row_purge_record(
+ node, undo_rec, thr, updated_extern);
+
+ rw_lock_s_unlock(&dict_operation_lock);
+
+ if (purged
+ || srv_shutdown_state != SRV_SHUTDOWN_NONE) {
+ return;
+ }
- rw_lock_s_unlock_gen(&dict_operation_lock, 0);
+ /* Retry the purge in a second. */
+ os_thread_sleep(1000000);
}
}
}
diff --git a/storage/innobase/row/row0quiesce.cc b/storage/innobase/row/row0quiesce.cc
new file mode 100644
index 00000000000..72e0bf43d77
--- /dev/null
+++ b/storage/innobase/row/row0quiesce.cc
@@ -0,0 +1,702 @@
+/*****************************************************************************
+
+Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file row/row0quiesce.cc
+Quiesce a tablespace.
+
+Created 2012-02-08 by Sunny Bains.
+*******************************************************/
+
+#include "row0quiesce.h"
+#include "row0mysql.h"
+
+#ifdef UNIV_NONINL
+#include "row0quiesce.ic"
+#endif
+
+#include "ibuf0ibuf.h"
+#include "srv0start.h"
+#include "trx0purge.h"
+
+/*********************************************************************//**
+Write the meta data (index user fields) config file.
+@return DB_SUCCESS or error code. */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_quiesce_write_index_fields(
+/*===========================*/
+ const dict_index_t* index, /*!< in: write the meta data for
+ this index */
+ FILE* file, /*!< in: file to write to */
+ THD* thd) /*!< in/out: session */
+{
+ byte row[sizeof(ib_uint32_t) * 2];
+
+ for (ulint i = 0; i < index->n_fields; ++i) {
+ byte* ptr = row;
+ const dict_field_t* field = &index->fields[i];
+
+ mach_write_to_4(ptr, field->prefix_len);
+ ptr += sizeof(ib_uint32_t);
+
+ mach_write_to_4(ptr, field->fixed_len);
+
+ DBUG_EXECUTE_IF("ib_export_io_write_failure_9",
+ close(fileno(file)););
+
+ if (fwrite(row, 1, sizeof(row), file) != sizeof(row)) {
+
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
+ errno, strerror(errno),
+ "while writing index fields.");
+
+ return(DB_IO_ERROR);
+ }
+
+ /* Include the NUL byte in the length. */
+ ib_uint32_t len = strlen(field->name) + 1;
+ ut_a(len > 1);
+
+ mach_write_to_4(row, len);
+
+ DBUG_EXECUTE_IF("ib_export_io_write_failure_10",
+ close(fileno(file)););
+
+ if (fwrite(row, 1, sizeof(len), file) != sizeof(len)
+ || fwrite(field->name, 1, len, file) != len) {
+
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
+ errno, strerror(errno),
+ "while writing index column.");
+
+ return(DB_IO_ERROR);
+ }
+ }
+
+ return(DB_SUCCESS);
+}
+
+/*********************************************************************//**
+Write the meta data config file index information.
+@return DB_SUCCESS or error code. */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_quiesce_write_indexes(
+/*======================*/
+ const dict_table_t* table, /*!< in: write the meta data for
+ this table */
+ FILE* file, /*!< in: file to write to */
+ THD* thd) /*!< in/out: session */
+{
+ {
+ byte row[sizeof(ib_uint32_t)];
+
+ /* Write the number of indexes in the table. */
+ mach_write_to_4(row, UT_LIST_GET_LEN(table->indexes));
+
+ DBUG_EXECUTE_IF("ib_export_io_write_failure_11",
+ close(fileno(file)););
+
+ if (fwrite(row, 1, sizeof(row), file) != sizeof(row)) {
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
+ errno, strerror(errno),
+ "while writing index count.");
+
+ return(DB_IO_ERROR);
+ }
+ }
+
+ dberr_t err = DB_SUCCESS;
+
+ /* Write the index meta data. */
+ for (const dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
+ index != 0 && err == DB_SUCCESS;
+ index = UT_LIST_GET_NEXT(indexes, index)) {
+
+ byte* ptr;
+ byte row[sizeof(index_id_t)
+ + sizeof(ib_uint32_t) * 8];
+
+ ptr = row;
+
+ ut_ad(sizeof(index_id_t) == 8);
+ mach_write_to_8(ptr, index->id);
+ ptr += sizeof(index_id_t);
+
+ mach_write_to_4(ptr, index->space);
+ ptr += sizeof(ib_uint32_t);
+
+ mach_write_to_4(ptr, index->page);
+ ptr += sizeof(ib_uint32_t);
+
+ mach_write_to_4(ptr, index->type);
+ ptr += sizeof(ib_uint32_t);
+
+ mach_write_to_4(ptr, index->trx_id_offset);
+ ptr += sizeof(ib_uint32_t);
+
+ mach_write_to_4(ptr, index->n_user_defined_cols);
+ ptr += sizeof(ib_uint32_t);
+
+ mach_write_to_4(ptr, index->n_uniq);
+ ptr += sizeof(ib_uint32_t);
+
+ mach_write_to_4(ptr, index->n_nullable);
+ ptr += sizeof(ib_uint32_t);
+
+ mach_write_to_4(ptr, index->n_fields);
+
+ DBUG_EXECUTE_IF("ib_export_io_write_failure_12",
+ close(fileno(file)););
+
+ if (fwrite(row, 1, sizeof(row), file) != sizeof(row)) {
+
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
+ errno, strerror(errno),
+ "while writing index meta-data.");
+
+ return(DB_IO_ERROR);
+ }
+
+ /* Write the length of the index name.
+ NUL byte is included in the length. */
+ ib_uint32_t len = strlen(index->name) + 1;
+ ut_a(len > 1);
+
+ mach_write_to_4(row, len);
+
+ DBUG_EXECUTE_IF("ib_export_io_write_failure_1",
+ close(fileno(file)););
+
+ if (fwrite(row, 1, sizeof(len), file) != sizeof(len)
+ || fwrite(index->name, 1, len, file) != len) {
+
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
+ errno, strerror(errno),
+ "while writing index name.");
+
+ return(DB_IO_ERROR);
+ }
+
+ err = row_quiesce_write_index_fields(index, file, thd);
+ }
+
+ return(err);
+}
+
+/*********************************************************************//**
+Write the meta data (table columns) config file. Serialise the contents of
+dict_col_t structure, along with the column name. All fields are serialized
+as ib_uint32_t.
+@return DB_SUCCESS or error code. */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_quiesce_write_table(
+/*====================*/
+ const dict_table_t* table, /*!< in: write the meta data for
+ this table */
+ FILE* file, /*!< in: file to write to */
+ THD* thd) /*!< in/out: session */
+{
+ dict_col_t* col;
+ byte row[sizeof(ib_uint32_t) * 7];
+
+ col = table->cols;
+
+ for (ulint i = 0; i < table->n_cols; ++i, ++col) {
+ byte* ptr = row;
+
+ mach_write_to_4(ptr, col->prtype);
+ ptr += sizeof(ib_uint32_t);
+
+ mach_write_to_4(ptr, col->mtype);
+ ptr += sizeof(ib_uint32_t);
+
+ mach_write_to_4(ptr, col->len);
+ ptr += sizeof(ib_uint32_t);
+
+ mach_write_to_4(ptr, col->mbminmaxlen);
+ ptr += sizeof(ib_uint32_t);
+
+ mach_write_to_4(ptr, col->ind);
+ ptr += sizeof(ib_uint32_t);
+
+ mach_write_to_4(ptr, col->ord_part);
+ ptr += sizeof(ib_uint32_t);
+
+ mach_write_to_4(ptr, col->max_prefix);
+
+ DBUG_EXECUTE_IF("ib_export_io_write_failure_2",
+ close(fileno(file)););
+
+ if (fwrite(row, 1, sizeof(row), file) != sizeof(row)) {
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
+ errno, strerror(errno),
+ "while writing table column data.");
+
+ return(DB_IO_ERROR);
+ }
+
+ /* Write out the column name as [len, byte array]. The len
+ includes the NUL byte. */
+ ib_uint32_t len;
+ const char* col_name;
+
+ col_name = dict_table_get_col_name(table, dict_col_get_no(col));
+
+ /* Include the NUL byte in the length. */
+ len = strlen(col_name) + 1;
+ ut_a(len > 1);
+
+ mach_write_to_4(row, len);
+
+ DBUG_EXECUTE_IF("ib_export_io_write_failure_3",
+ close(fileno(file)););
+
+ if (fwrite(row, 1, sizeof(len), file) != sizeof(len)
+ || fwrite(col_name, 1, len, file) != len) {
+
+ ib_senderrf(
+ thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
+ errno, strerror(errno),
+ "while writing column name.");
+
+ return(DB_IO_ERROR);
+ }
+ }
+
+ return(DB_SUCCESS);
+}
+
+/*********************************************************************//**
+Write the meta data config file header.
+Layout, in write order: [version:4][hostname len:4][hostname bytes]
+[table name len:4][table name bytes][autoinc:8][page size:4]
+[table flags:4][n_cols:4]; all integers serialized via mach_write_to_*().
+Both string lengths include the terminating NUL byte.
+@return DB_SUCCESS or error code. */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_quiesce_write_header(
+/*=====================*/
+	const dict_table_t*	table,	/*!< in: write the meta data for
+					this table */
+	FILE*			file,	/*!< in: file to write to */
+	THD*			thd)	/*!< in/out: session */
+{
+	/* Scratch buffer, reused for every 32-bit value written below. */
+	byte			value[sizeof(ib_uint32_t)];
+
+	/* Write the meta-data version number. */
+	mach_write_to_4(value, IB_EXPORT_CFG_VERSION_V1);
+
+	DBUG_EXECUTE_IF("ib_export_io_write_failure_4", close(fileno(file)););
+
+	if (fwrite(&value, 1, sizeof(value), file) != sizeof(value)) {
+		ib_senderrf(
+			thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
+			errno, strerror(errno),
+			"while writing meta-data version number.");
+
+		return(DB_IO_ERROR);
+	}
+
+	/* Write the server hostname. */
+	ib_uint32_t	len;
+	const char*	hostname = server_get_hostname();
+
+	/* Play it safe and check for NULL. */
+	if (hostname == 0) {
+		static const char	NullHostname[] = "Hostname unknown";
+
+		ib_logf(IB_LOG_LEVEL_WARN,
+			"Unable to determine server hostname.");
+
+		hostname = NullHostname;
+	}
+
+	/* The server hostname includes the NUL byte. */
+	len = strlen(hostname) + 1;
+	mach_write_to_4(value, len);
+
+	DBUG_EXECUTE_IF("ib_export_io_write_failure_5", close(fileno(file)););
+
+	if (fwrite(&value, 1, sizeof(value), file) != sizeof(value)
+	    || fwrite(hostname, 1, len, file) != len) {
+
+		ib_senderrf(
+			thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
+			errno, strerror(errno),
+			"while writing hostname.");
+
+		return(DB_IO_ERROR);
+	}
+
+	/* The table name includes the NUL byte. */
+	ut_a(table->name != 0);
+	len = strlen(table->name) + 1;
+
+	/* Write the table name. */
+	mach_write_to_4(value, len);
+
+	DBUG_EXECUTE_IF("ib_export_io_write_failure_6", close(fileno(file)););
+
+	if (fwrite(&value, 1, sizeof(value), file) != sizeof(value)
+	    || fwrite(table->name, 1, len, file) != len) {
+
+		ib_senderrf(
+			thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
+			errno, strerror(errno),
+			"while writing table name.");
+
+		return(DB_IO_ERROR);
+	}
+
+	/* 12-byte scratch buffer: first 8 of its bytes hold the autoinc
+	value, then it is reused for the three 4-byte values below. */
+	byte		row[sizeof(ib_uint32_t) * 3];
+
+	/* Write the next autoinc value. */
+	mach_write_to_8(row, table->autoinc);
+
+	DBUG_EXECUTE_IF("ib_export_io_write_failure_7", close(fileno(file)););
+
+	if (fwrite(row, 1, sizeof(ib_uint64_t), file) != sizeof(ib_uint64_t)) {
+		ib_senderrf(
+			thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
+			errno, strerror(errno),
+			"while writing table autoinc value.");
+
+		return(DB_IO_ERROR);
+	}
+
+	byte*	ptr = row;
+
+	/* Write the system page size. */
+	mach_write_to_4(ptr, UNIV_PAGE_SIZE);
+	ptr += sizeof(ib_uint32_t);
+
+	/* Write the table->flags. */
+	mach_write_to_4(ptr, table->flags);
+	ptr += sizeof(ib_uint32_t);
+
+	/* Write the number of columns in the table. */
+	mach_write_to_4(ptr, table->n_cols);
+
+	DBUG_EXECUTE_IF("ib_export_io_write_failure_8", close(fileno(file)););
+
+	if (fwrite(row, 1, sizeof(row), file) != sizeof(row)) {
+		ib_senderrf(
+			thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
+			errno, strerror(errno),
+			"while writing table meta-data.");
+
+		return(DB_IO_ERROR);
+	}
+
+	return(DB_SUCCESS);
+}
+
+/*********************************************************************//**
+Write the table meta data after quiesce: opens the .cfg file and writes
+the header, column and index sections in sequence, stopping at the first
+section that fails.
+@return DB_SUCCESS or error code */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+row_quiesce_write_cfg(
+/*==================*/
+	dict_table_t*	table,	/*!< in: write the meta data for
+				this table */
+	THD*		thd)	/*!< in/out: session */
+{
+	dberr_t	err;
+	char	name[OS_FILE_MAX_PATH];
+
+	srv_get_meta_data_filename(table, name, sizeof(name));
+
+	ib_logf(IB_LOG_LEVEL_INFO, "Writing table metadata to '%s'", name);
+
+	FILE*	file = fopen(name, "w+b");
+
+	if (file == NULL) {
+		ib_errf(thd, IB_LOG_LEVEL_WARN, ER_CANT_CREATE_FILE,
+			name, errno, strerror(errno));
+
+		err = DB_IO_ERROR;
+	} else {
+		err = row_quiesce_write_header(table, file, thd);
+
+		if (err == DB_SUCCESS) {
+			err = row_quiesce_write_table(table, file, thd);
+		}
+
+		if (err == DB_SUCCESS) {
+			err = row_quiesce_write_indexes(table, file, thd);
+		}
+
+		/* Always flush and close, even after an earlier error,
+		so that no buffered data or open handle is leaked; the
+		warnings below do not override an earlier error code. */
+		if (fflush(file) != 0) {
+
+			char	msg[BUFSIZ];
+
+			ut_snprintf(msg, sizeof(msg), "%s flush() failed",
+				    name);
+
+			ib_senderrf(
+				thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
+				errno, strerror(errno), msg);
+		}
+
+		if (fclose(file) != 0) {
+			char	msg[BUFSIZ];
+
+			/* Bug fix: the message used to read "flose()". */
+			ut_snprintf(msg, sizeof(msg), "%s fclose() failed",
+				    name);
+
+			ib_senderrf(
+				thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
+				errno, strerror(errno), msg);
+		}
+	}
+
+	return(err);
+}
+
+/*********************************************************************//**
+Check whether a table has an FTS index defined on it.
+@return true if an FTS index exists on the table */
+static
+bool
+row_quiesce_table_has_fts_index(
+/*============================*/
+	const dict_table_t*	table)	/*!< in: quiesce this table */
+{
+	bool	found = false;
+
+	/* The index list must be walked under the dictionary mutex. */
+	dict_mutex_enter_for_mysql();
+
+	const dict_index_t*	index = UT_LIST_GET_FIRST(table->indexes);
+
+	while (index != 0) {
+
+		if (index->type & DICT_FTS) {
+			found = true;
+			break;
+		}
+
+		index = UT_LIST_GET_NEXT(indexes, index);
+	}
+
+	dict_mutex_exit_for_mysql();
+
+	return(found);
+}
+
+/*********************************************************************//**
+Quiesce the tablespace that the table resides in: stop purge, merge any
+pending change buffer entries for the table, flush its pages to disk and
+write the .cfg meta-data file, then mark the quiesce as complete. */
+UNIV_INTERN
+void
+row_quiesce_table_start(
+/*====================*/
+	dict_table_t*	table,		/*!< in: quiesce this table */
+	trx_t*		trx)		/*!< in/out: transaction/session */
+{
+	ut_a(trx->mysql_thd != 0);
+	ut_a(srv_n_purge_threads > 0);
+	ut_ad(!srv_read_only_mode);
+
+	char	table_name[MAX_FULL_NAME_LEN + 1];
+
+	innobase_format_name(
+		table_name, sizeof(table_name), table->name, FALSE);
+
+	ib_logf(IB_LOG_LEVEL_INFO,
+		"Sync to disk of '%s' started.", table_name);
+
+	/* Stop purge while the tablespace is being written out. */
+	if (trx_purge_state() != PURGE_STATE_DISABLED) {
+		trx_purge_stop();
+	}
+
+	ut_a(table->id > 0);
+
+	ulint	count = 0;
+
+	/* Merge pending change buffer entries for this table; log
+	progress every 20 iterations so long merges remain visible. */
+	while (ibuf_contract_in_background(table->id, TRUE) != 0) {
+		if (!(++count % 20)) {
+			ib_logf(IB_LOG_LEVEL_INFO,
+				"Merging change buffer entries for '%s'",
+				table_name);
+		}
+	}
+
+	if (!trx_is_interrupted(trx)) {
+		buf_LRU_flush_or_remove_pages(
+			table->space, BUF_REMOVE_FLUSH_WRITE, trx);
+
+		/* Re-check: the flush above can take a long time and
+		the user may have killed the session meanwhile. */
+		if (trx_is_interrupted(trx)) {
+
+			ib_logf(IB_LOG_LEVEL_WARN, "Quiesce aborted!");
+
+		} else if (row_quiesce_write_cfg(table, trx->mysql_thd)
+			   != DB_SUCCESS) {
+
+			ib_logf(IB_LOG_LEVEL_WARN,
+				"There was an error writing to the "
+				"meta data file");
+		} else {
+			ib_logf(IB_LOG_LEVEL_INFO,
+				"Table '%s' flushed to disk", table_name);
+		}
+	} else {
+		ib_logf(IB_LOG_LEVEL_WARN, "Quiesce aborted!");
+	}
+
+	/* Mark the quiesce complete even on abort, so that
+	row_quiesce_table_complete() does not wait forever. */
+	dberr_t	err = row_quiesce_set_state(table, QUIESCE_COMPLETE, trx);
+	ut_a(err == DB_SUCCESS);
+}
+
+/*********************************************************************//**
+Cleanup after table quiesce: wait for the quiesce to finish, remove the
+.cfg meta-data file and resume purge. */
+UNIV_INTERN
+void
+row_quiesce_table_complete(
+/*=======================*/
+	dict_table_t*	table,	/*!< in: quiesce this table */
+	trx_t*		trx)	/*!< in/out: transaction/session */
+{
+	ulint		count = 0;
+	char		table_name[MAX_FULL_NAME_LEN + 1];
+
+	ut_a(trx->mysql_thd != 0);
+
+	innobase_format_name(
+		table_name, sizeof(table_name), table->name, FALSE);
+
+	/* We need to wait for the operation to complete if the
+	transaction has been killed. */
+
+	while (table->quiesce != QUIESCE_COMPLETE) {
+
+		/* Print a warning after every minute. Note: the first
+		warning is printed immediately (count == 0). */
+		if (!(count % 60)) {
+			ib_logf(IB_LOG_LEVEL_WARN,
+				"Waiting for quiesce of '%s' to complete",
+				table_name);
+		}
+
+		/* Sleep for a second. */
+		os_thread_sleep(1000000);
+
+		++count;
+	}
+
+	/* Remove the .cfg file now that the user has resumed
+	normal operations. Otherwise it will cause problems when
+	the user tries to drop the database (remove directory). */
+	char	cfg_name[OS_FILE_MAX_PATH];
+
+	srv_get_meta_data_filename(table, cfg_name, sizeof(cfg_name));
+
+	os_file_delete_if_exists(cfg_name);
+
+	/* NOTE(review): this is logged after the delete above; the
+	wording suggests an announcement — confirm intended order. */
+	ib_logf(IB_LOG_LEVEL_INFO,
+		"Deleting the meta-data file '%s'", cfg_name);
+
+	/* Resume the purge that row_quiesce_table_start() stopped. */
+	if (trx_purge_state() != PURGE_STATE_DISABLED) {
+		trx_purge_run();
+	}
+
+	dberr_t	err = row_quiesce_set_state(table, QUIESCE_NONE, trx);
+	ut_a(err == DB_SUCCESS);
+}
+
+/*********************************************************************//**
+Set a table's quiesce state. Refuses read-only mode and system-tablespace
+tables; warns (but still proceeds) for tables with FTS artifacts. The
+state transition is validated and applied under the data dictionary lock
+with the table's indexes x-locked.
+@return DB_SUCCESS or error code. */
+UNIV_INTERN
+dberr_t
+row_quiesce_set_state(
+/*==================*/
+	dict_table_t*	table,		/*!< in: quiesce this table */
+	ib_quiesce_t	state,		/*!< in: quiesce state to set */
+	trx_t*		trx)		/*!< in/out: transaction */
+{
+	ut_a(srv_n_purge_threads > 0);
+
+	if (srv_read_only_mode) {
+
+		ib_senderrf(trx->mysql_thd,
+			    IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
+
+		return(DB_UNSUPPORTED);
+
+	} else if (table->space == TRX_SYS_SPACE) {
+
+		/* The system tablespace cannot be quiesced on its own. */
+		char	table_name[MAX_FULL_NAME_LEN + 1];
+
+		innobase_format_name(
+			table_name, sizeof(table_name), table->name, FALSE);
+
+		ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_WARN,
+			    ER_TABLE_IN_SYSTEM_TABLESPACE, table_name);
+
+		return(DB_UNSUPPORTED);
+	} else if (row_quiesce_table_has_fts_index(table)) {
+
+		/* Warning only: the quiesce still proceeds below. */
+		ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_WARN,
+			    ER_NOT_SUPPORTED_YET,
+			    "FLUSH TABLES on tables that have an FTS index. "
+			    "FTS auxiliary tables will not be flushed.");
+
+	} else if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
+		/* If this flag is set then the table may not have any active
+		FTS indexes but it will still have the auxiliary tables. */
+
+		ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_WARN,
+			    ER_NOT_SUPPORTED_YET,
+			    "FLUSH TABLES on a table that had an FTS index, "
+			    "created on a hidden column, the "
+			    "auxiliary tables haven't been dropped as yet. "
+			    "FTS auxiliary tables will not be flushed.");
+	}
+
+	/* Lock order matters: dictionary lock first, then index locks. */
+	row_mysql_lock_data_dictionary(trx);
+
+	dict_table_x_lock_indexes(table);
+
+	/* Enforce the legal state machine:
+	NONE -> START -> COMPLETE -> NONE. */
+	switch (state) {
+	case QUIESCE_START:
+		ut_a(table->quiesce == QUIESCE_NONE);
+		break;
+
+	case QUIESCE_COMPLETE:
+		ut_a(table->quiesce == QUIESCE_START);
+		break;
+
+	case QUIESCE_NONE:
+		ut_a(table->quiesce == QUIESCE_COMPLETE);
+		break;
+	}
+
+	table->quiesce = state;
+
+	dict_table_x_unlock_indexes(table);
+
+	row_mysql_unlock_data_dictionary(trx);
+
+	return(DB_SUCCESS);
+}
+
diff --git a/storage/innobase/row/row0row.cc b/storage/innobase/row/row0row.cc
index 8c703b1e06c..be786f954fb 100644
--- a/storage/innobase/row/row0row.cc
+++ b/storage/innobase/row/row0row.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -50,28 +50,26 @@ Created 4/20/1996 Heikki Tuuri
/*****************************************************************//**
When an insert or purge to a table is performed, this function builds
the entry to be inserted into or purged from an index on the table.
-@return index entry which should be inserted or purged, or NULL if the
-externally stored columns in the clustered index record are
-unavailable and ext != NULL */
+@return index entry which should be inserted or purged
+@retval NULL if the externally stored columns in the clustered index record
+are unavailable and ext != NULL, or row is missing some needed columns. */
UNIV_INTERN
dtuple_t*
-row_build_index_entry(
-/*==================*/
- const dtuple_t* row, /*!< in: row which should be
- inserted or purged */
- row_ext_t* ext, /*!< in: externally stored column prefixes,
- or NULL */
- dict_index_t* index, /*!< in: index on the table */
- mem_heap_t* heap) /*!< in: memory heap from which the memory for
- the index entry is allocated */
+row_build_index_entry_low(
+/*======================*/
+ const dtuple_t* row, /*!< in: row which should be
+ inserted or purged */
+ const row_ext_t* ext, /*!< in: externally stored column
+ prefixes, or NULL */
+ dict_index_t* index, /*!< in: index on the table */
+ mem_heap_t* heap) /*!< in: memory heap from which
+ the memory for the index entry
+ is allocated */
{
dtuple_t* entry;
ulint entry_len;
ulint i;
- ut_ad(row && index && heap);
- ut_ad(dtuple_check_typed(row));
-
entry_len = dict_index_get_n_fields(index);
entry = dtuple_create(heap, entry_len);
@@ -96,8 +94,19 @@ row_build_index_entry(
= dtuple_get_nth_field(entry, i);
const dfield_t* dfield2
= dtuple_get_nth_field(row, col_no);
- ulint len
- = dfield_get_len(dfield2);
+ ulint len;
+
+#if DATA_MISSING != 0
+# error "DATA_MISSING != 0"
+#endif
+ if (UNIV_UNLIKELY(dfield_get_type(dfield2)->mtype
+ == DATA_MISSING)) {
+ /* The field has not been initialized in the row.
+ This should be from trx_undo_rec_get_partial_row(). */
+ return(NULL);
+ }
+
+ len = dfield_get_len(dfield2);
dfield_copy(dfield, dfield2);
@@ -171,8 +180,6 @@ row_build_index_entry(
}
}
- ut_ad(dtuple_check_typed(entry));
-
return(entry);
}
@@ -211,21 +218,23 @@ row_build(
of an index, or NULL if
index->table should be
consulted instead */
+ const dtuple_t* add_cols,
+ /*!< in: default values of
+ added columns, or NULL */
+ const ulint* col_map,/*!< in: mapping of old column
+ numbers to new ones, or NULL */
row_ext_t** ext, /*!< out, own: cache of
externally stored column
prefixes, or NULL */
mem_heap_t* heap) /*!< in: memory heap from which
the memory needed is allocated */
{
+ const byte* copy;
dtuple_t* row;
- const dict_table_t* table;
- ulint n_fields;
ulint n_ext_cols;
ulint* ext_cols = NULL; /* remove warning */
ulint len;
- ulint row_len;
byte* buf;
- ulint i;
ulint j;
mem_heap_t* tmp_heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
@@ -234,6 +243,7 @@ row_build(
ut_ad(index && rec && heap);
ut_ad(dict_index_is_clust(index));
ut_ad(!mutex_own(&trx_sys->mutex));
+ ut_ad(!col_map || col_table);
if (!offsets) {
offsets = rec_get_offsets(rec, index, offsets_,
@@ -260,55 +270,84 @@ row_build(
buf = static_cast<byte*>(
mem_heap_alloc(heap, rec_offs_size(offsets)));
- rec = rec_copy(buf, rec, offsets);
- /* Avoid a debug assertion in rec_offs_validate(). */
- rec_offs_make_valid(rec, index, (ulint*) offsets);
+ copy = rec_copy(buf, rec, offsets);
+ } else {
+ copy = rec;
}
- table = index->table;
- row_len = dict_table_get_n_cols(table);
-
- row = dtuple_create(heap, row_len);
-
- dict_table_copy_types(row, table);
-
- dtuple_set_info_bits(row, rec_get_info_bits(
- rec, dict_table_is_comp(table)));
-
- n_fields = rec_offs_n_fields(offsets);
n_ext_cols = rec_offs_n_extern(offsets);
if (n_ext_cols) {
ext_cols = static_cast<ulint*>(
mem_heap_alloc(heap, n_ext_cols * sizeof *ext_cols));
}
- for (i = j = 0; i < n_fields; i++) {
- dict_field_t* ind_field
+ /* Avoid a debug assertion in rec_offs_validate(). */
+ rec_offs_make_valid(copy, index, const_cast<ulint*>(offsets));
+
+ if (!col_table) {
+ ut_ad(!col_map);
+ ut_ad(!add_cols);
+ col_table = index->table;
+ }
+
+ if (add_cols) {
+ ut_ad(col_map);
+ row = dtuple_copy(add_cols, heap);
+ /* dict_table_copy_types() would set the fields to NULL */
+ for (ulint i = 0; i < dict_table_get_n_cols(col_table); i++) {
+ dict_col_copy_type(
+ dict_table_get_nth_col(col_table, i),
+ dfield_get_type(dtuple_get_nth_field(row, i)));
+ }
+ } else {
+ row = dtuple_create(heap, dict_table_get_n_cols(col_table));
+ dict_table_copy_types(row, col_table);
+ }
+
+ dtuple_set_info_bits(row, rec_get_info_bits(
+ copy, rec_offs_comp(offsets)));
+
+ j = 0;
+
+ for (ulint i = 0; i < rec_offs_n_fields(offsets); i++) {
+ const dict_field_t* ind_field
= dict_index_get_nth_field(index, i);
+
+ if (ind_field->prefix_len) {
+ /* Column prefixes can only occur in key
+ fields, which cannot be stored externally. For
+ a column prefix, there should also be the full
+ field in the clustered index tuple. The row
+ tuple comprises full fields, not prefixes. */
+ ut_ad(!rec_offs_nth_extern(offsets, i));
+ continue;
+ }
+
const dict_col_t* col
= dict_field_get_col(ind_field);
ulint col_no
= dict_col_get_no(col);
- dfield_t* dfield
- = dtuple_get_nth_field(row, col_no);
-
- if (ind_field->prefix_len == 0) {
- const byte* field = rec_get_nth_field(
- rec, offsets, i, &len);
+ if (col_map) {
+ col_no = col_map[col_no];
- dfield_set_data(dfield, field, len);
+ if (col_no == ULINT_UNDEFINED) {
+ /* dropped column */
+ continue;
+ }
}
+ dfield_t* dfield = dtuple_get_nth_field(row, col_no);
+
+ const byte* field = rec_get_nth_field(
+ copy, offsets, i, &len);
+
+ dfield_set_data(dfield, field, len);
+
if (rec_offs_nth_extern(offsets, i)) {
dfield_set_ext(dfield);
- if (UNIV_LIKELY_NULL(col_table)) {
- ut_a(col_no
- < dict_table_get_n_cols(col_table));
- col = dict_table_get_nth_col(
- col_table, col_no);
- }
+ col = dict_table_get_nth_col(col_table, col_no);
if (col->ord_part) {
/* We will have to fetch prefixes of
@@ -319,14 +358,20 @@ row_build(
}
}
+ rec_offs_make_valid(rec, index, const_cast<ulint*>(offsets));
+
ut_ad(dtuple_check_typed(row));
if (!ext) {
/* REDUNDANT and COMPACT formats store a local
768-byte prefix of each externally stored
- column. No cache is needed. */
- ut_ad(dict_table_get_format(index->table)
- < UNIV_FORMAT_B);
+ column. No cache is needed.
+
+ During online table rebuild,
+ row_log_table_apply_delete_low()
+ may use a cache that was set up by
+ row_log_table_delete(). */
+
} else if (j) {
*ext = row_ext_create(j, ext_cols, index->table->flags, row,
heap);
@@ -402,28 +447,14 @@ row_rec_to_index_entry_low(
/*******************************************************************//**
Converts an index record to a typed data tuple. NOTE that externally
stored (often big) fields are NOT copied to heap.
-@return own: index entry built; see the NOTE below! */
+@return own: index entry built */
UNIV_INTERN
dtuple_t*
row_rec_to_index_entry(
/*===================*/
- ulint type, /*!< in: ROW_COPY_DATA, or
- ROW_COPY_POINTERS: the former
- copies also the data fields to
- heap as the latter only places
- pointers to data fields on the
- index page */
- const rec_t* rec, /*!< in: record in the index;
- NOTE: in the case
- ROW_COPY_POINTERS the data
- fields in the row will point
- directly into this record,
- therefore, the buffer page of
- this record must be at least
- s-latched and the latch held
- as long as the dtuple is used! */
+ const rec_t* rec, /*!< in: record in the index */
const dict_index_t* index, /*!< in: index */
- ulint* offsets,/*!< in/out: rec_get_offsets(rec) */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec) */
ulint* n_ext, /*!< out: number of externally
stored columns */
mem_heap_t* heap) /*!< in: memory heap from which
@@ -431,25 +462,21 @@ row_rec_to_index_entry(
{
dtuple_t* entry;
byte* buf;
+ const rec_t* copy_rec;
ut_ad(rec && heap && index);
ut_ad(rec_offs_validate(rec, index, offsets));
- if (type == ROW_COPY_DATA) {
- /* Take a copy of rec to heap */
- buf = static_cast<byte*>(
- mem_heap_alloc(heap, rec_offs_size(offsets)));
+ /* Take a copy of rec to heap */
+ buf = static_cast<byte*>(
+ mem_heap_alloc(heap, rec_offs_size(offsets)));
- rec = rec_copy(buf, rec, offsets);
- /* Avoid a debug assertion in rec_offs_validate(). */
- rec_offs_make_valid(rec, index, offsets);
-#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
- } else {
- ut_a(!rec_offs_any_null_extern(rec, offsets));
-#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
- }
+ copy_rec = rec_copy(buf, rec, offsets);
- entry = row_rec_to_index_entry_low(rec, index, offsets, n_ext, heap);
+ rec_offs_make_valid(copy_rec, index, const_cast<ulint*>(offsets));
+ entry = row_rec_to_index_entry_low(
+ copy_rec, index, offsets, n_ext, heap);
+ rec_offs_make_valid(rec, index, const_cast<ulint*>(offsets));
dtuple_set_info_bits(entry,
rec_get_info_bits(rec, rec_offs_comp(offsets)));
diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc
index 96884e89511..bfda669d97a 100644
--- a/storage/innobase/row/row0sel.cc
+++ b/storage/innobase/row/row0sel.cc
@@ -57,7 +57,6 @@ Created 12/19/1997 Heikki Tuuri
#include "read0read.h"
#include "buf0lru.h"
#include "ha_prototypes.h"
-#include "srv0mon.h"
#include "my_compare.h" /* enum icp_result */
@@ -673,8 +672,8 @@ sel_enqueue_prefetched_row(
/*********************************************************************//**
Builds a previous version of a clustered index record for a consistent read
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_sel_build_prev_vers(
/*====================*/
read_view_t* read_view, /*!< in: read view */
@@ -691,7 +690,7 @@ row_sel_build_prev_vers(
afterwards */
mtr_t* mtr) /*!< in: mtr */
{
- ulint err;
+ dberr_t err;
if (*old_vers_heap) {
mem_heap_empty(*old_vers_heap);
@@ -707,10 +706,9 @@ row_sel_build_prev_vers(
/*********************************************************************//**
Builds the last committed version of a clustered index record for a
-semi-consistent read.
-@return DB_SUCCESS or error code */
-static
-ulint
+semi-consistent read. */
+static __attribute__((nonnull))
+void
row_sel_build_committed_vers_for_mysql(
/*===================================*/
dict_index_t* clust_index, /*!< in: clustered index */
@@ -726,18 +724,16 @@ row_sel_build_committed_vers_for_mysql(
afterwards */
mtr_t* mtr) /*!< in: mtr */
{
- ulint err;
-
if (prebuilt->old_vers_heap) {
mem_heap_empty(prebuilt->old_vers_heap);
} else {
- prebuilt->old_vers_heap = mem_heap_create(200);
+ prebuilt->old_vers_heap = mem_heap_create(
+ rec_offs_size(*offsets));
}
- err = row_vers_build_for_semi_consistent_read(
+ row_vers_build_for_semi_consistent_read(
rec, mtr, clust_index, offsets, offset_heap,
prebuilt->old_vers_heap, old_vers);
- return(err);
}
/*********************************************************************//**
@@ -809,8 +805,8 @@ row_sel_test_other_conds(
Retrieves the clustered index record corresponding to a record in a
non-clustered index. Does the necessary locking.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_sel_get_clust_rec(
/*==================*/
sel_node_t* node, /*!< in: select_node */
@@ -828,7 +824,7 @@ row_sel_get_clust_rec(
dict_index_t* index;
rec_t* clust_rec;
rec_t* old_vers;
- ulint err;
+ dberr_t err;
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
@@ -982,7 +978,7 @@ err_exit:
Sets a lock on a record.
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */
UNIV_INLINE
-enum db_err
+dberr_t
sel_set_rec_lock(
/*=============*/
const buf_block_t* block, /*!< in: buffer block of rec */
@@ -995,7 +991,7 @@ sel_set_rec_lock(
que_thr_t* thr) /*!< in: query thread */
{
trx_t* trx;
- enum db_err err;
+ dberr_t err;
trx = thr_get_trx(thr);
@@ -1084,7 +1080,7 @@ row_sel_open_pcur(
(FALSE: no init) */
btr_pcur_open_at_index_side(plan->asc, index, BTR_SEARCH_LEAF,
- &(plan->pcur), FALSE, mtr);
+ &(plan->pcur), false, 0, mtr);
}
ut_ad(plan->n_rows_prefetched == 0);
@@ -1313,8 +1309,8 @@ func_exit:
/*********************************************************************//**
Performs a select step.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_sel(
/*====*/
sel_node_t* node, /*!< in: select node */
@@ -1347,7 +1343,7 @@ row_sel(
&mtr must be committed before we move
to the next non-clustered record */
ulint found_flag;
- ulint err;
+ dberr_t err;
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
@@ -2083,11 +2079,9 @@ row_sel_step(
table_node = static_cast<sym_node_t*>(
que_node_get_next(table_node))) {
- enum db_err err;
-
- err = static_cast<enum db_err>(lock_table(
+ dberr_t err = lock_table(
0, table_node->table, i_lock_mode,
- thr));
+ thr);
if (err != DB_SUCCESS) {
trx_t* trx;
@@ -2120,7 +2114,7 @@ row_sel_step(
}
}
- enum db_err err = static_cast<enum db_err>(row_sel(node, thr));
+ dberr_t err = row_sel(node, thr);
/* NOTE! if queries are parallelized, the following assignment may
have problems; the assignment should be made only if thr is the
@@ -2305,42 +2299,6 @@ row_printf_step(
return(thr);
}
-/********************************************************************
-Creates a key in Innobase dtuple format.*/
-
-void
-row_create_key(
-/*===========*/
- dtuple_t* tuple, /* in: tuple where to build;
- NOTE: we assume that the type info
- in the tuple is already according
- to index! */
- dict_index_t* index, /* in: index of the key value */
- doc_id_t* doc_id) /* in: doc id to search. */
-{
- dtype_t type;
- dict_field_t* field;
- doc_id_t temp_doc_id;
- dfield_t* dfield = dtuple_get_nth_field(tuple, 0);
-
- ut_a(dict_index_get_n_unique(index) == 1);
-
- /* Permit us to access any field in the tuple (ULINT_MAX): */
- dtuple_set_n_fields(tuple, ULINT_MAX);
-
- field = dict_index_get_nth_field(index, 0);
- dict_col_copy_type(field->col, &type);
- ut_a(dtype_get_mtype(&type) == DATA_INT);
-
- /* Convert to storage byte order */
- mach_write_to_8((byte*) &temp_doc_id, *doc_id);
- *doc_id = temp_doc_id;
-
- ut_a(sizeof(*doc_id) == field->fixed_len);
- dfield_set_data(dfield, doc_id, field->fixed_len);
-
- dtuple_set_n_fields(tuple, 1);
-}
/****************************************************************//**
Converts a key value stored in MySQL format to an Innobase dtuple. The last
field of the key value may be just a prefix of a fixed length field: hence
@@ -2536,6 +2494,7 @@ row_sel_convert_mysql_key_to_innobase(
dfield_set_len(dfield, len
- (ulint) (key_ptr - key_end));
}
+ ut_ad(0);
}
n_fields++;
@@ -3008,8 +2967,8 @@ row_sel_store_mysql_rec(
/*********************************************************************//**
Builds a previous version of a clustered index record for a consistent read
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_sel_build_prev_vers_for_mysql(
/*==============================*/
read_view_t* read_view, /*!< in: read view */
@@ -3026,7 +2985,7 @@ row_sel_build_prev_vers_for_mysql(
afterwards */
mtr_t* mtr) /*!< in: mtr */
{
- ulint err;
+ dberr_t err;
if (prebuilt->old_vers_heap) {
mem_heap_empty(prebuilt->old_vers_heap);
@@ -3045,8 +3004,8 @@ Retrieves the clustered index record corresponding to a record in a
non-clustered index. Does the necessary locking. Used in the MySQL
interface.
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */
-static
-enum db_err
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_sel_get_clust_rec_for_mysql(
/*============================*/
row_prebuilt_t* prebuilt,/*!< in: prebuilt struct in the handle */
@@ -3073,7 +3032,7 @@ row_sel_get_clust_rec_for_mysql(
dict_index_t* clust_index;
const rec_t* clust_rec;
rec_t* old_vers;
- enum db_err err;
+ dberr_t err;
trx_t* trx;
*out_rec = NULL;
@@ -3172,17 +3131,13 @@ row_sel_get_clust_rec_for_mysql(
clust_rec, clust_index, *offsets,
trx->read_view)) {
- ulint db_err;
-
/* The following call returns 'offsets' associated with
'old_vers' */
- db_err = row_sel_build_prev_vers_for_mysql(
+ err = row_sel_build_prev_vers_for_mysql(
trx->read_view, clust_index, prebuilt,
clust_rec, offsets, offset_heap, &old_vers,
mtr);
- err = static_cast<enum db_err>(db_err);
-
if (err != DB_SUCCESS || old_vers == NULL) {
goto err_exit;
@@ -3226,7 +3181,10 @@ row_sel_get_clust_rec_for_mysql(
func_exit:
*out_rec = clust_rec;
- if (prebuilt->select_lock_type != LOCK_NONE) {
+ /* Store the current position if select_lock_type is not
+ LOCK_NONE or if we are scanning using InnoDB APIs */
+ if (prebuilt->select_lock_type != LOCK_NONE
+ || prebuilt->innodb_api) {
/* We may use the cursor in update or in unlock_row():
store its position */
@@ -3633,7 +3591,7 @@ row_search_idx_cond_check(
return(result);
case ICP_ERROR:
case ICP_ABORTED_BY_USER:
- return(result);
+ return(result);
}
ut_error;
@@ -3649,7 +3607,7 @@ position and fetch next or fetch prev must not be tried to the cursor!
@return DB_SUCCESS, DB_RECORD_NOT_FOUND, DB_END_OF_INDEX, DB_DEADLOCK,
DB_LOCK_TABLE_FULL, DB_CORRUPTION, or DB_TOO_BIG_RECORD */
UNIV_INTERN
-ulint
+dberr_t
row_search_for_mysql(
/*=================*/
byte* buf, /*!< in/out: buffer for the fetched
@@ -3678,9 +3636,9 @@ row_search_for_mysql(
dict_index_t* clust_index;
que_thr_t* thr;
const rec_t* rec;
- const rec_t* result_rec;
+ const rec_t* result_rec = NULL;
const rec_t* clust_rec;
- ulint err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
ibool unique_search = FALSE;
ibool mtr_has_extra_clust_latch = FALSE;
ibool moves_up = FALSE;
@@ -3701,48 +3659,41 @@ row_search_for_mysql(
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
ibool table_lock_waited = FALSE;
+ byte* next_buf = 0;
rec_offs_init(offsets_);
ut_ad(index && pcur && search_tuple);
- if (UNIV_UNLIKELY(prebuilt->table->ibd_file_missing)) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error:\n"
- "InnoDB: MySQL is trying to use a table handle"
- " but the .ibd file for\n"
- "InnoDB: table %s does not exist.\n"
- "InnoDB: Have you deleted the .ibd file"
- " from the database directory under\n"
- "InnoDB: the MySQL datadir, or have you used"
- " DISCARD TABLESPACE?\n"
- "InnoDB: Look from\n"
- "InnoDB: " REFMAN "innodb-troubleshooting.html\n"
- "InnoDB: how you can resolve the problem.\n",
- prebuilt->table->name);
+ /* We don't support FTS queries from the HANDLER interfaces, because
+ we implemented FTS as reversed inverted index with auxiliary tables.
+ So anything related to traditional index query would not apply to
+ it. */
+ if (index->type & DICT_FTS) {
+ return(DB_END_OF_INDEX);
+ }
#ifdef UNIV_SYNC_DEBUG
- ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
+ ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
#endif /* UNIV_SYNC_DEBUG */
- return(DB_ERROR);
- }
- if (UNIV_UNLIKELY(!prebuilt->index_usable)) {
+ if (dict_table_is_discarded(prebuilt->table)) {
+
+ return(DB_TABLESPACE_DELETED);
+
+ } else if (prebuilt->table->ibd_file_missing) {
+
+ return(DB_TABLESPACE_NOT_FOUND);
+
+ } else if (!prebuilt->index_usable) {
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
-#endif /* UNIV_SYNC_DEBUG */
return(DB_MISSING_HISTORY);
- }
- if (dict_index_is_corrupted(index)) {
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
-#endif /* UNIV_SYNC_DEBUG */
+ } else if (dict_index_is_corrupted(index)) {
+
return(DB_CORRUPTION);
- }
- if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED) {
+ } else if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED) {
fprintf(stderr,
"InnoDB: Error: trying to free a corrupt\n"
"InnoDB: table handle. Magic n %lu, table name ",
@@ -3846,7 +3797,6 @@ row_search_for_mysql(
prebuilt->n_rows_fetched++;
- srv_n_rows_read++;
err = DB_SUCCESS;
goto func_exit;
}
@@ -3925,7 +3875,8 @@ row_search_for_mysql(
&& dict_index_is_clust(index)
&& !prebuilt->templ_contains_blob
&& !prebuilt->used_in_HANDLER
- && (prebuilt->mysql_row_len < UNIV_PAGE_SIZE / 8)) {
+ && (prebuilt->mysql_row_len < UNIV_PAGE_SIZE / 8)
+ && !prebuilt->innodb_api) {
mode = PAGE_CUR_GE;
@@ -3973,8 +3924,8 @@ row_search_for_mysql(
rec, offsets)) {
case ICP_NO_MATCH:
case ICP_OUT_OF_RANGE:
- case ICP_ERROR:
case ICP_ABORTED_BY_USER:
+ case ICP_ERROR:
goto shortcut_mismatch;
case ICP_MATCH:
goto shortcut_match;
@@ -4005,8 +3956,6 @@ row_search_for_mysql(
/* ut_print_name(stderr, index->name);
fputs(" shortcut\n", stderr); */
- srv_n_rows_read++;
-
err = DB_SUCCESS;
goto release_search_latch_if_needed;
@@ -4179,12 +4128,12 @@ wait_table_again:
/* Try to place a gap lock on the next index record
to prevent phantoms in ORDER BY ... DESC queries */
- const rec_t* next = page_rec_get_next_const(rec);
+ const rec_t* next_rec = page_rec_get_next_const(rec);
- offsets = rec_get_offsets(next, index, offsets,
+ offsets = rec_get_offsets(next_rec, index, offsets,
ULINT_UNDEFINED, &heap);
err = sel_set_rec_lock(btr_pcur_get_block(pcur),
- next, index, offsets,
+ next_rec, index, offsets,
prebuilt->select_lock_type,
LOCK_GAP, thr);
@@ -4197,16 +4146,10 @@ wait_table_again:
goto lock_wait_or_error;
}
}
- } else {
- if (mode == PAGE_CUR_G) {
- btr_pcur_open_at_index_side(
- TRUE, index, BTR_SEARCH_LEAF, pcur, FALSE,
- &mtr);
- } else if (mode == PAGE_CUR_L) {
- btr_pcur_open_at_index_side(
- FALSE, index, BTR_SEARCH_LEAF, pcur, FALSE,
- &mtr);
- }
+ } else if (mode == PAGE_CUR_G || mode == PAGE_CUR_L) {
+ btr_pcur_open_at_index_side(
+ mode == PAGE_CUR_G, index, BTR_SEARCH_LEAF,
+ pcur, false, 0, &mtr);
}
rec_loop:
@@ -4348,6 +4291,9 @@ wrong_offs:
/* Calculate the 'offsets' associated with 'rec' */
+ ut_ad(fil_page_get_type(btr_pcur_get_page(pcur)) == FIL_PAGE_INDEX);
+ ut_ad(btr_page_get_index_id(btr_pcur_get_page(pcur)) == index->id);
+
offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
if (UNIV_UNLIKELY(srv_force_recovery > 0)) {
@@ -4539,15 +4485,10 @@ no_gap_lock:
/* The following call returns 'offsets'
associated with 'old_vers' */
- err = row_sel_build_committed_vers_for_mysql(
+ row_sel_build_committed_vers_for_mysql(
clust_index, prebuilt, rec,
&offsets, &heap, &old_vers, &mtr);
- if (err != DB_SUCCESS) {
-
- goto lock_wait_or_error;
- }
-
/* Check whether it was a deadlock or not, if not
a deadlock and the transaction had to wait then
release the lock it is waiting on. */
@@ -4649,8 +4590,8 @@ no_gap_lock:
case ICP_NO_MATCH:
goto next_rec;
case ICP_OUT_OF_RANGE:
- case ICP_ERROR:
case ICP_ABORTED_BY_USER:
+ case ICP_ERROR:
err = DB_RECORD_NOT_FOUND;
goto idx_cond_failed;
case ICP_MATCH:
@@ -4690,12 +4631,15 @@ locks_ok:
delete marked record and the record following it.
For now this is applicable only to clustered indexes while
- doing a unique search. There is scope for further optimization
+ doing a unique search except for HANDLER queries because
+ HANDLER allows NEXT and PREV even in unique search on
+ clustered index. There is scope for further optimization
applicable to unique secondary indexes. Current behaviour is
to widen the scope of a lock on an already delete marked record
if the same record is deleted twice by the same transaction */
if (index == clust_index && unique_search
- && !prebuilt->used_in_HANDLER) {
+ && !prebuilt->used_in_HANDLER) {
+
err = DB_RECORD_NOT_FOUND;
goto normal_return;
@@ -4712,8 +4656,8 @@ locks_ok:
}
goto next_rec;
case ICP_OUT_OF_RANGE:
- case ICP_ERROR:
case ICP_ABORTED_BY_USER:
+ case ICP_ERROR:
err = DB_RECORD_NOT_FOUND;
goto idx_cond_failed;
case ICP_MATCH:
@@ -4831,9 +4775,10 @@ requires_clust_rec:
&& !prebuilt->templ_contains_blob
&& !prebuilt->clust_index_was_generated
&& !prebuilt->used_in_HANDLER
+ && !prebuilt->innodb_api
&& prebuilt->template_type
!= ROW_MYSQL_DUMMY_TEMPLATE
- && !prebuilt->result) {
+ && !prebuilt->in_fts_query) {
/* Inside an update, for example, we do not cache rows,
since we may use the cursor position to do the actual
@@ -4849,29 +4794,58 @@ requires_clust_rec:
/* We only convert from InnoDB row format to MySQL row
format when ICP is disabled. */
- if (!prebuilt->idx_cond
- && !row_sel_store_mysql_rec(
- row_sel_fetch_last_buf(prebuilt),
- prebuilt, result_rec,
- result_rec != rec,
- result_rec != rec ? clust_index : index,
- offsets)) {
-
- /* Only fresh inserts may contain incomplete
- externally stored columns. Pretend that such
- records do not exist. Such records may only be
- accessed at the READ UNCOMMITTED isolation
- level or when rolling back a recovered
- transaction. Rollback happens at a lower
- level, not here. */
- goto next_rec;
- }
+ if (!prebuilt->idx_cond) {
- row_sel_enqueue_cache_row_for_mysql(buf, prebuilt);
+ /* We use next_buf to track the allocation of buffers
+ where we store and enqueue the buffers for our
+ pre-fetch optimisation.
+
+ If next_buf == 0 then we store the converted record
+ directly into the MySQL record buffer (buf). If it is
+ != 0 then we allocate a pre-fetch buffer and store the
+ converted record there.
+
+ If the conversion fails and the MySQL record buffer
+ was not written to then we reset next_buf so that
+ we can re-use the MySQL record buffer in the next
+ iteration. */
+
+ next_buf = next_buf
+ ? row_sel_fetch_last_buf(prebuilt) : buf;
+
+ if (!row_sel_store_mysql_rec(
+ next_buf, prebuilt, result_rec,
+ result_rec != rec,
+ result_rec != rec ? clust_index : index,
+ offsets)) {
+
+ if (next_buf == buf) {
+ ut_a(prebuilt->n_fetch_cached == 0);
+ next_buf = 0;
+ }
+
+ /* Only fresh inserts may contain incomplete
+ externally stored columns. Pretend that such
+ records do not exist. Such records may only be
+ accessed at the READ UNCOMMITTED isolation
+ level or when rolling back a recovered
+ transaction. Rollback happens at a lower
+ level, not here. */
+ goto next_rec;
+ }
+
+ if (next_buf != buf) {
+ row_sel_enqueue_cache_row_for_mysql(
+ next_buf, prebuilt);
+ }
+ } else {
+ row_sel_enqueue_cache_row_for_mysql(buf, prebuilt);
+ }
if (prebuilt->n_fetch_cached < MYSQL_FETCH_CACHE_SIZE) {
goto next_rec;
}
+
} else {
if (UNIV_UNLIKELY
(prebuilt->template_type == ROW_MYSQL_DUMMY_TEMPLATE)) {
@@ -4892,7 +4866,7 @@ requires_clust_rec:
rec_offs_size(offsets));
mach_write_to_4(buf,
rec_offs_extra_size(offsets) + 4);
- } else if (!prebuilt->idx_cond) {
+ } else if (!prebuilt->idx_cond && !prebuilt->innodb_api) {
/* The record was not yet converted to MySQL format. */
if (!row_sel_store_mysql_rec(
buf, prebuilt, result_rec,
@@ -4935,11 +4909,16 @@ idx_cond_failed:
|| !dict_index_is_clust(index)
|| direction != 0
|| prebuilt->select_lock_type != LOCK_NONE
- || prebuilt->used_in_HANDLER) {
+ || prebuilt->used_in_HANDLER
+ || prebuilt->innodb_api) {
/* Inside an update always store the cursor position */
btr_pcur_store_position(pcur, &mtr);
+
+ if (prebuilt->innodb_api) {
+ prebuilt->innodb_api_rec = result_rec;
+ }
}
goto normal_return;
@@ -5032,7 +5011,7 @@ lock_table_wait:
mtr_commit(&mtr);
mtr_has_extra_clust_latch = FALSE;
- trx->error_state = static_cast<enum db_err>(err);
+ trx->error_state = err;
/* The following is a patch for MySQL */
@@ -5101,8 +5080,23 @@ normal_return:
mtr_commit(&mtr);
- if (prebuilt->n_fetch_cached > 0) {
- row_sel_dequeue_cached_row_for_mysql(buf, prebuilt);
+ if (prebuilt->idx_cond != 0) {
+
+ /* When ICP is active we don't write to the MySQL buffer
+ directly, only to buffers that are enqueued in the pre-fetch
+ queue. We need to dequeue the first buffer and copy the contents
+ to the record buffer that was passed in by MySQL. */
+
+ if (prebuilt->n_fetch_cached > 0) {
+ row_sel_dequeue_cached_row_for_mysql(buf, prebuilt);
+ err = DB_SUCCESS;
+ }
+
+ } else if (next_buf != 0) {
+
+ /* We may or may not have enqueued some buffers to the
+ pre-fetch queue, but we definitely wrote to the record
+ buffer passed to us by MySQL. */
err = DB_SUCCESS;
}
@@ -5112,9 +5106,6 @@ normal_return:
dict_index_name_print(stderr, index);
fprintf(stderr, " cnt %lu ret value %lu err\n", cnt, err); */
#endif /* UNIV_SEARCH_DEBUG */
- if (err == DB_SUCCESS) {
- srv_n_rows_read++;
- }
func_exit:
trx->op_info = "";
@@ -5139,6 +5130,9 @@ func_exit:
#ifdef UNIV_SYNC_DEBUG
ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
#endif /* UNIV_SYNC_DEBUG */
+
+ DEBUG_SYNC_C("innodb_row_search_for_mysql_exit");
+
return(err);
}
@@ -5157,7 +5151,22 @@ row_search_check_if_query_cache_permitted(
dict_table_t* table;
ibool ret = FALSE;
- table = dict_table_open_on_name(norm_name, FALSE);
+ /* Disable query cache altogether for all tables if recovered XA
+ transactions in prepared state exist. This is because we do not
+ restore the table locks for those transactions and we may wrongly
+ set ret=TRUE below if "lock_table_get_n_locks(table) == 0". See
+ "Bug#14658648 XA ROLLBACK (DISTRIBUTED DATABASE) NOT WORKING WITH
+ QUERY CACHE ENABLED".
+ Read trx_sys->n_prepared_recovered_trx without mutex protection,
+ not possible to end up with a torn read since n_prepared_recovered_trx
+ is word size. */
+ if (trx_sys->n_prepared_recovered_trx > 0) {
+
+ return(FALSE);
+ }
+
+ table = dict_table_open_on_name(norm_name, FALSE, FALSE,
+ DICT_ERR_IGNORE_NONE);
if (table == NULL) {
@@ -5191,7 +5200,7 @@ row_search_check_if_query_cache_permitted(
}
}
- dict_table_close(table, FALSE);
+ dict_table_close(table, FALSE, FALSE);
return(ret);
}
@@ -5229,8 +5238,6 @@ row_search_autoinc_read_column(
data = rec_get_nth_field(rec, offsets, col_no, &len);
- ut_a(len != UNIV_SQL_NULL);
-
switch (mtype) {
case DATA_INT:
ut_a(len <= sizeof value);
@@ -5289,7 +5296,7 @@ Read the max AUTOINC value from an index.
@return DB_SUCCESS if all OK else error code, DB_RECORD_NOT_FOUND if
column name can't be found in index */
UNIV_INTERN
-ulint
+dberr_t
row_search_max_autoinc(
/*===================*/
dict_index_t* index, /*!< in: index to search */
@@ -5299,7 +5306,7 @@ row_search_max_autoinc(
ulint i;
ulint n_cols;
dict_field_t* dfield = NULL;
- ulint error = DB_SUCCESS;
+ dberr_t error = DB_SUCCESS;
n_cols = dict_index_get_n_ordering_defined_by_user(index);
@@ -5321,10 +5328,9 @@ row_search_max_autoinc(
mtr_start(&mtr);
- /* Open at the high/right end (FALSE), and INIT
- cursor (TRUE) */
+ /* Open at the high/right end (false), and init cursor */
btr_pcur_open_at_index_side(
- FALSE, index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
+ false, index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
if (page_get_n_recs(btr_pcur_get_page(&pcur)) > 0) {
const rec_t* rec;
diff --git a/storage/innobase/row/row0uins.cc b/storage/innobase/row/row0uins.cc
index 78fd4ad5199..25b2b6b62ce 100644
--- a/storage/innobase/row/row0uins.cc
+++ b/storage/innobase/row/row0uins.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2010, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -38,6 +38,7 @@ Created 2/25/1997 Heikki Tuuri
#include "mach0data.h"
#include "row0undo.h"
#include "row0vers.h"
+#include "row0log.h"
#include "trx0trx.h"
#include "trx0rec.h"
#include "row0row.h"
@@ -60,25 +61,64 @@ introduced where a call to log_free_check() is bypassed. */
Removes a clustered index record. The pcur in node was positioned on the
record, now it is detached.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_undo_ins_remove_clust_rec(
/*==========================*/
undo_node_t* node) /*!< in: undo node */
{
btr_cur_t* btr_cur;
ibool success;
- ulint err;
- ulint n_tries = 0;
+ dberr_t err;
+ ulint n_tries = 0;
mtr_t mtr;
+ dict_index_t* index = node->pcur.btr_cur.index;
+ bool online;
+
+ ut_ad(dict_index_is_clust(index));
mtr_start(&mtr);
- success = btr_pcur_restore_position(BTR_MODIFY_LEAF, &(node->pcur),
- &mtr);
+ /* This is similar to row_undo_mod_clust(). Even though we
+ call row_log_table_rollback() elsewhere, the DDL thread may
+ already have copied this row to the sort buffers or to the new
+ table. We must log the removal, so that the row will be
+ correctly purged. However, we can log the removal out of sync
+ with the B-tree modification. */
+
+ online = dict_index_is_online_ddl(index);
+ if (online) {
+ ut_ad(node->trx->dict_operation_lock_mode
+ != RW_X_LATCH);
+ ut_ad(node->table->id != DICT_INDEXES_ID);
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
+ }
+
+ success = btr_pcur_restore_position(
+ online
+ ? BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED
+ : BTR_MODIFY_LEAF, &node->pcur, &mtr);
ut_a(success);
+ btr_cur = btr_pcur_get_btr_cur(&node->pcur);
+
+ ut_ad(rec_get_trx_id(btr_cur_get_rec(btr_cur), btr_cur->index)
+ == node->trx->id);
+
+ if (online && dict_index_is_online_ddl(index)) {
+ const rec_t* rec = btr_cur_get_rec(btr_cur);
+ mem_heap_t* heap = NULL;
+ const ulint* offsets = rec_get_offsets(
+ rec, index, NULL, ULINT_UNDEFINED, &heap);
+ row_log_table_delete(
+ rec, index, offsets,
+ trx_read_trx_id(row_get_trx_id_offset(index, offsets)
+ + rec));
+ mem_heap_free(heap);
+ }
+
if (node->table->id == DICT_INDEXES_ID) {
+ ut_ad(!online);
ut_ad(node->trx->dict_operation_lock_mode == RW_X_LATCH);
/* Drop the index tree associated with the row in
@@ -90,14 +130,12 @@ row_undo_ins_remove_clust_rec(
mtr_start(&mtr);
- success = btr_pcur_restore_position(BTR_MODIFY_LEAF,
- &(node->pcur), &mtr);
+ success = btr_pcur_restore_position(
+ BTR_MODIFY_LEAF, &node->pcur, &mtr);
ut_a(success);
}
- btr_cur = btr_pcur_get_btr_cur(&(node->pcur));
-
- if (btr_cur_optimistic_delete(btr_cur, &mtr)) {
+ if (btr_cur_optimistic_delete(btr_cur, 0, &mtr)) {
err = DB_SUCCESS;
goto func_exit;
}
@@ -111,7 +149,7 @@ retry:
&(node->pcur), &mtr);
ut_a(success);
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
+ btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
trx_is_recv(node->trx)
? RB_RECOVERY
: RB_NORMAL, &mtr);
@@ -142,8 +180,8 @@ func_exit:
/***************************************************************//**
Removes a secondary index entry if found.
@return DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_undo_ins_remove_sec_low(
/*========================*/
ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
@@ -154,22 +192,31 @@ row_undo_ins_remove_sec_low(
{
btr_pcur_t pcur;
btr_cur_t* btr_cur;
- ulint err;
+ dberr_t err = DB_SUCCESS;
mtr_t mtr;
enum row_search_result search_result;
+ log_free_check();
+
mtr_start(&mtr);
- btr_cur = btr_pcur_get_btr_cur(&pcur);
+ if (mode == BTR_MODIFY_LEAF) {
+ mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
+ } else {
+ ut_ad(mode == BTR_MODIFY_TREE);
+ mtr_x_lock(dict_index_get_lock(index), &mtr);
+ }
- ut_ad(mode == BTR_MODIFY_TREE || mode == BTR_MODIFY_LEAF);
+ if (row_log_online_op_try(index, entry, 0)) {
+ goto func_exit_no_pcur;
+ }
search_result = row_search_index_entry(index, entry, mode,
&pcur, &mtr);
switch (search_result) {
case ROW_NOT_FOUND:
- err = DB_SUCCESS;
goto func_exit;
case ROW_FOUND:
break;
@@ -181,23 +228,24 @@ row_undo_ins_remove_sec_low(
ut_error;
}
- if (mode == BTR_MODIFY_LEAF) {
- err = btr_cur_optimistic_delete(btr_cur, &mtr)
+ btr_cur = btr_pcur_get_btr_cur(&pcur);
+
+ if (mode != BTR_MODIFY_TREE) {
+ err = btr_cur_optimistic_delete(btr_cur, 0, &mtr)
? DB_SUCCESS : DB_FAIL;
} else {
- ut_ad(mode == BTR_MODIFY_TREE);
-
/* No need to distinguish RB_RECOVERY here, because we
are deleting a secondary index record: the distinction
between RB_NORMAL and RB_RECOVERY only matters when
deleting a record that contains externally stored
columns. */
ut_ad(!dict_index_is_clust(index));
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
+ btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
RB_NORMAL, &mtr);
}
func_exit:
btr_pcur_close(&pcur);
+func_exit_no_pcur:
mtr_commit(&mtr);
return(err);
@@ -207,14 +255,14 @@ func_exit:
Removes a secondary index entry from the index if found. Tries first
optimistic, then pessimistic descent down the tree.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_undo_ins_remove_sec(
/*====================*/
dict_index_t* index, /*!< in: index */
dtuple_t* entry) /*!< in: index entry to insert */
{
- ulint err;
+ dberr_t err;
ulint n_tries = 0;
/* Try first optimistic descent to the B-tree */
@@ -261,7 +309,7 @@ row_undo_ins_parse_undo_rec(
table_id_t table_id;
ulint type;
ulint dummy;
- ibool dummy_extern;
+ bool dummy_extern;
ut_ad(node);
@@ -271,12 +319,13 @@ row_undo_ins_parse_undo_rec(
node->rec_type = type;
node->update = NULL;
- node->table = dict_table_open_on_id(table_id, dict_locked);
+ node->table = dict_table_open_on_id(table_id, dict_locked, FALSE);
/* Skip the UNDO if we can't find the table or the .ibd file. */
if (UNIV_UNLIKELY(node->table == NULL)) {
} else if (UNIV_UNLIKELY(node->table->ibd_file_missing)) {
- dict_table_close(node->table, dict_locked);
+close_table:
+ dict_table_close(node->table, dict_locked, FALSE);
node->table = NULL;
} else {
clust_index = dict_table_get_first_index(node->table);
@@ -286,10 +335,7 @@ row_undo_ins_parse_undo_rec(
ptr, clust_index, &node->ref, node->heap);
if (!row_undo_search_clust_to_pcur(node)) {
-
- dict_table_close(node->table, dict_locked);
-
- node->table = NULL;
+ goto close_table;
}
} else {
@@ -299,10 +345,7 @@ row_undo_ins_parse_undo_rec(
node->table->name);
fprintf(stderr, " has no indexes, "
"ignoring the table\n");
-
- dict_table_close(node->table, dict_locked);
-
- node->table = NULL;
+ goto close_table;
}
}
}
@@ -310,27 +353,32 @@ row_undo_ins_parse_undo_rec(
/***************************************************************//**
Removes secondary index records.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_undo_ins_remove_sec_rec(
/*========================*/
undo_node_t* node) /*!< in/out: row undo node */
{
- ulint err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
+ dict_index_t* index = node->index;
mem_heap_t* heap;
heap = mem_heap_create(1024);
- while (node->index != NULL) {
+ while (index != NULL) {
dtuple_t* entry;
- if (node->index->type & DICT_FTS) {
- dict_table_next_uncorrupted_index(node->index);
+ if (index->type & DICT_FTS) {
+ dict_table_next_uncorrupted_index(index);
continue;
}
- entry = row_build_index_entry(node->row, node->ext,
- node->index, heap);
+ /* An insert undo record TRX_UNDO_INSERT_REC will
+ always contain all fields of the index. It does not
+ matter if any indexes were created afterwards; all
+ index entries can be reconstructed from the row. */
+ entry = row_build_index_entry(
+ node->row, node->ext, index, heap);
if (UNIV_UNLIKELY(!entry)) {
/* The database must have crashed after
inserting a clustered index record but before
@@ -343,9 +391,7 @@ row_undo_ins_remove_sec_rec(
transactions. */
ut_a(trx_is_recv(node->trx));
} else {
- log_free_check();
-
- err = row_undo_ins_remove_sec(node->index, entry);
+ err = row_undo_ins_remove_sec(index, entry);
if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
goto func_exit;
@@ -353,10 +399,11 @@ row_undo_ins_remove_sec_rec(
}
mem_heap_empty(heap);
- dict_table_next_uncorrupted_index(node->index);
+ dict_table_next_uncorrupted_index(index);
}
func_exit:
+ node->index = index;
mem_heap_free(heap);
return(err);
}
@@ -369,15 +416,14 @@ if it figures out that an index record will be removed in the purge
anyway, it will remove it in the rollback.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
UNIV_INTERN
-ulint
+dberr_t
row_undo_ins(
/*=========*/
undo_node_t* node) /*!< in: row undo node */
{
- ulint err;
- ibool dict_locked;
+ dberr_t err;
+ ibool dict_locked;
- ut_ad(node);
ut_ad(node->state == UNDO_NODE_INSERT);
dict_locked = node->trx->dict_operation_lock_mode == RW_X_LATCH;
@@ -392,24 +438,46 @@ row_undo_ins(
/* Iterate over all the indexes and undo the insert.*/
+ node->index = dict_table_get_first_index(node->table);
+ ut_ad(dict_index_is_clust(node->index));
+
+ if (dict_index_is_online_ddl(node->index)) {
+ /* Note that we are rolling back this transaction, so
+ that all inserts and updates with this DB_TRX_ID can
+ be skipped. */
+ row_log_table_rollback(node->index, node->trx->id);
+ }
+
/* Skip the clustered index (the first index) */
- node->index = dict_table_get_next_index(
- dict_table_get_first_index(node->table));
+ node->index = dict_table_get_next_index(node->index);
dict_table_skip_corrupt_index(node->index);
err = row_undo_ins_remove_sec_rec(node);
- if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
- goto func_exit;
- }
+ if (err == DB_SUCCESS) {
- log_free_check();
+ log_free_check();
- err = row_undo_ins_remove_clust_rec(node);
+ if (node->table->id == DICT_INDEXES_ID) {
-func_exit:
- dict_table_close(node->table, dict_locked);
+ if (!dict_locked) {
+ mutex_enter(&dict_sys->mutex);
+ }
+ }
+
+ // FIXME: We need to update the dict_index_t::space and
+ // page number fields too.
+ err = row_undo_ins_remove_clust_rec(node);
+
+ if (node->table->id == DICT_INDEXES_ID
+ && !dict_locked) {
+
+ mutex_exit(&dict_sys->mutex);
+ }
+ }
+
+ dict_table_close(node->table, dict_locked, FALSE);
node->table = NULL;
diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc
index 4869909f5a6..c1a4ba76052 100644
--- a/storage/innobase/row/row0umod.cc
+++ b/storage/innobase/row/row0umod.cc
@@ -37,6 +37,7 @@ Created 2/27/1997 Heikki Tuuri
#include "mach0data.h"
#include "row0undo.h"
#include "row0vers.h"
+#include "row0log.h"
#include "trx0trx.h"
#include "trx0rec.h"
#include "row0row.h"
@@ -71,11 +72,20 @@ introduced where a call to log_free_check() is bypassed. */
/***********************************************************//**
Undoes a modify in a clustered index record.
@return DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_undo_mod_clust_low(
/*===================*/
undo_node_t* node, /*!< in: row undo node */
+ ulint** offsets,/*!< out: rec_get_offsets() on the record */
+ mem_heap_t** offsets_heap,
+ /*!< in/out: memory heap that can be emptied */
+ mem_heap_t* heap, /*!< in/out: memory heap */
+ const dtuple_t**rebuilt_old_pk,
+ /*!< out: row_log_table_get_pk()
+ before the update, or NULL if
+ the table is not being rebuilt online or
+ the PRIMARY KEY definition does not change */
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr, /*!< in: mtr; must be committed before
latching any further pages */
@@ -83,12 +93,12 @@ row_undo_mod_clust_low(
{
btr_pcur_t* pcur;
btr_cur_t* btr_cur;
- ulint err;
+ dberr_t err;
#ifdef UNIV_DEBUG
ibool success;
#endif /* UNIV_DEBUG */
- pcur = &(node->pcur);
+ pcur = &node->pcur;
btr_cur = btr_pcur_get_btr_cur(pcur);
#ifdef UNIV_DEBUG
@@ -97,31 +107,40 @@ row_undo_mod_clust_low(
btr_pcur_restore_position(mode, pcur, mtr);
ut_ad(success);
+ ut_ad(rec_get_trx_id(btr_cur_get_rec(btr_cur),
+ btr_cur_get_index(btr_cur))
+ == thr_get_trx(thr)->id);
+
+ if (mode != BTR_MODIFY_LEAF
+ && dict_index_is_online_ddl(btr_cur_get_index(btr_cur))) {
+ *rebuilt_old_pk = row_log_table_get_pk(
+ btr_cur_get_rec(btr_cur),
+ btr_cur_get_index(btr_cur), NULL, &heap);
+ } else {
+ *rebuilt_old_pk = NULL;
+ }
- if (mode == BTR_MODIFY_LEAF) {
+ if (mode != BTR_MODIFY_TREE) {
+ ut_ad((mode & ~BTR_ALREADY_S_LATCHED) == BTR_MODIFY_LEAF);
- err = btr_cur_optimistic_update(BTR_NO_LOCKING_FLAG
- | BTR_NO_UNDO_LOG_FLAG
- | BTR_KEEP_SYS_FLAG,
- btr_cur, node->update,
- node->cmpl_info, thr, mtr);
+ err = btr_cur_optimistic_update(
+ BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG
+ | BTR_KEEP_SYS_FLAG,
+ btr_cur, offsets, offsets_heap,
+ node->update, node->cmpl_info,
+ thr, thr_get_trx(thr)->id, mtr);
} else {
- mem_heap_t* heap = NULL;
big_rec_t* dummy_big_rec;
- ut_ad(mode == BTR_MODIFY_TREE);
-
err = btr_cur_pessimistic_update(
BTR_NO_LOCKING_FLAG
| BTR_NO_UNDO_LOG_FLAG
| BTR_KEEP_SYS_FLAG,
- btr_cur, &heap, &dummy_big_rec, node->update,
- node->cmpl_info, thr, mtr);
+ btr_cur, offsets, offsets_heap, heap,
+ &dummy_big_rec, node->update,
+ node->cmpl_info, thr, thr_get_trx(thr)->id, mtr);
ut_a(!dummy_big_rec);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
}
return(err);
@@ -134,8 +153,8 @@ delete-marked record and there no longer exist transactions
that would see the delete-marked record. In other words, we
roll back the insert by purging the record.
@return DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_undo_mod_remove_clust_low(
/*==========================*/
undo_node_t* node, /*!< in: row undo node */
@@ -144,7 +163,7 @@ row_undo_mod_remove_clust_low(
ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
{
btr_cur_t* btr_cur;
- ulint err;
+ dberr_t err;
ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
@@ -159,8 +178,14 @@ row_undo_mod_remove_clust_low(
btr_cur = btr_pcur_get_btr_cur(&node->pcur);
+ /* We are about to remove an old, delete-marked version of the
+ record that may have been delete-marked by a different transaction
+ than the rolling-back one. */
+ ut_ad(rec_get_deleted_flag(btr_cur_get_rec(btr_cur),
+ dict_table_is_comp(node->table)));
+
if (mode == BTR_MODIFY_LEAF) {
- err = btr_cur_optimistic_delete(btr_cur, mtr)
+ err = btr_cur_optimistic_delete(btr_cur, 0, mtr)
? DB_SUCCESS
: DB_FAIL;
} else {
@@ -169,7 +194,7 @@ row_undo_mod_remove_clust_low(
/* This operation is analogous to purge, we can free also
inherited externally stored fields */
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
+ btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
thr_is_recv(thr)
? RB_RECOVERY_PURGE_REC
: RB_NONE, mtr);
@@ -186,8 +211,8 @@ row_undo_mod_remove_clust_low(
Undoes a modify in a clustered index record. Sets also the node state for the
next round of undo.
@return DB_SUCCESS or error code: we may run out of file space */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_undo_mod_clust(
/*===============*/
undo_node_t* node, /*!< in: row undo node */
@@ -195,21 +220,42 @@ row_undo_mod_clust(
{
btr_pcur_t* pcur;
mtr_t mtr;
- ulint err;
+ dberr_t err;
+ dict_index_t* index;
+ bool online;
- ut_ad(node && thr);
+ ut_ad(thr_get_trx(thr) == node->trx);
+ ut_ad(node->trx->dict_operation_lock_mode);
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED)
+ || rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
log_free_check();
+ pcur = &node->pcur;
+ index = btr_cur_get_index(btr_pcur_get_btr_cur(pcur));
+ mtr_start(&mtr);
- pcur = &(node->pcur);
+ online = dict_index_is_online_ddl(index);
+ if (online) {
+ ut_ad(node->trx->dict_operation_lock_mode != RW_X_LATCH);
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
+ }
- mtr_start(&mtr);
+ mem_heap_t* heap = mem_heap_create(1024);
+ mem_heap_t* offsets_heap = NULL;
+ ulint* offsets = NULL;
+ const dtuple_t* rebuilt_old_pk;
/* Try optimistic processing of the record, keeping changes within
the index page */
- err = row_undo_mod_clust_low(node, thr, &mtr, BTR_MODIFY_LEAF);
+ err = row_undo_mod_clust_low(node, &offsets, &offsets_heap,
+ heap, &rebuilt_old_pk,
+ thr, &mtr, online
+ ? BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED
+ : BTR_MODIFY_LEAF);
if (err != DB_SUCCESS) {
btr_pcur_commit_specify_mtr(pcur, &mtr);
@@ -219,7 +265,40 @@ row_undo_mod_clust(
mtr_start(&mtr);
- err = row_undo_mod_clust_low(node, thr, &mtr, BTR_MODIFY_TREE);
+ err = row_undo_mod_clust_low(
+ node, &offsets, &offsets_heap, heap, &rebuilt_old_pk,
+ thr, &mtr, BTR_MODIFY_TREE);
+ ut_ad(err == DB_SUCCESS || err == DB_OUT_OF_FILE_SPACE);
+ }
+
+ /* Online rebuild cannot be initiated while we are holding
+ dict_operation_lock and index->lock. (It can be aborted.) */
+ ut_ad(online || !dict_index_is_online_ddl(index));
+
+ if (err == DB_SUCCESS && online) {
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(&index->lock, RW_LOCK_SHARED)
+ || rw_lock_own(&index->lock, RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+ switch (node->rec_type) {
+ case TRX_UNDO_DEL_MARK_REC:
+ row_log_table_insert(
+ btr_pcur_get_rec(pcur), index, offsets);
+ break;
+ case TRX_UNDO_UPD_EXIST_REC:
+ row_log_table_update(
+ btr_pcur_get_rec(pcur), index, offsets,
+ rebuilt_old_pk);
+ break;
+ case TRX_UNDO_UPD_DEL_REC:
+ row_log_table_delete(
+ btr_pcur_get_rec(pcur), index, offsets,
+ node->trx->id);
+ break;
+ default:
+ ut_ad(0);
+ break;
+ }
}
btr_pcur_commit_specify_mtr(pcur, &mtr);
@@ -228,8 +307,11 @@ row_undo_mod_clust(
mtr_start(&mtr);
- err = row_undo_mod_remove_clust_low(node, thr, &mtr,
- BTR_MODIFY_LEAF);
+ /* It is not necessary to call row_log_table,
+ because the record is delete-marked and would thus
+ be omitted from the rebuilt copy of the table. */
+ err = row_undo_mod_remove_clust_low(
+ node, thr, &mtr, BTR_MODIFY_LEAF);
if (err != DB_SUCCESS) {
btr_pcur_commit_specify_mtr(pcur, &mtr);
@@ -240,6 +322,9 @@ row_undo_mod_clust(
err = row_undo_mod_remove_clust_low(node, thr, &mtr,
BTR_MODIFY_TREE);
+
+ ut_ad(err == DB_SUCCESS
+ || err == DB_OUT_OF_FILE_SPACE);
}
btr_pcur_commit_specify_mtr(pcur, &mtr);
@@ -249,14 +334,18 @@ row_undo_mod_clust(
trx_undo_rec_release(node->trx, node->undo_no);
+ if (offsets_heap) {
+ mem_heap_free(offsets_heap);
+ }
+ mem_heap_free(heap);
return(err);
}
/***********************************************************//**
Delete marks or removes a secondary index entry if found.
@return DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_undo_mod_del_mark_or_remove_sec_low(
/*====================================*/
undo_node_t* node, /*!< in: row undo node */
@@ -270,7 +359,7 @@ row_undo_mod_del_mark_or_remove_sec_low(
btr_cur_t* btr_cur;
ibool success;
ibool old_has;
- ulint err;
+ dberr_t err = DB_SUCCESS;
mtr_t mtr;
mtr_t mtr_vers;
enum row_search_result search_result;
@@ -278,9 +367,30 @@ row_undo_mod_del_mark_or_remove_sec_low(
log_free_check();
mtr_start(&mtr);
- btr_cur = btr_pcur_get_btr_cur(&pcur);
+ if (*index->name == TEMP_INDEX_PREFIX) {
+ /* The index->online_status may change if the
+ index->name starts with TEMP_INDEX_PREFIX (meaning
+ that the index is or was being created online). It is
+ protected by index->lock. */
+ if (mode == BTR_MODIFY_LEAF) {
+ mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
+ } else {
+ ut_ad(mode == BTR_MODIFY_TREE);
+ mtr_x_lock(dict_index_get_lock(index), &mtr);
+ }
+
+ if (row_log_online_op_try(index, entry, 0)) {
+ goto func_exit_no_pcur;
+ }
+ } else {
+ /* For secondary indexes,
+ index->online_status==ONLINE_INDEX_CREATION unless
+ index->name starts with TEMP_INDEX_PREFIX. */
+ ut_ad(!dict_index_is_online_ddl(index));
+ }
- ut_ad(mode == BTR_MODIFY_TREE || mode == BTR_MODIFY_LEAF);
+ btr_cur = btr_pcur_get_btr_cur(&pcur);
search_result = row_search_index_entry(index, entry, mode,
&pcur, &mtr);
@@ -296,8 +406,6 @@ row_undo_mod_del_mark_or_remove_sec_low(
In normal processing, if an update ends in a deadlock
before it has inserted all updated secondary index
records, then the undo will not find those records. */
-
- err = DB_SUCCESS;
goto func_exit;
case ROW_FOUND:
break;
@@ -329,16 +437,14 @@ row_undo_mod_del_mark_or_remove_sec_low(
} else {
/* Remove the index record */
- if (mode == BTR_MODIFY_LEAF) {
- success = btr_cur_optimistic_delete(btr_cur, &mtr);
+ if (mode != BTR_MODIFY_TREE) {
+ success = btr_cur_optimistic_delete(btr_cur, 0, &mtr);
if (success) {
err = DB_SUCCESS;
} else {
err = DB_FAIL;
}
} else {
- ut_ad(mode == BTR_MODIFY_TREE);
-
/* No need to distinguish RB_RECOVERY_PURGE here,
because we are deleting a secondary index record:
the distinction between RB_NORMAL and
@@ -346,7 +452,7 @@ row_undo_mod_del_mark_or_remove_sec_low(
record that contains externally stored
columns. */
ut_ad(!dict_index_is_clust(index));
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
+ btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
RB_NORMAL, &mtr);
/* The delete operation may fail if we have little
@@ -359,6 +465,7 @@ row_undo_mod_del_mark_or_remove_sec_low(
func_exit:
btr_pcur_close(&pcur);
+func_exit_no_pcur:
mtr_commit(&mtr);
return(err);
@@ -373,8 +480,8 @@ not cause problems because in row0sel.cc, in queries we always retrieve the
clustered index record or an earlier version of it, if the secondary index
record through which we do the search is delete-marked.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_undo_mod_del_mark_or_remove_sec(
/*================================*/
undo_node_t* node, /*!< in: row undo node */
@@ -382,7 +489,7 @@ row_undo_mod_del_mark_or_remove_sec(
dict_index_t* index, /*!< in: index */
dtuple_t* entry) /*!< in: index entry */
{
- ulint err;
+ dberr_t err;
err = row_undo_mod_del_mark_or_remove_sec_low(node, thr, index,
entry, BTR_MODIFY_LEAF);
@@ -401,42 +508,67 @@ Delete unmarks a secondary index entry which must be found. It might not be
delete-marked at the moment, but it does not harm to unmark it anyway. We also
need to update the fields of the secondary index record if we updated its
fields but alphabetically they stayed the same, e.g., 'abc' -> 'aBc'.
-@return DB_FAIL or DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static
-ulint
+@retval DB_SUCCESS on success
+@retval DB_FAIL if BTR_MODIFY_TREE should be tried
+@retval DB_OUT_OF_FILE_SPACE when running out of tablespace
+@retval DB_DUPLICATE_KEY if the value was missing
+ and an insert would lead to a duplicate exists */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_undo_mod_del_unmark_sec_and_undo_update(
/*========================================*/
ulint mode, /*!< in: search mode: BTR_MODIFY_LEAF or
BTR_MODIFY_TREE */
que_thr_t* thr, /*!< in: query thread */
dict_index_t* index, /*!< in: index */
- const dtuple_t* entry) /*!< in: index entry */
+ dtuple_t* entry) /*!< in: index entry */
{
- mem_heap_t* heap;
btr_pcur_t pcur;
- btr_cur_t* btr_cur;
+ btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur);
upd_t* update;
- ulint err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
big_rec_t* dummy_big_rec;
mtr_t mtr;
trx_t* trx = thr_get_trx(thr);
+ const ulint flags
+ = BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG;
enum row_search_result search_result;
- /* Ignore indexes that are being created. */
- if (UNIV_UNLIKELY(*index->name == TEMP_INDEX_PREFIX)) {
-
- return(DB_SUCCESS);
- }
+ ut_ad(trx->id);
log_free_check();
mtr_start(&mtr);
- ut_ad(mode == BTR_MODIFY_TREE || mode == BTR_MODIFY_LEAF);
+ if (*index->name == TEMP_INDEX_PREFIX) {
+ /* The index->online_status may change if the
+ index->name starts with TEMP_INDEX_PREFIX (meaning
+ that the index is or was being created online). It is
+ protected by index->lock. */
+ if (mode == BTR_MODIFY_LEAF) {
+ mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
+ } else {
+ ut_ad(mode == BTR_MODIFY_TREE);
+ mtr_x_lock(dict_index_get_lock(index), &mtr);
+ }
+
+ if (row_log_online_op_try(index, entry, trx->id)) {
+ goto func_exit_no_pcur;
+ }
+ } else {
+ /* For secondary indexes,
+ index->online_status==ONLINE_INDEX_CREATION unless
+ index->name starts with TEMP_INDEX_PREFIX. */
+ ut_ad(!dict_index_is_online_ddl(index));
+ }
search_result = row_search_index_entry(index, entry, mode,
&pcur, &mtr);
switch (search_result) {
+ mem_heap_t* heap;
+ mem_heap_t* offsets_heap;
+ ulint* offsets;
case ROW_BUFFERED:
case ROW_NOT_DELETED_REF:
/* These are invalid outcomes, because the mode passed
@@ -444,80 +576,183 @@ row_undo_mod_del_unmark_sec_and_undo_update(
flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
ut_error;
case ROW_NOT_FOUND:
- fputs("InnoDB: error in sec index entry del undo in\n"
- "InnoDB: ", stderr);
- dict_index_name_print(stderr, trx, index);
- fputs("\n"
- "InnoDB: tuple ", stderr);
- dtuple_print(stderr, entry);
- fputs("\n"
- "InnoDB: record ", stderr);
- rec_print(stderr, btr_pcur_get_rec(&pcur), index);
- putc('\n', stderr);
- trx_print(stderr, trx, 0);
- fputs("\n"
- "InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com\n", stderr);
- ut_ad(0);
+ if (*index->name != TEMP_INDEX_PREFIX) {
+ /* During online secondary index creation, it
+ is possible that MySQL is waiting for a
+ meta-data lock upgrade before invoking
+ ha_innobase::commit_inplace_alter_table()
+ while this ROLLBACK is executing. InnoDB has
+ finished building the index, but it does not
+ yet exist in MySQL. In this case, we suppress
+ the printout to the error log. */
+ fputs("InnoDB: error in sec index entry del undo in\n"
+ "InnoDB: ", stderr);
+ dict_index_name_print(stderr, trx, index);
+ fputs("\n"
+ "InnoDB: tuple ", stderr);
+ dtuple_print(stderr, entry);
+ fputs("\n"
+ "InnoDB: record ", stderr);
+ rec_print(stderr, btr_pcur_get_rec(&pcur), index);
+ putc('\n', stderr);
+ trx_print(stderr, trx, 0);
+ fputs("\n"
+ "InnoDB: Submit a detailed bug report"
+ " to http://bugs.mysql.com\n", stderr);
+
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "record in index %s was not found"
+ " on rollback, trying to insert",
+ index->name);
+ }
+
+ if (btr_cur->up_match >= dict_index_get_n_unique(index)
+ || btr_cur->low_match >= dict_index_get_n_unique(index)) {
+ if (*index->name != TEMP_INDEX_PREFIX) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "record in index %s was not found on"
+ " rollback, and a duplicate exists",
+ index->name);
+ }
+ err = DB_DUPLICATE_KEY;
+ break;
+ }
+
+ /* Insert the missing record that we were trying to
+ delete-unmark. */
+ big_rec_t* big_rec;
+ rec_t* insert_rec;
+ offsets = NULL;
+ offsets_heap = NULL;
+
+ err = btr_cur_optimistic_insert(
+ flags, btr_cur, &offsets, &offsets_heap,
+ entry, &insert_rec, &big_rec,
+ 0, thr, &mtr);
+ ut_ad(!big_rec);
+
+ if (err == DB_FAIL && mode == BTR_MODIFY_TREE) {
+ err = btr_cur_pessimistic_insert(
+ flags, btr_cur,
+ &offsets, &offsets_heap,
+ entry, &insert_rec, &big_rec,
+ 0, thr, &mtr);
+ /* There are no off-page columns in
+ secondary indexes. */
+ ut_ad(!big_rec);
+ }
+
+ if (err == DB_SUCCESS) {
+ page_update_max_trx_id(
+ btr_cur_get_block(btr_cur),
+ btr_cur_get_page_zip(btr_cur),
+ trx->id, &mtr);
+ }
+
+ if (offsets_heap) {
+ mem_heap_free(offsets_heap);
+ }
+
break;
case ROW_FOUND:
- btr_cur = btr_pcur_get_btr_cur(&pcur);
- err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG,
- btr_cur, FALSE, thr, &mtr);
+ err = btr_cur_del_mark_set_sec_rec(
+ BTR_NO_LOCKING_FLAG,
+ btr_cur, FALSE, thr, &mtr);
ut_a(err == DB_SUCCESS);
- heap = mem_heap_create(100);
-
+ heap = mem_heap_create(
+ sizeof(upd_t)
+ + dtuple_get_n_fields(entry) * sizeof(upd_field_t));
+ offsets_heap = NULL;
+ offsets = rec_get_offsets(
+ btr_cur_get_rec(btr_cur),
+ index, NULL, ULINT_UNDEFINED, &offsets_heap);
update = row_upd_build_sec_rec_difference_binary(
- index, entry, btr_cur_get_rec(btr_cur), trx, heap);
+ btr_cur_get_rec(btr_cur), index, offsets, entry, heap);
if (upd_get_n_fields(update) == 0) {
/* Do nothing */
- } else if (mode == BTR_MODIFY_LEAF) {
+ } else if (mode != BTR_MODIFY_TREE) {
/* Try an optimistic updating of the record, keeping
changes within the page */
+ /* TODO: pass offsets, not &offsets */
err = btr_cur_optimistic_update(
- BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG,
- btr_cur, update, 0, thr, &mtr);
+ flags, btr_cur, &offsets, &offsets_heap,
+ update, 0, thr, thr_get_trx(thr)->id, &mtr);
switch (err) {
case DB_OVERFLOW:
case DB_UNDERFLOW:
case DB_ZIP_OVERFLOW:
err = DB_FAIL;
+ default:
+ break;
}
} else {
- ut_a(mode == BTR_MODIFY_TREE);
err = btr_cur_pessimistic_update(
- BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG,
- btr_cur, &heap, &dummy_big_rec,
- update, 0, thr, &mtr);
+ flags, btr_cur, &offsets, &offsets_heap,
+ heap, &dummy_big_rec,
+ update, 0, thr, thr_get_trx(thr)->id, &mtr);
ut_a(!dummy_big_rec);
}
mem_heap_free(heap);
+ mem_heap_free(offsets_heap);
}
btr_pcur_close(&pcur);
+func_exit_no_pcur:
mtr_commit(&mtr);
return(err);
}
/***********************************************************//**
+Flags a secondary index corrupted. */
+static __attribute__((nonnull))
+void
+row_undo_mod_sec_flag_corrupted(
+/*============================*/
+ trx_t* trx, /*!< in/out: transaction */
+ dict_index_t* index) /*!< in: secondary index */
+{
+ ut_ad(!dict_index_is_clust(index));
+
+ switch (trx->dict_operation_lock_mode) {
+ case RW_S_LATCH:
+ /* Because row_undo() is holding an S-latch
+ on the data dictionary during normal rollback,
+ we can only mark the index corrupted in the
+ data dictionary cache. TODO: fix this somehow.*/
+ mutex_enter(&dict_sys->mutex);
+ dict_set_corrupted_index_cache_only(index, index->table);
+ mutex_exit(&dict_sys->mutex);
+ break;
+ default:
+ ut_ad(0);
+ /* fall through */
+ case RW_X_LATCH:
+ /* This should be the rollback of a data dictionary
+ transaction. */
+ dict_set_corrupted(index, trx, "rollback");
+ }
+}
+
+/***********************************************************//**
Undoes a modify in secondary indexes when undo record type is UPD_DEL.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_undo_mod_upd_del_sec(
/*=====================*/
undo_node_t* node, /*!< in: row undo node */
que_thr_t* thr) /*!< in: query thread */
{
mem_heap_t* heap;
- ulint err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
+ ut_ad(!node->undo_row);
heap = mem_heap_create(1024);
@@ -530,6 +765,13 @@ row_undo_mod_upd_del_sec(
continue;
}
+ /* During online index creation,
+ HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE should
+ guarantee that any active transaction has not modified
+ indexed columns such that col->ord_part was 0 at the
+ time when the undo log record was written. When we get
+ to roll back an undo log entry TRX_UNDO_DEL_MARK_REC,
+ it should always cover all affected indexes. */
entry = row_build_index_entry(
node->row, node->ext, index, heap);
@@ -566,15 +808,17 @@ row_undo_mod_upd_del_sec(
/***********************************************************//**
Undoes a modify in secondary indexes when undo record type is DEL_MARK.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_undo_mod_del_mark_sec(
/*======================*/
undo_node_t* node, /*!< in: row undo node */
que_thr_t* thr) /*!< in: query thread */
{
mem_heap_t* heap;
- ulint err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
+
+ ut_ad(!node->undo_row);
heap = mem_heap_create(1024);
@@ -587,6 +831,13 @@ row_undo_mod_del_mark_sec(
continue;
}
+ /* During online index creation,
+ HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE should
+ guarantee that any active transaction has not modified
+ indexed columns such that col->ord_part was 0 at the
+ time when the undo log record was written. When we get
+ to roll back an undo log entry TRX_UNDO_DEL_MARK_REC,
+ it should always cover all affected indexes. */
entry = row_build_index_entry(
node->row, node->ext, index, heap);
@@ -599,8 +850,17 @@ row_undo_mod_del_mark_sec(
BTR_MODIFY_TREE, thr, index, entry);
}
- if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
-
+ if (err == DB_DUPLICATE_KEY) {
+ row_undo_mod_sec_flag_corrupted(
+ thr_get_trx(thr), index);
+ err = DB_SUCCESS;
+ /* Do not return any error to the caller. The
+ duplicate will be reported by ALTER TABLE or
+ CREATE UNIQUE INDEX. Unfortunately we cannot
+ report the duplicate key value to the DDL
+ thread, because the altered_table object is
+ private to its call stack. */
+ } else if (err != DB_SUCCESS) {
break;
}
@@ -616,18 +876,18 @@ row_undo_mod_del_mark_sec(
/***********************************************************//**
Undoes a modify in secondary indexes when undo record type is UPD_EXIST.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_undo_mod_upd_exist_sec(
/*=======================*/
undo_node_t* node, /*!< in: row undo node */
que_thr_t* thr) /*!< in: query thread */
{
mem_heap_t* heap;
- ulint err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
if (node->index == NULL
- || (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
+ || ((node->cmpl_info & UPD_NODE_NO_ORD_CHANGE))) {
/* No change in secondary indexes */
return(err);
@@ -713,7 +973,11 @@ row_undo_mod_upd_exist_sec(
BTR_MODIFY_TREE, thr, index, entry);
}
- if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
+ if (err == DB_DUPLICATE_KEY) {
+ row_undo_mod_sec_flag_corrupted(
+ thr_get_trx(thr), index);
+ err = DB_SUCCESS;
+ } else if (err != DB_SUCCESS) {
break;
}
@@ -728,12 +992,11 @@ row_undo_mod_upd_exist_sec(
/***********************************************************//**
Parses the row reference and other info in a modify undo log record. */
-static
+static __attribute__((nonnull))
void
row_undo_mod_parse_undo_rec(
/*========================*/
undo_node_t* node, /*!< in: row undo node */
- que_thr_t* thr, /*!< in: query thread */
ibool dict_locked) /*!< in: TRUE if own dict_sys->mutex */
{
dict_index_t* clust_index;
@@ -745,16 +1008,13 @@ row_undo_mod_parse_undo_rec(
ulint info_bits;
ulint type;
ulint cmpl_info;
- ibool dummy_extern;
- trx_t* trx;
+ bool dummy_extern;
- ut_ad(node && thr);
- trx = thr_get_trx(thr);
ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info,
&dummy_extern, &undo_no, &table_id);
node->rec_type = type;
- node->table = dict_table_open_on_id(table_id, dict_locked);
+ node->table = dict_table_open_on_id(table_id, dict_locked, FALSE);
/* TODO: other fixes associated with DROP TABLE + rollback in the
same table by another user */
@@ -765,7 +1025,7 @@ row_undo_mod_parse_undo_rec(
}
if (node->table->ibd_file_missing) {
- dict_table_close(node->table, dict_locked);
+ dict_table_close(node->table, dict_locked, FALSE);
/* We skip undo operations to missing .ibd files */
node->table = NULL;
@@ -782,14 +1042,14 @@ row_undo_mod_parse_undo_rec(
node->heap);
trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id,
- roll_ptr, info_bits, trx,
+ roll_ptr, info_bits, node->trx,
node->heap, &(node->update));
node->new_trx_id = trx_id;
node->cmpl_info = cmpl_info;
if (!row_undo_search_clust_to_pcur(node)) {
- dict_table_close(node->table, dict_locked);
+ dict_table_close(node->table, dict_locked, FALSE);
node->table = NULL;
}
@@ -799,21 +1059,23 @@ row_undo_mod_parse_undo_rec(
Undoes a modify operation on a row of a table.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
row_undo_mod(
/*=========*/
undo_node_t* node, /*!< in: row undo node */
que_thr_t* thr) /*!< in: query thread */
{
- ulint err;
- ibool dict_locked;
+ dberr_t err;
+ ibool dict_locked;
ut_ad(node && thr);
ut_ad(node->state == UNDO_NODE_MODIFY);
dict_locked = thr_get_trx(thr)->dict_operation_lock_mode == RW_X_LATCH;
- row_undo_mod_parse_undo_rec(node, thr, dict_locked);
+ ut_ad(thr_get_trx(thr) == node->trx);
+
+ row_undo_mod_parse_undo_rec(node, dict_locked);
if (node->table == NULL) {
/* It is already undone, or will be undone by another query
@@ -825,8 +1087,18 @@ row_undo_mod(
return(DB_SUCCESS);
}
- node->index = dict_table_get_next_index(
- dict_table_get_first_index(node->table));
+ node->index = dict_table_get_first_index(node->table);
+ ut_ad(dict_index_is_clust(node->index));
+
+ if (dict_index_is_online_ddl(node->index)) {
+ /* Note that we are rolling back this transaction, so
+ that all inserts and updates with this DB_TRX_ID can
+ be skipped. */
+ row_log_table_rollback(node->index, node->trx->id);
+ }
+
+ /* Skip the clustered index (the first index) */
+ node->index = dict_table_get_next_index(node->index);
/* Skip all corrupted secondary index */
dict_table_skip_corrupt_index(node->index);
@@ -851,7 +1123,7 @@ row_undo_mod(
err = row_undo_mod_clust(node, thr);
}
- dict_table_close(node->table, dict_locked);
+ dict_table_close(node->table, dict_locked, FALSE);
node->table = NULL;
diff --git a/storage/innobase/row/row0undo.cc b/storage/innobase/row/row0undo.cc
index 757d3544ba4..9977a1e8f04 100644
--- a/storage/innobase/row/row0undo.cc
+++ b/storage/innobase/row/row0undo.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -216,8 +216,9 @@ row_undo_search_clust_to_pcur(
}
node->row = row_build(ROW_COPY_DATA, clust_index, rec,
- offsets, NULL, ext, node->heap);
- if (node->update) {
+ offsets, NULL,
+ NULL, NULL, ext, node->heap);
+ if (node->rec_type == TRX_UNDO_UPD_EXIST_REC) {
node->undo_row = dtuple_copy(node->row, node->heap);
row_upd_replace(node->undo_row, &node->undo_ext,
clust_index, node->update, node->heap);
@@ -244,14 +245,14 @@ Fetches an undo log record and does the undo for the recorded operation.
If none left, or a partial rollback completed, returns control to the
parent node, which is always a query thread node.
@return DB_SUCCESS if operation successfully completed, else error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_undo(
/*=====*/
undo_node_t* node, /*!< in: row undo node */
que_thr_t* thr) /*!< in: query thread */
{
- ulint err;
+ dberr_t err;
trx_t* trx;
roll_ptr_t roll_ptr;
ibool locked_data_dict;
@@ -332,7 +333,7 @@ row_undo_step(
/*==========*/
que_thr_t* thr) /*!< in: query thread */
{
- ulint err;
+ dberr_t err;
undo_node_t* node;
trx_t* trx;
@@ -348,17 +349,17 @@ row_undo_step(
err = row_undo(node, thr);
- trx->error_state = static_cast<enum db_err>(err);
+ trx->error_state = err;
if (err != DB_SUCCESS) {
/* SQL error detected */
- fprintf(stderr, "InnoDB: Fatal error %lu in rollback.\n",
- (ulong) err);
+ fprintf(stderr, "InnoDB: Fatal error (%s) in rollback.\n",
+ ut_strerr(err));
if (err == DB_OUT_OF_FILE_SPACE) {
fprintf(stderr,
- "InnoDB: Error 13 means out of tablespace.\n"
+ "InnoDB: Out of tablespace.\n"
"InnoDB: Consider increasing"
" your tablespace.\n");
diff --git a/storage/innobase/row/row0upd.cc b/storage/innobase/row/row0upd.cc
index 28faa59add8..f97c0c3c82b 100644
--- a/storage/innobase/row/row0upd.cc
+++ b/storage/innobase/row/row0upd.cc
@@ -23,14 +23,13 @@ Update of a row
Created 12/27/1996 Heikki Tuuri
*******************************************************/
-#include "m_string.h" /* for my_sys.h */
-#include "my_sys.h" /* DEBUG_SYNC_C */
#include "row0upd.h"
#ifdef UNIV_NONINL
#include "row0upd.ic"
#endif
+#include "ha_prototypes.h"
#include "dict0dict.h"
#include "trx0undo.h"
#include "rem0rec.h"
@@ -43,8 +42,9 @@ Created 12/27/1996 Heikki Tuuri
#include "que0que.h"
#include "row0ext.h"
#include "row0ins.h"
-#include "row0sel.h"
+#include "row0log.h"
#include "row0row.h"
+#include "row0sel.h"
#include "rem0cmp.h"
#include "lock0lock.h"
#include "log0log.h"
@@ -178,8 +178,8 @@ NOTE that this function will temporarily commit mtr and lose the
pcur position!
@return DB_SUCCESS or an error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_upd_check_references_constraints(
/*=================================*/
upd_node_t* node, /*!< in: row update node */
@@ -197,7 +197,7 @@ row_upd_check_references_constraints(
trx_t* trx;
const rec_t* rec;
ulint n_ext;
- ulint err;
+ dberr_t err;
ibool got_s_lock = FALSE;
if (UT_LIST_GET_FIRST(table->referenced_list) == NULL) {
@@ -212,11 +212,12 @@ row_upd_check_references_constraints(
heap = mem_heap_create(500);
- entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index, offsets,
- &n_ext, heap);
+ entry = row_rec_to_index_entry(rec, index, offsets, &n_ext, heap);
mtr_commit(mtr);
+ DEBUG_SYNC_C("foreign_constraint_check_for_update");
+
mtr_start(mtr);
if (trx->dict_operation_lock_mode == 0) {
@@ -225,6 +226,7 @@ row_upd_check_references_constraints(
row_mysql_freeze_data_dictionary(trx);
}
+run_again:
foreign = UT_LIST_GET_FIRST(table->referenced_list);
while (foreign) {
@@ -238,18 +240,20 @@ row_upd_check_references_constraints(
|| row_upd_changes_first_fields_binary(
entry, index, node->update,
foreign->n_fields))) {
+ dict_table_t* foreign_table = foreign->foreign_table;
dict_table_t* ref_table = NULL;
- if (foreign->foreign_table == NULL) {
+ if (foreign_table == NULL) {
ref_table = dict_table_open_on_name(
- foreign->foreign_table_name_lookup, FALSE);
+ foreign->foreign_table_name_lookup,
+ FALSE, FALSE, DICT_ERR_IGNORE_NONE);
}
- if (foreign->foreign_table) {
+ if (foreign_table) {
os_inc_counter(dict_sys->mutex,
- foreign->foreign_table
+ foreign_table
->n_foreign_key_checks_running);
}
@@ -261,18 +265,20 @@ row_upd_check_references_constraints(
err = row_ins_check_foreign_constraint(
FALSE, foreign, table, entry, thr);
- if (foreign->foreign_table) {
+ if (foreign_table) {
os_dec_counter(dict_sys->mutex,
- foreign->foreign_table
+ foreign_table
->n_foreign_key_checks_running);
}
if (ref_table != NULL) {
- dict_table_close(ref_table, FALSE);
+ dict_table_close(ref_table, FALSE, FALSE);
}
- if (err != DB_SUCCESS) {
-
+ /* Some table foreign key dropped, try again */
+ if (err == DB_DICT_CHANGED) {
+ goto run_again;
+ } else if (err != DB_SUCCESS) {
goto func_exit;
}
}
@@ -289,6 +295,8 @@ func_exit:
mem_heap_free(heap);
+ DEBUG_SYNC_C("foreign_constraint_check_for_update_done");
+
return(err);
}
@@ -465,6 +473,47 @@ row_upd_changes_field_size_or_external(
return(FALSE);
}
+
+/***********************************************************//**
+Returns true if row update contains disowned external fields.
+@return true if the update contains disowned external fields. */
+UNIV_INTERN
+bool
+row_upd_changes_disowned_external(
+/*==============================*/
+ const upd_t* update) /*!< in: update vector */
+{
+ const upd_field_t* upd_field;
+ const dfield_t* new_val;
+ ulint new_len;
+ ulint n_fields;
+ ulint i;
+
+ n_fields = upd_get_n_fields(update);
+
+ for (i = 0; i < n_fields; i++) {
+ const byte* field_ref;
+
+ upd_field = upd_get_nth_field(update, i);
+ new_val = &(upd_field->new_val);
+ new_len = dfield_get_len(new_val);
+
+ if (!dfield_is_ext(new_val)) {
+ continue;
+ }
+
+ ut_ad(new_len >= BTR_EXTERN_FIELD_REF_SIZE);
+
+ field_ref = static_cast<const byte*>(dfield_get_data(new_val))
+ + new_len - BTR_EXTERN_FIELD_REF_SIZE;
+
+ if (field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG) {
+ return(true);
+ }
+ }
+
+ return(false);
+}
#endif /* !UNIV_HOTBACKUP */
/***********************************************************//**
@@ -560,7 +609,7 @@ byte*
row_upd_write_sys_vals_to_log(
/*==========================*/
dict_index_t* index, /*!< in: clustered index */
- trx_t* trx, /*!< in: transaction */
+ trx_id_t trx_id, /*!< in: transaction id */
roll_ptr_t roll_ptr,/*!< in: roll ptr of the undo log record */
byte* log_ptr,/*!< pointer to a buffer of size > 20 opened
in mlog */
@@ -576,7 +625,7 @@ row_upd_write_sys_vals_to_log(
trx_write_roll_ptr(log_ptr, roll_ptr);
log_ptr += DATA_ROLL_PTR_LEN;
- log_ptr += mach_ull_write_compressed(log_ptr, trx->id);
+ log_ptr += mach_ull_write_compressed(log_ptr, trx_id);
return(log_ptr);
}
@@ -779,10 +828,10 @@ UNIV_INTERN
upd_t*
row_upd_build_sec_rec_difference_binary(
/*====================================*/
+ const rec_t* rec, /*!< in: secondary index record */
dict_index_t* index, /*!< in: index */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
const dtuple_t* entry, /*!< in: entry to insert */
- const rec_t* rec, /*!< in: secondary index record */
- trx_t* trx, /*!< in: transaction */
mem_heap_t* heap) /*!< in: memory heap from which allocated */
{
upd_field_t* upd_field;
@@ -792,18 +841,16 @@ row_upd_build_sec_rec_difference_binary(
upd_t* update;
ulint n_diff;
ulint i;
- ulint offsets_[REC_OFFS_SMALL_SIZE];
- const ulint* offsets;
- rec_offs_init(offsets_);
/* This function is used only for a secondary index */
ut_a(!dict_index_is_clust(index));
+ ut_ad(rec_offs_validate(rec, index, offsets));
+ ut_ad(rec_offs_n_fields(offsets) == dtuple_get_n_fields(entry));
+ ut_ad(!rec_offs_any_extern(offsets));
update = upd_create(dtuple_get_n_fields(entry), heap);
n_diff = 0;
- offsets = rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap);
for (i = 0; i < dtuple_get_n_fields(entry); i++) {
@@ -828,7 +875,7 @@ row_upd_build_sec_rec_difference_binary(
dfield_copy(&(upd_field->new_val), dfield);
- upd_field_set_field_no(upd_field, i, index, trx);
+ upd_field_set_field_no(upd_field, i, index, NULL);
n_diff++;
}
@@ -846,12 +893,15 @@ the equal ordering fields. NOTE: we compare the fields as binary strings!
@return own: update vector of differing fields, excluding roll ptr and
trx id */
UNIV_INTERN
-upd_t*
+const upd_t*
row_upd_build_difference_binary(
/*============================*/
dict_index_t* index, /*!< in: clustered index */
const dtuple_t* entry, /*!< in: entry to insert */
const rec_t* rec, /*!< in: clustered index record */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec,index), or NULL */
+ bool no_sys, /*!< in: skip the system columns
+ DB_TRX_ID and DB_ROLL_PTR */
trx_t* trx, /*!< in: transaction */
mem_heap_t* heap) /*!< in: memory heap from which allocated */
{
@@ -861,11 +911,9 @@ row_upd_build_difference_binary(
ulint len;
upd_t* update;
ulint n_diff;
- ulint roll_ptr_pos;
ulint trx_id_pos;
ulint i;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
- const ulint* offsets;
rec_offs_init(offsets_);
/* This function is used only for a clustered index */
@@ -875,11 +923,16 @@ row_upd_build_difference_binary(
n_diff = 0;
- roll_ptr_pos = dict_index_get_sys_col_pos(index, DATA_ROLL_PTR);
trx_id_pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
+ ut_ad(dict_index_get_sys_col_pos(index, DATA_ROLL_PTR)
+ == trx_id_pos + 1);
- offsets = rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap);
+ if (!offsets) {
+ offsets = rec_get_offsets(rec, index, offsets_,
+ ULINT_UNDEFINED, &heap);
+ } else {
+ ut_ad(rec_offs_validate(rec, index, offsets));
+ }
for (i = 0; i < dtuple_get_n_fields(entry); i++) {
@@ -890,9 +943,9 @@ row_upd_build_difference_binary(
/* NOTE: we compare the fields as binary strings!
(No collation) */
- if (i == trx_id_pos || i == roll_ptr_pos) {
+ if (no_sys && (i == trx_id_pos || i == trx_id_pos + 1)) {
- goto skip_compare;
+ continue;
}
if (!dfield_is_ext(dfield)
@@ -907,8 +960,6 @@ row_upd_build_difference_binary(
n_diff++;
}
-skip_compare:
- ;
}
update->n_fields = n_diff;
@@ -1386,9 +1437,9 @@ row_upd_changes_some_index_ord_field_binary(
/***********************************************************//**
Checks if an FTS Doc ID column is affected by an UPDATE.
-@return TRUE if the Doc ID column is changed */
+@return whether the Doc ID column is changed */
UNIV_INTERN
-ulint
+bool
row_upd_changes_doc_id(
/*===================*/
dict_table_t* table, /*!< in: table */
@@ -1431,61 +1482,6 @@ row_upd_changes_fts_column(
}
/***********************************************************//**
-Checks if an update vector changes the table's FTS-indexed columns.
-NOTE: must not be called for tables which do not have an FTS-index.
-Also, the vector returned must be explicitly freed as it's allocated
-using the ut_malloc() allocator.
-@return vector of FTS indexes that were affected by the update */
-UNIV_INTERN
-ib_vector_t*
-row_upd_changes_fts_columns(
-/*========================*/
- dict_table_t* table, /*!< in: table */
- upd_t* update) /*!< in: update vector for the row */
-{
- ulint i;
- ulint offset;
- fts_t* fts = table->fts;
- ib_vector_t* updated_fts_indexes = NULL;
-
- for (i = 0; i < upd_get_n_fields(update); ++i) {
- upd_field_t* upd_field = upd_get_nth_field(update, i);
-
- offset = row_upd_changes_fts_column(table, upd_field);
-
- if (offset != ULINT_UNDEFINED) {
-
- dict_index_t* index;
-
- /* TODO: Investigate if we can check whether the
- existing set of affected indexes matches the new
- affected set. If matched then we don't need to
- do the extra malloc()/free(). */
-
- /* This vector is created from the ut_malloc()
- allocator because we only want to keep one instance
- around not matter how many times this row is
- updated. The old entry should be deleted when
- we update the FTS row info with this new vector. */
- if (updated_fts_indexes == NULL) {
- ib_alloc_t* ut_alloc;
-
- ut_alloc = ib_ut_allocator_create();
-
- updated_fts_indexes = ib_vector_create(
- ut_alloc, sizeof(dict_index_t*), 2);
- }
-
- index = static_cast<dict_index_t*>(
- ib_vector_getp(fts->indexes, offset));
- ib_vector_push(updated_fts_indexes, &index);
- }
- }
-
- return(updated_fts_indexes);
-}
-
-/***********************************************************//**
Checks if an update vector changes some of the first ordering fields of an
index record. This is only used in foreign key checks and we can assume
that index does not contain column prefixes.
@@ -1633,7 +1629,7 @@ row_upd_store_row(
}
node->row = row_build(ROW_COPY_DATA, clust_index, rec, offsets,
- NULL, ext, node->heap);
+ NULL, NULL, NULL, ext, node->heap);
if (node->is_delete) {
node->upd_row = NULL;
node->upd_ext = NULL;
@@ -1652,8 +1648,8 @@ row_upd_store_row(
Updates a secondary index entry of a row.
@return DB_SUCCESS if operation successfully completed, else error
code or DB_LOCK_WAIT */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_upd_sec_index_entry(
/*====================*/
upd_node_t* node, /*!< in: row update node */
@@ -1667,11 +1663,13 @@ row_upd_sec_index_entry(
dict_index_t* index;
btr_cur_t* btr_cur;
ibool referenced;
- ulint err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
trx_t* trx = thr_get_trx(thr);
- ulint mode = BTR_MODIFY_LEAF;
+ ulint mode;
enum row_search_result search_result;
+ ut_ad(trx->id);
+
index = node->index;
referenced = row_upd_index_is_referenced(index, trx);
@@ -1682,19 +1680,74 @@ row_upd_sec_index_entry(
entry = row_build_index_entry(node->row, node->ext, index, heap);
ut_a(entry);
+ log_free_check();
+
+#ifdef UNIV_DEBUG
+ /* Work around Bug#14626800 ASSERTION FAILURE IN DEBUG_SYNC().
+ Once it is fixed, remove the 'ifdef', 'if' and this comment. */
+ if (!trx->ddl) {
+ DEBUG_SYNC_C_IF_THD(trx->mysql_thd,
+ "before_row_upd_sec_index_entry");
+ }
+#endif /* UNIV_DEBUG */
+
mtr_start(&mtr);
+ if (*index->name == TEMP_INDEX_PREFIX) {
+ /* The index->online_status may change if the
+ index->name starts with TEMP_INDEX_PREFIX (meaning
+ that the index is or was being created online). It is
+ protected by index->lock. */
+
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
+
+ switch (dict_index_get_online_status(index)) {
+ case ONLINE_INDEX_COMPLETE:
+ /* This is a normal index. Do not log anything.
+ Perform the update on the index tree directly. */
+ break;
+ case ONLINE_INDEX_CREATION:
+ /* Log a DELETE and optionally INSERT. */
+ row_log_online_op(index, entry, 0);
+
+ if (!node->is_delete) {
+ mem_heap_empty(heap);
+ entry = row_build_index_entry(
+ node->upd_row, node->upd_ext,
+ index, heap);
+ ut_a(entry);
+ row_log_online_op(index, entry, trx->id);
+ }
+ /* fall through */
+ case ONLINE_INDEX_ABORTED:
+ case ONLINE_INDEX_ABORTED_DROPPED:
+ mtr_commit(&mtr);
+ goto func_exit;
+ }
+
+ /* We can only buffer delete-mark operations if there
+ are no foreign key constraints referring to the index. */
+ mode = referenced
+ ? BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED
+ : BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED
+ | BTR_DELETE_MARK;
+ } else {
+ /* For secondary indexes,
+ index->online_status==ONLINE_INDEX_CREATION unless
+ index->name starts with TEMP_INDEX_PREFIX. */
+ ut_ad(!dict_index_is_online_ddl(index));
+
+ /* We can only buffer delete-mark operations if there
+ are no foreign key constraints referring to the index. */
+ mode = referenced
+ ? BTR_MODIFY_LEAF
+ : BTR_MODIFY_LEAF | BTR_DELETE_MARK;
+ }
+
/* Set the query thread, so that ibuf_insert_low() will be
able to invoke thd_get_trx(). */
btr_pcur_get_btr_cur(&pcur)->thr = thr;
- /* We can only try to use the insert/delete buffer to buffer
- delete-mark operations if the index we're modifying has no foreign
- key constraints referring to it. */
- if (!referenced) {
- mode |= BTR_DELETE_MARK;
- }
-
search_result = row_search_index_entry(index, entry, mode,
&pcur, &mtr);
@@ -1711,6 +1764,20 @@ row_upd_sec_index_entry(
break;
case ROW_NOT_FOUND:
+ if (*index->name == TEMP_INDEX_PREFIX) {
+ /* When online CREATE INDEX copied the update
+ that we already made to the clustered index,
+ and completed the secondary index creation
+ before we got here, the old secondary index
+ record would not exist. The CREATE INDEX
+ should be waiting for a MySQL meta-data lock
+ upgrade at least until this UPDATE
+ returns. After that point, the
+ TEMP_INDEX_PREFIX would be dropped from the
+ index name in commit_inplace_alter_table(). */
+ break;
+ }
+
fputs("InnoDB: error in sec index entry update in\n"
"InnoDB: ", stderr);
dict_index_name_print(stderr, trx, index);
@@ -1730,11 +1797,9 @@ row_upd_sec_index_entry(
case ROW_FOUND:
/* Delete mark the old index record; it can already be
delete marked if we return after a lock wait in
- row_ins_index_entry below */
-
+ row_ins_sec_index_entry() below */
if (!rec_get_deleted_flag(
- rec, dict_table_is_comp(index->table))) {
-
+ rec, dict_table_is_comp(index->table))) {
err = btr_cur_del_mark_set_sec_rec(
0, btr_cur, TRUE, thr, &mtr);
@@ -1764,13 +1829,15 @@ row_upd_sec_index_entry(
goto func_exit;
}
+ mem_heap_empty(heap);
+
/* Build a new index entry */
entry = row_build_index_entry(node->upd_row, node->upd_ext,
index, heap);
ut_a(entry);
/* Insert new index entry */
- err = row_ins_index_entry(index, entry, 0, TRUE, thr);
+ err = row_ins_sec_index_entry(index, entry, thr);
func_exit:
mem_heap_free(heap);
@@ -1783,8 +1850,8 @@ Updates the secondary index record if it is changed in the row update or
deletes it if this is a delete.
@return DB_SUCCESS if operation successfully completed, else error
code or DB_LOCK_WAIT */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_upd_sec_step(
/*=============*/
upd_node_t* node, /*!< in: row update node */
@@ -1897,8 +1964,8 @@ fields of the clustered index record change. This should be quite rare in
database applications.
@return DB_SUCCESS if operation successfully completed, else error
code or DB_LOCK_WAIT */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_upd_clust_rec_by_insert(
/*========================*/
upd_node_t* node, /*!< in/out: row update node */
@@ -1914,7 +1981,7 @@ row_upd_clust_rec_by_insert(
trx_t* trx;
dict_table_t* table;
dtuple_t* entry;
- ulint err;
+ dberr_t err;
ibool change_ownership = FALSE;
rec_t* rec;
ulint* offsets = NULL;
@@ -1939,7 +2006,7 @@ row_upd_clust_rec_by_insert(
default:
ut_error;
case UPD_NODE_INSERT_BLOB:
- /* A lock wait occurred in row_ins_index_entry() in
+ /* A lock wait occurred in row_ins_clust_index_entry() in
the previous invocation of this function. Mark the
off-page columns in the entry inherited. */
@@ -1948,7 +2015,7 @@ row_upd_clust_rec_by_insert(
ut_a(change_ownership);
/* fall through */
case UPD_NODE_INSERT_CLUSTERED:
- /* A lock wait occurred in row_ins_index_entry() in
+ /* A lock wait occurred in row_ins_clust_index_entry() in
the previous invocation of this function. */
break;
case UPD_NODE_UPDATE_CLUSTERED:
@@ -1961,8 +2028,8 @@ row_upd_clust_rec_by_insert(
ut_ad(page_rec_is_user_rec(rec));
err = btr_cur_del_mark_set_clust_rec(
- BTR_NO_LOCKING_FLAG, btr_cur_get_block(btr_cur),
- rec, index, offsets, TRUE, thr, mtr);
+ btr_cur_get_block(btr_cur), rec, index, offsets,
+ thr, mtr);
if (err != DB_SUCCESS) {
err_exit:
mtr_commit(mtr);
@@ -1999,9 +2066,9 @@ err_exit:
mtr_commit(mtr);
- err = row_ins_index_entry(index, entry,
- node->upd_ext ? node->upd_ext->n_ext : 0,
- TRUE, thr);
+ err = row_ins_clust_index_entry(
+ index, entry, thr,
+ node->upd_ext ? node->upd_ext->n_ext : 0);
node->state = change_ownership
? UPD_NODE_INSERT_BLOB
: UPD_NODE_INSERT_CLUSTERED;
@@ -2027,11 +2094,17 @@ err_exit:
offsets = rec_get_offsets(rec, index, offsets,
ULINT_UNDEFINED, &heap);
ut_ad(page_rec_is_user_rec(rec));
+ ut_ad(rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
btr_cur_disown_inherited_fields(
btr_cur_get_page_zip(btr_cur),
rec, index, offsets, node->update, mtr);
+ /* It is not necessary to call row_log_table for
+ this, because during online table rebuild, purge will
+ not free any BLOBs in the table, whether or not they
+ are owned by the clustered index record. */
+
mtr_commit(mtr);
}
@@ -2045,20 +2118,24 @@ Updates a clustered index record of a row when the ordering fields do
not change.
@return DB_SUCCESS if operation successfully completed, else error
code or DB_LOCK_WAIT */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_upd_clust_rec(
/*==============*/
upd_node_t* node, /*!< in: row update node */
dict_index_t* index, /*!< in: clustered index */
+ ulint* offsets,/*!< in: rec_get_offsets() on node->pcur */
+ mem_heap_t** offsets_heap,
+ /*!< in/out: memory heap, can be emptied */
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr) /*!< in: mtr; gets committed here */
{
- mem_heap_t* heap = NULL;
- big_rec_t* big_rec = NULL;
+ mem_heap_t* heap = NULL;
+ big_rec_t* big_rec = NULL;
btr_pcur_t* pcur;
btr_cur_t* btr_cur;
- ulint err;
+ dberr_t err;
+ const dtuple_t* rebuilt_old_pk = NULL;
ut_ad(node);
ut_ad(dict_index_is_clust(index));
@@ -2066,33 +2143,48 @@ row_upd_clust_rec(
pcur = node->pcur;
btr_cur = btr_pcur_get_btr_cur(pcur);
- ut_ad(!rec_get_deleted_flag(btr_pcur_get_rec(pcur),
+ ut_ad(btr_cur_get_index(btr_cur) == index);
+ ut_ad(!rec_get_deleted_flag(btr_cur_get_rec(btr_cur),
dict_table_is_comp(index->table)));
+ ut_ad(rec_offs_validate(btr_cur_get_rec(btr_cur), index, offsets));
+
+ if (dict_index_is_online_ddl(index)) {
+ rebuilt_old_pk = row_log_table_get_pk(
+ btr_cur_get_rec(btr_cur), index, offsets, &heap);
+ }
/* Try optimistic updating of the record, keeping changes within
the page; we do not check locks because we assume the x-lock on the
record to update */
if (node->cmpl_info & UPD_NODE_NO_SIZE_CHANGE) {
- err = btr_cur_update_in_place(BTR_NO_LOCKING_FLAG,
- btr_cur, node->update,
- node->cmpl_info, thr, mtr);
+ err = btr_cur_update_in_place(
+ BTR_NO_LOCKING_FLAG, btr_cur,
+ offsets, node->update,
+ node->cmpl_info, thr, thr_get_trx(thr)->id, mtr);
} else {
- err = btr_cur_optimistic_update(BTR_NO_LOCKING_FLAG,
- btr_cur, node->update,
- node->cmpl_info, thr, mtr);
+ err = btr_cur_optimistic_update(
+ BTR_NO_LOCKING_FLAG, btr_cur,
+ &offsets, offsets_heap, node->update,
+ node->cmpl_info, thr, thr_get_trx(thr)->id, mtr);
+ }
+
+ if (err == DB_SUCCESS && dict_index_is_online_ddl(index)) {
+ row_log_table_update(btr_cur_get_rec(btr_cur),
+ index, offsets, rebuilt_old_pk);
}
mtr_commit(mtr);
if (UNIV_LIKELY(err == DB_SUCCESS)) {
- return(DB_SUCCESS);
+ goto func_exit;
}
if (buf_LRU_buf_pool_running_out()) {
- return(DB_LOCK_TABLE_FULL);
+ err = DB_LOCK_TABLE_FULL;
+ goto func_exit;
}
/* We may have to modify the tree structure: do a pessimistic descent
down the index tree */
@@ -2110,14 +2202,16 @@ row_upd_clust_rec(
ut_ad(!rec_get_deleted_flag(btr_pcur_get_rec(pcur),
dict_table_is_comp(index->table)));
+ if (!heap) {
+ heap = mem_heap_create(1024);
+ }
+
err = btr_cur_pessimistic_update(
BTR_NO_LOCKING_FLAG | BTR_KEEP_POS_FLAG, btr_cur,
- &heap, &big_rec, node->update, node->cmpl_info, thr, mtr);
+ &offsets, offsets_heap, heap, &big_rec,
+ node->update, node->cmpl_info,
+ thr, thr_get_trx(thr)->id, mtr);
if (big_rec) {
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- rec_t* rec;
- rec_offs_init(offsets_);
-
ut_a(err == DB_SUCCESS);
/* Write out the externally stored
columns while still x-latching
@@ -2140,12 +2234,10 @@ row_upd_clust_rec(
portion of the file, in case the file was somehow
truncated in the crash. */
- rec = btr_cur_get_rec(btr_cur);
DEBUG_SYNC_C("before_row_upd_extern");
err = btr_store_big_rec_extern_fields(
- index, btr_cur_get_block(btr_cur), rec,
- rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap),
+ index, btr_cur_get_block(btr_cur),
+ btr_cur_get_rec(btr_cur), offsets,
big_rec, mtr, BTR_STORE_UPDATE);
DEBUG_SYNC_C("after_row_upd_extern");
/* If writing big_rec fails (for example, because of
@@ -2164,9 +2256,14 @@ row_upd_clust_rec(
ut_a(err == DB_SUCCESS);
}
- mtr_commit(mtr);
+ if (err == DB_SUCCESS && dict_index_is_online_ddl(index)) {
+ row_log_table_update(btr_cur_get_rec(btr_cur),
+ index, offsets, rebuilt_old_pk);
+ }
- if (UNIV_LIKELY_NULL(heap)) {
+ mtr_commit(mtr);
+func_exit:
+ if (heap) {
mem_heap_free(heap);
}
@@ -2180,8 +2277,8 @@ row_upd_clust_rec(
/***********************************************************//**
Delete marks a clustered index record.
@return DB_SUCCESS if operation successfully completed, else error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_upd_del_mark_clust_rec(
/*=======================*/
upd_node_t* node, /*!< in: row update node */
@@ -2196,7 +2293,7 @@ row_upd_del_mark_clust_rec(
{
btr_pcur_t* pcur;
btr_cur_t* btr_cur;
- ulint err;
+ dberr_t err;
ut_ad(node);
ut_ad(dict_index_is_clust(index));
@@ -2214,8 +2311,8 @@ row_upd_del_mark_clust_rec(
locks, because we assume that we have an x-lock on the record */
err = btr_cur_del_mark_set_clust_rec(
- BTR_NO_LOCKING_FLAG, btr_cur_get_block(btr_cur),
- btr_cur_get_rec(btr_cur), index, offsets, TRUE, thr, mtr);
+ btr_cur_get_block(btr_cur), btr_cur_get_rec(btr_cur),
+ index, offsets, thr, mtr);
if (err == DB_SUCCESS && referenced) {
/* NOTE that the following call loses the position of pcur ! */
@@ -2232,8 +2329,8 @@ row_upd_del_mark_clust_rec(
Updates the clustered index record.
@return DB_SUCCESS if operation successfully completed, DB_LOCK_WAIT
in case of a lock wait, else error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_upd_clust_step(
/*===============*/
upd_node_t* node, /*!< in: row update node */
@@ -2242,11 +2339,10 @@ row_upd_clust_step(
dict_index_t* index;
btr_pcur_t* pcur;
ibool success;
- ulint err;
- mtr_t* mtr;
- mtr_t mtr_buf;
+ dberr_t err;
+ mtr_t mtr;
rec_t* rec;
- mem_heap_t* heap = NULL;
+ mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets;
ibool referenced;
@@ -2259,9 +2355,8 @@ row_upd_clust_step(
pcur = node->pcur;
/* We have to restore the cursor to its position */
- mtr = &mtr_buf;
- mtr_start(mtr);
+ mtr_start(&mtr);
/* If the restoration does not succeed, then the same
transaction has deleted the record on which the cursor was,
@@ -2273,12 +2368,32 @@ row_upd_clust_step(
ut_a(pcur->rel_pos == BTR_PCUR_ON);
- success = btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr);
+ ulint mode;
+
+#ifdef UNIV_DEBUG
+ /* Work around Bug#14626800 ASSERTION FAILURE IN DEBUG_SYNC().
+ Once it is fixed, remove the 'ifdef', 'if' and this comment. */
+ if (!thr_get_trx(thr)->ddl) {
+ DEBUG_SYNC_C_IF_THD(
+ thr_get_trx(thr)->mysql_thd,
+ "innodb_row_upd_clust_step_enter");
+ }
+#endif /* UNIV_DEBUG */
+
+ if (dict_index_is_online_ddl(index)) {
+ ut_ad(node->table->id != DICT_INDEXES_ID);
+ mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
+ mtr_s_lock(dict_index_get_lock(index), &mtr);
+ } else {
+ mode = BTR_MODIFY_LEAF;
+ }
+
+ success = btr_pcur_restore_position(mode, pcur, &mtr);
if (!success) {
err = DB_RECORD_NOT_FOUND;
- mtr_commit(mtr);
+ mtr_commit(&mtr);
return(err);
}
@@ -2289,18 +2404,20 @@ row_upd_clust_step(
if (node->is_delete && node->table->id == DICT_INDEXES_ID) {
- dict_drop_index_tree(btr_pcur_get_rec(pcur), mtr);
+ ut_ad(!dict_index_is_online_ddl(index));
- mtr_commit(mtr);
+ dict_drop_index_tree(btr_pcur_get_rec(pcur), &mtr);
- mtr_start(mtr);
+ mtr_commit(&mtr);
+
+ mtr_start(&mtr);
success = btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur,
- mtr);
+ &mtr);
if (!success) {
err = DB_ERROR;
- mtr_commit(mtr);
+ mtr_commit(&mtr);
return(err);
}
@@ -2315,7 +2432,7 @@ row_upd_clust_step(
0, btr_pcur_get_block(pcur),
rec, index, offsets, thr);
if (err != DB_SUCCESS) {
- mtr_commit(mtr);
+ mtr_commit(&mtr);
goto exit_func;
}
}
@@ -2324,17 +2441,14 @@ row_upd_clust_step(
if (node->is_delete) {
err = row_upd_del_mark_clust_rec(
- node, index, offsets, thr, referenced, mtr);
+ node, index, offsets, thr, referenced, &mtr);
if (err == DB_SUCCESS) {
node->state = UPD_NODE_UPDATE_ALL_SEC;
node->index = dict_table_get_next_index(index);
}
-exit_func:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(err);
+
+ goto exit_func;
}
/* If the update is made for MySQL, we already have the update vector
@@ -2348,13 +2462,11 @@ exit_func:
row_upd_eval_new_vals(node->update);
}
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) {
- return(row_upd_clust_rec(node, index, thr, mtr));
+ err = row_upd_clust_rec(
+ node, index, offsets, &heap, thr, &mtr);
+ goto exit_func;
}
row_upd_store_row(node);
@@ -2374,20 +2486,21 @@ exit_func:
externally! */
err = row_upd_clust_rec_by_insert(
- node, index, thr, referenced, mtr);
+ node, index, thr, referenced, &mtr);
if (err != DB_SUCCESS) {
- return(err);
+ goto exit_func;
}
node->state = UPD_NODE_UPDATE_ALL_SEC;
} else {
- err = row_upd_clust_rec(node, index, thr, mtr);
+ err = row_upd_clust_rec(
+ node, index, offsets, &heap, thr, &mtr);
if (err != DB_SUCCESS) {
- return(err);
+ goto exit_func;
}
node->state = UPD_NODE_UPDATE_SOME_SEC;
@@ -2395,6 +2508,10 @@ exit_func:
node->index = dict_table_get_next_index(index);
+exit_func:
+ if (heap) {
+ mem_heap_free(heap);
+ }
return(err);
}
@@ -2404,14 +2521,14 @@ to this node, we assume that we have a persistent cursor which was on a
record, and the position of the cursor is stored in the cursor.
@return DB_SUCCESS if operation successfully completed, else error
code or DB_LOCK_WAIT */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
row_upd(
/*====*/
upd_node_t* node, /*!< in: row update node */
que_thr_t* thr) /*!< in: query thread */
{
- ulint err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
ut_ad(node && thr);
@@ -2449,6 +2566,17 @@ row_upd(
return(DB_SUCCESS);
}
+#ifdef UNIV_DEBUG
+ /* Work around Bug#14626800 ASSERTION FAILURE IN DEBUG_SYNC().
+ Once it is fixed, remove the 'ifdef', 'if' and this comment. */
+ if (!thr_get_trx(thr)->ddl) {
+ DEBUG_SYNC_C_IF_THD(thr_get_trx(thr)->mysql_thd,
+ "after_row_upd_clust");
+ }
+#endif /* UNIV_DEBUG */
+
+ DBUG_EXECUTE_IF("row_upd_skip_sec", node->index = NULL;);
+
do {
/* Skip corrupted index */
dict_table_skip_corrupt_index(node->index);
@@ -2458,7 +2586,6 @@ row_upd(
}
if (node->index->type != DICT_FTS) {
- log_free_check();
err = row_upd_sec_step(node, thr);
if (err != DB_SUCCESS) {
@@ -2500,7 +2627,7 @@ row_upd_step(
upd_node_t* node;
sel_node_t* sel_node;
que_node_t* parent;
- ulint err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
trx_t* trx;
ut_ad(thr);
@@ -2579,7 +2706,7 @@ row_upd_step(
err = row_upd(node, thr);
error_handling:
- trx->error_state = static_cast<enum db_err>(err);
+ trx->error_state = err;
if (err != DB_SUCCESS) {
return(NULL);
diff --git a/storage/innobase/row/row0vers.cc b/storage/innobase/row/row0vers.cc
index 0aad8675ff8..2c3191928fd 100644
--- a/storage/innobase/row/row0vers.cc
+++ b/storage/innobase/row/row0vers.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -114,7 +114,6 @@ row_vers_impl_x_locked_low(
on rec. */
for (version = clust_rec;; version = prev_version) {
- ulint err;
row_ext_t* ext;
const dtuple_t* row;
dtuple_t* entry;
@@ -128,24 +127,22 @@ row_vers_impl_x_locked_low(
heap = mem_heap_create(1024);
- err = trx_undo_prev_version_build(
+ trx_undo_prev_version_build(
clust_rec, mtr, version, clust_index, clust_offsets,
heap, &prev_version);
- /* Free version and clust_offsets. */
+ /* Free version and clust_offsets. */
mem_heap_free(old_heap);
if (prev_version == NULL) {
- /* clust_rec must be a fresh insert, because
+ /* clust_rec should be a fresh insert, because
no previous version was found or the transaction
has committed. The caller has to recheck as the
synopsis of this function states, whether trx_id
is active or not. */
- ut_a(err == DB_SUCCESS || err == DB_MISSING_HISTORY);
-
break;
}
@@ -155,15 +152,16 @@ row_vers_impl_x_locked_low(
vers_del = rec_get_deleted_flag(prev_version, comp);
- prev_trx_id = row_get_rec_trx_id(
- prev_version, clust_index, clust_offsets);
+ prev_trx_id = row_get_rec_trx_id(prev_version, clust_index,
+ clust_offsets);
/* The stack of versions is locked by mtr. Thus, it
is safe to fetch the prefixes for externally stored
columns. */
row = row_build(ROW_COPY_POINTERS, clust_index, prev_version,
- clust_offsets, NULL, &ext, heap);
+ clust_offsets,
+ NULL, NULL, NULL, &ext, heap);
entry = row_build_index_entry(row, ext, index, heap);
@@ -183,8 +181,6 @@ row_vers_impl_x_locked_low(
There is no guarantee that the transaction is still
active. */
- ut_ad(err == DB_SUCCESS);
-
/* We check if entry and rec are identified in the alphabetical
ordering */
@@ -355,7 +351,6 @@ row_vers_old_has_index_entry(
mem_heap_t* heap2;
const dtuple_t* row;
const dtuple_t* entry;
- ulint err;
ulint comp;
ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
@@ -383,7 +378,8 @@ row_vers_old_has_index_entry(
Thus, it is safe to fetch the prefixes for
externally stored columns. */
row = row_build(ROW_COPY_POINTERS, clust_index,
- rec, clust_offsets, NULL, &ext, heap);
+ rec, clust_offsets,
+ NULL, NULL, NULL, &ext, heap);
entry = row_build_index_entry(row, ext, index, heap);
/* If entry == NULL, the record contains unset BLOB
@@ -420,12 +416,12 @@ row_vers_old_has_index_entry(
for (;;) {
heap2 = heap;
heap = mem_heap_create(1024);
- err = trx_undo_prev_version_build(rec, mtr, version,
- clust_index, clust_offsets,
- heap, &prev_version);
+ trx_undo_prev_version_build(rec, mtr, version,
+ clust_index, clust_offsets,
+ heap, &prev_version);
mem_heap_free(heap2); /* free version and clust_offsets */
- if (err != DB_SUCCESS || !prev_version) {
+ if (!prev_version) {
/* Versions end here */
mem_heap_free(heap);
@@ -444,7 +440,7 @@ row_vers_old_has_index_entry(
externally stored columns. */
row = row_build(ROW_COPY_POINTERS, clust_index,
prev_version, clust_offsets,
- NULL, &ext, heap);
+ NULL, NULL, NULL, &ext, heap);
entry = row_build_index_entry(row, ext, index, heap);
/* If entry == NULL, the record contains unset
@@ -477,7 +473,7 @@ read should see. We assume that the trx id stored in rec is such that
the consistent read should not see rec in its present version.
@return DB_SUCCESS or DB_MISSING_HISTORY */
UNIV_INTERN
-ulint
+dberr_t
row_vers_build_for_consistent_read(
/*===============================*/
const rec_t* rec, /*!< in: record in a clustered index; the
@@ -495,8 +491,9 @@ row_vers_build_for_consistent_read(
*old_vers is allocated; memory for possible
intermediate versions is allocated and freed
locally within the function */
- rec_t** old_vers)/*!< out, own: old version, or NULL if the
- record does not exist in the view, that is,
+ rec_t** old_vers)/*!< out, own: old version, or NULL
+ if the history is missing or the record
+ does not exist in the view, that is,
it was freshly inserted afterwards */
{
const rec_t* version;
@@ -504,7 +501,7 @@ row_vers_build_for_consistent_read(
trx_id_t trx_id;
mem_heap_t* heap = NULL;
byte* buf;
- ulint err;
+ dberr_t err;
ut_ad(dict_index_is_clust(index));
ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
@@ -558,27 +555,21 @@ row_vers_build_for_consistent_read(
rec_offs_make_valid(*old_vers, index,
*offsets);
err = DB_SUCCESS;
-
break;
}
}
err = trx_undo_prev_version_build(rec, mtr, version, index,
*offsets, heap,
- &prev_version);
+ &prev_version)
+ ? DB_SUCCESS : DB_MISSING_HISTORY;
if (heap2) {
mem_heap_free(heap2); /* free version */
}
- if (err != DB_SUCCESS) {
- break;
- }
-
if (prev_version == NULL) {
/* It was a freshly inserted version */
*old_vers = NULL;
- err = DB_SUCCESS;
-
break;
}
@@ -602,8 +593,6 @@ row_vers_build_for_consistent_read(
*old_vers = rec_copy(buf, prev_version, *offsets);
rec_offs_make_valid(*old_vers, index, *offsets);
- err = DB_SUCCESS;
-
break;
}
@@ -617,10 +606,9 @@ row_vers_build_for_consistent_read(
/*****************************************************************//**
Constructs the last committed version of a clustered index record,
-which should be seen by a semi-consistent read.
-@return DB_SUCCESS or DB_MISSING_HISTORY */
+which should be seen by a semi-consistent read. */
UNIV_INTERN
-ulint
+void
row_vers_build_for_semi_consistent_read(
/*====================================*/
const rec_t* rec, /*!< in: record in a clustered index; the
@@ -644,7 +632,6 @@ row_vers_build_for_semi_consistent_read(
const rec_t* version;
mem_heap_t* heap = NULL;
byte* buf;
- ulint err;
trx_id_t rec_trx_id = 0;
ut_ad(dict_index_is_clust(index));
@@ -683,7 +670,7 @@ row_vers_build_for_semi_consistent_read(
mutex_exit(&trx_sys->mutex);
if (!version_trx) {
-
+committed_version_trx:
/* We found a version that belongs to a
committed transaction: return it. */
@@ -693,7 +680,6 @@ row_vers_build_for_semi_consistent_read(
if (rec == version) {
*old_vers = rec;
- err = DB_SUCCESS;
break;
}
@@ -721,30 +707,30 @@ row_vers_build_for_semi_consistent_read(
*old_vers = rec_copy(buf, version, *offsets);
rec_offs_make_valid(*old_vers, index, *offsets);
- err = DB_SUCCESS;
-
break;
}
+ DEBUG_SYNC_C("after_row_vers_check_trx_active");
+
heap2 = heap;
heap = mem_heap_create(1024);
- err = trx_undo_prev_version_build(rec, mtr, version, index,
- *offsets, heap,
- &prev_version);
- if (heap2) {
- mem_heap_free(heap2); /* free version */
+ if (!trx_undo_prev_version_build(rec, mtr, version, index,
+ *offsets, heap,
+ &prev_version)) {
+ mem_heap_free(heap);
+ heap = heap2;
+ heap2 = NULL;
+ goto committed_version_trx;
}
- if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
- break;
+ if (heap2) {
+ mem_heap_free(heap2); /* free version */
}
if (prev_version == NULL) {
/* It was a freshly inserted version */
*old_vers = NULL;
- err = DB_SUCCESS;
-
break;
}
@@ -759,6 +745,4 @@ row_vers_build_for_semi_consistent_read(
if (heap) {
mem_heap_free(heap);
}
-
- return(err);
}
diff --git a/storage/innobase/srv/srv0conc.cc b/storage/innobase/srv/srv0conc.cc
index d5c949f3a06..820700a95a8 100644
--- a/storage/innobase/srv/srv0conc.cc
+++ b/storage/innobase/srv/srv0conc.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -40,7 +40,6 @@ Created 2011/04/18 Sunny Bains
#include "srv0srv.h"
#include "sync0sync.h"
#include "trx0trx.h"
-#include "ha_prototypes.h"
#include "mysql/plugin.h"
@@ -73,13 +72,11 @@ UNIV_INTERN ulong srv_thread_concurrency = 0;
/** This mutex protects srv_conc data structures */
static os_fast_mutex_t srv_conc_mutex;
-/** Slot for a thread waiting in the concurrency control queue. */
-typedef struct srv_conc_slot_struct srv_conc_slot_t;
-
/** Concurrency list node */
-typedef UT_LIST_NODE_T(srv_conc_slot_t) srv_conc_node_t;
+typedef UT_LIST_NODE_T(struct srv_conc_slot_t) srv_conc_node_t;
-struct srv_conc_slot_struct{
+/** Slot for a thread waiting in the concurrency control queue. */
+struct srv_conc_slot_t{
os_event_t event; /*!< event to wait */
ibool reserved; /*!< TRUE if slot
reserved */
@@ -106,10 +103,8 @@ UNIV_INTERN mysql_pfs_key_t srv_conc_mutex_key;
#endif /* !HAVE_ATOMIC_BUILTINS */
-typedef struct srv_conc_struct srv_conc_t;
-
/** Variables tracking the active and waiting threads. */
-struct srv_conc_struct {
+struct srv_conc_t {
char pad[64 - (sizeof(ulint) + sizeof(lint))];
/** Number of transactions that have declared_to_be_inside_innodb set.
@@ -148,7 +143,7 @@ srv_conc_init(void)
for (i = 0; i < OS_THREAD_MAX_N; i++) {
srv_conc_slot_t* conc_slot = &srv_conc_slots[i];
- conc_slot->event = os_event_create(NULL);
+ conc_slot->event = os_event_create();
ut_a(conc_slot->event);
}
#endif /* !HAVE_ATOMIC_BUILTINS */
@@ -224,9 +219,7 @@ srv_conc_enter_innodb_with_atomics(
(void) os_atomic_decrement_lint(
&srv_conc.n_waiting, 1);
- thd_wait_end(
- static_cast<THD*>(
- trx->mysql_thd));
+ thd_wait_end(trx->mysql_thd);
}
if (srv_adaptive_max_sleep_delay > 0) {
@@ -262,9 +255,7 @@ srv_conc_enter_innodb_with_atomics(
trx_search_latch_release_if_reserved(trx);
}
- thd_wait_begin(
- static_cast<THD*>(trx->mysql_thd),
- THD_WAIT_USER_LOCK);
+ thd_wait_begin(trx->mysql_thd, THD_WAIT_USER_LOCK);
notified_mysql = TRUE;
}
@@ -477,10 +468,10 @@ retry:
#endif /* UNIV_SYNC_DEBUG */
trx->op_info = "waiting in InnoDB queue";
- thd_wait_begin(static_cast<THD*>(trx->mysql_thd), THD_WAIT_USER_LOCK);
+ thd_wait_begin(trx->mysql_thd, THD_WAIT_USER_LOCK);
os_event_wait(slot->event);
- thd_wait_end(static_cast<THD*>(trx->mysql_thd));
+ thd_wait_end(trx->mysql_thd);
trx->op_info = "";
diff --git a/storage/innobase/srv/srv0mon.cc b/storage/innobase/srv/srv0mon.cc
index 9c6e56bcb9d..3b3da2f070f 100644
--- a/storage/innobase/srv/srv0mon.cc
+++ b/storage/innobase/srv/srv0mon.cc
@@ -1,6 +1,7 @@
/*****************************************************************************
-Copyright (c) 2010, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2010, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -33,7 +34,6 @@ Created 12/9/2009 Jimmy Yang
#include "trx0rseg.h"
#include "lock0lock.h"
#include "ibuf0ibuf.h"
-#include "btr0cur.h"
#ifdef UNIV_NONINL
#include "srv0mon.ic"
#endif
@@ -215,11 +215,6 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_EXISTING | MONITOR_DEFAULT_ON),
MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_WRITE_REQUEST},
- {"buffer_pool_pages_in_flush", "buffer",
- "Number of pages in flush list",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_PAGE_INFLUSH},
-
{"buffer_pool_wait_free", "buffer",
"Number of times waited for free buffer"
" (innodb_buffer_pool_wait_free)",
@@ -259,12 +254,24 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGES_DATA},
+ {"buffer_pool_bytes_data", "buffer",
+ "Buffer bytes containing data (innodb_buffer_pool_bytes_data)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_BYTES_DATA},
+
{"buffer_pool_pages_dirty", "buffer",
"Buffer pages currently dirty (innodb_buffer_pool_pages_dirty)",
static_cast<monitor_type_t>(
MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGES_DIRTY},
+ {"buffer_pool_bytes_dirty", "buffer",
+ "Buffer bytes currently dirty (innodb_buffer_pool_bytes_dirty)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_BYTES_DIRTY},
+
{"buffer_pool_pages_free", "buffer",
"Buffer pages currently free (innodb_buffer_pool_pages_free)",
static_cast<monitor_type_t>(
@@ -350,25 +357,40 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_SET_MEMBER, MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE,
MONITOR_FLUSH_NEIGHBOR_PAGES},
- /* Cumulative counter for flush batches because of max_dirty */
- {"buffer_flush_max_dirty_total_pages", "buffer",
- "Total pages flushed as part of max_dirty batches",
- MONITOR_SET_OWNER, MONITOR_FLUSH_MAX_DIRTY_COUNT,
- MONITOR_FLUSH_MAX_DIRTY_TOTAL_PAGE},
+ {"buffer_flush_n_to_flush_requested", "buffer",
+ "Number of pages requested for flushing.",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_N_TO_FLUSH_REQUESTED},
+
+ {"buffer_flush_avg_page_rate", "buffer",
+ "Average number of pages at which flushing is happening",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_AVG_PAGE_RATE},
+
+ {"buffer_flush_lsn_avg_rate", "buffer",
+ "Average redo generation rate",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_LSN_AVG_RATE},
+
+ {"buffer_flush_pct_for_dirty", "buffer",
+ "Percent of IO capacity used to avoid max dirty page limit",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_PCT_FOR_DIRTY},
- {"buffer_flush_max_dirty", "buffer",
- "Number of max_dirty batches",
- MONITOR_SET_MEMBER, MONITOR_FLUSH_MAX_DIRTY_TOTAL_PAGE,
- MONITOR_FLUSH_MAX_DIRTY_COUNT},
+ {"buffer_flush_pct_for_lsn", "buffer",
+ "Percent of IO capacity used to avoid reusable redo space limit",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_PCT_FOR_LSN},
+
+ {"buffer_flush_sync_waits", "buffer",
+ "Number of times a wait happens due to sync flushing",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_SYNC_WAITS},
- {"buffer_flush_max_dirty_pages", "buffer",
- "Pages queued as a max_dirty batch",
- MONITOR_SET_MEMBER, MONITOR_FLUSH_MAX_DIRTY_TOTAL_PAGE,
- MONITOR_FLUSH_MAX_DIRTY_PAGES},
- /* Cumulative counter for flush batches because of adaptive */
+ /* Cumulative counter for flush batches for adaptive flushing */
{"buffer_flush_adaptive_total_pages", "buffer",
- "Total pages flushed as part of adaptive batches",
+ "Total pages flushed as part of adaptive flushing",
MONITOR_SET_OWNER, MONITOR_FLUSH_ADAPTIVE_COUNT,
MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE},
@@ -382,22 +404,6 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_SET_MEMBER, MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE,
MONITOR_FLUSH_ADAPTIVE_PAGES},
- /* Cumulative counter for flush batches because of async */
- {"buffer_flush_async_total_pages", "buffer",
- "Total pages flushed as part of async batches",
- MONITOR_SET_OWNER, MONITOR_FLUSH_ASYNC_COUNT,
- MONITOR_FLUSH_ASYNC_TOTAL_PAGE},
-
- {"buffer_flush_async", "buffer",
- "Number of async batches",
- MONITOR_SET_MEMBER, MONITOR_FLUSH_ASYNC_TOTAL_PAGE,
- MONITOR_FLUSH_ASYNC_COUNT},
-
- {"buffer_flush_async_pages", "buffer",
- "Pages queued as an async batch",
- MONITOR_SET_MEMBER, MONITOR_FLUSH_ASYNC_TOTAL_PAGE,
- MONITOR_FLUSH_ASYNC_PAGES},
-
/* Cumulative counter for flush batches because of sync */
{"buffer_flush_sync_total_pages", "buffer",
"Total pages flushed as part of sync batches",
@@ -859,6 +865,16 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_NONE,
MONITOR_DEFAULT_START, MONITOR_PAGE_DECOMPRESS},
+ {"compression_pad_increments", "compression",
+ "Number of times padding is incremented to avoid compression failures",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_PAD_INCREMENTS},
+
+ {"compression_pad_decrements", "compression",
+ "Number of times padding is decremented due to good compressibility",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_PAD_DECREMENTS},
+
/* ========== Counters for Index ========== */
{"module_index", "index", "Index Manager",
MONITOR_MODULE,
@@ -1130,11 +1146,26 @@ static monitor_info_t innodb_counter_info[] =
MONITOR_MODULE,
MONITOR_DEFAULT_START, MONITOR_MODULE_DDL_STATS},
+ {"ddl_background_drop_indexes", "ddl",
+ "Number of indexes waiting to be dropped after failed index creation",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_BACKGROUND_DROP_INDEX},
+
{"ddl_background_drop_tables", "ddl",
"Number of tables in background drop table list",
MONITOR_NONE,
MONITOR_DEFAULT_START, MONITOR_BACKGROUND_DROP_TABLE},
+ {"ddl_online_create_index", "ddl",
+ "Number of indexes being created online",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_ONLINE_CREATE_INDEX},
+
+ {"ddl_pending_alter_table", "ddl",
+ "Number of ALTER TABLE, CREATE INDEX, DROP INDEX in progress",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_PENDING_ALTER_TABLE},
+
/* ===== Counters for ICP (Index Condition Pushdown) Module ===== */
{"module_icp", "icp", "Index Condition Pushdown",
MONITOR_MODULE,
@@ -1171,6 +1202,34 @@ has been turned on/off. */
UNIV_INTERN ulint monitor_set_tbl[(NUM_MONITOR + NUM_BITS_ULINT
- 1) / NUM_BITS_ULINT];
+#ifndef HAVE_ATOMIC_BUILTINS_64
+/** Mutex protecting atomic operations on platforms that lack
+built-in operations for atomic memory access */
+ib_mutex_t monitor_mutex;
+
+/** Key to register monitor_mutex with performance schema */
+UNIV_INTERN mysql_pfs_key_t monitor_mutex_key;
+
+/****************************************************************//**
+Initialize the monitor subsystem. */
+UNIV_INTERN
+void
+srv_mon_create(void)
+/*================*/
+{
+ mutex_create(monitor_mutex_key, &monitor_mutex, SYNC_ANY_LATCH);
+}
+/****************************************************************//**
+Close the monitor subsystem. */
+UNIV_INTERN
+void
+srv_mon_free(void)
+/*==============*/
+{
+ mutex_free(&monitor_mutex);
+}
+#endif /* !HAVE_ATOMIC_BUILTINS_64 */
+
/****************************************************************//**
Get a monitor's "monitor_info" by its monitor id (index into the
innodb_counter_info array.
@@ -1359,13 +1418,14 @@ srv_mon_process_existing_counter(
mon_option_t set_option) /*!< in: Turn on/off reset the
counter */
{
- mon_type_t value;
- monitor_info_t* monitor_info;
- ibool update_min = FALSE;
- buf_pool_stat_t stat;
- ulint LRU_len;
- ulint free_len;
- ulint flush_list_len;
+ mon_type_t value;
+ monitor_info_t* monitor_info;
+ ibool update_min = FALSE;
+ buf_pool_stat_t stat;
+ buf_pools_list_size_t buf_pools_list_size;
+ ulint LRU_len;
+ ulint free_len;
+ ulint flush_list_len;
monitor_info = srv_mon_get_info(monitor_id);
@@ -1381,7 +1441,7 @@ srv_mon_process_existing_counter(
/* export_vars.innodb_buffer_pool_reads. Num Reads from
disk (page not in buffer) */
case MONITOR_OVLD_BUF_POOL_READS:
- value = srv_buf_pool_reads;
+ value = srv_stats.buf_pool_reads;
break;
/* innodb_buffer_pool_read_requests, the number of logical
@@ -1394,12 +1454,12 @@ srv_mon_process_existing_counter(
/* innodb_buffer_pool_write_requests, the number of
write request */
case MONITOR_OVLD_BUF_POOL_WRITE_REQUEST:
- value = srv_buf_pool_write_requests;
+ value = srv_stats.buf_pool_write_requests;
break;
/* innodb_buffer_pool_wait_free */
case MONITOR_OVLD_BUF_POOL_WAIT_FREE:
- value = srv_buf_pool_wait_free;
+ value = srv_stats.buf_pool_wait_free;
break;
/* innodb_buffer_pool_read_ahead */
@@ -1431,12 +1491,25 @@ srv_mon_process_existing_counter(
value = LRU_len;
break;
+ /* innodb_buffer_pool_bytes_data */
+ case MONITOR_OVLD_BUF_POOL_BYTES_DATA:
+ buf_get_total_list_size_in_bytes(&buf_pools_list_size);
+ value = buf_pools_list_size.LRU_bytes
+ + buf_pools_list_size.unzip_LRU_bytes;
+ break;
+
/* innodb_buffer_pool_pages_dirty */
case MONITOR_OVLD_BUF_POOL_PAGES_DIRTY:
buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
value = flush_list_len;
break;
+ /* innodb_buffer_pool_bytes_dirty */
+ case MONITOR_OVLD_BUF_POOL_BYTES_DIRTY:
+ buf_get_total_list_size_in_bytes(&buf_pools_list_size);
+ value = buf_pools_list_size.flush_list_bytes;
+ break;
+
/* innodb_buffer_pool_pages_free */
case MONITOR_OVLD_BUF_POOL_PAGES_FREE:
buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
@@ -1463,12 +1536,12 @@ srv_mon_process_existing_counter(
/* innodb_data_reads, the total number of data reads */
case MONITOR_OVLD_BYTE_READ:
- value = srv_data_read;
+ value = srv_stats.data_read;
break;
/* innodb_data_writes, the total number of data writes. */
case MONITOR_OVLD_BYTE_WRITTEN:
- value = srv_data_written;
+ value = srv_stats.data_written;
break;
/* innodb_data_reads, the total number of data reads. */
@@ -1488,7 +1561,7 @@ srv_mon_process_existing_counter(
/* innodb_os_log_written */
case MONITOR_OVLD_OS_LOG_WRITTEN:
- value = (mon_type_t) srv_os_log_written;
+ value = (mon_type_t) srv_stats.os_log_written;
break;
/* innodb_os_log_fsyncs */
@@ -1504,33 +1577,33 @@ srv_mon_process_existing_counter(
/* innodb_os_log_pending_writes */
case MONITOR_OVLD_OS_LOG_PENDING_WRITES:
- value = srv_os_log_pending_writes;
+ value = srv_stats.os_log_pending_writes;
update_min = TRUE;
break;
/* innodb_log_waits */
case MONITOR_OVLD_LOG_WAITS:
- value = srv_log_waits;
+ value = srv_stats.log_waits;
break;
/* innodb_log_write_requests */
case MONITOR_OVLD_LOG_WRITE_REQUEST:
- value = srv_log_write_requests;
+ value = srv_stats.log_write_requests;
break;
/* innodb_log_writes */
case MONITOR_OVLD_LOG_WRITES:
- value = srv_log_writes;
+ value = srv_stats.log_writes;
break;
/* innodb_dblwr_writes */
case MONITOR_OVLD_SRV_DBLWR_WRITES:
- value = srv_dblwr_writes;
+ value = srv_stats.dblwr_writes;
break;
/* innodb_dblwr_pages_written */
case MONITOR_OVLD_SRV_DBLWR_PAGES_WRITTEN:
- value = srv_dblwr_pages_written;
+ value = srv_stats.dblwr_pages_written;
break;
/* innodb_page_size */
@@ -1539,27 +1612,27 @@ srv_mon_process_existing_counter(
break;
case MONITOR_OVLD_RWLOCK_S_SPIN_WAITS:
- value = rw_s_spin_wait_count;
+ value = rw_lock_stats.rw_s_spin_wait_count;
break;
case MONITOR_OVLD_RWLOCK_X_SPIN_WAITS:
- value = rw_x_os_wait_count;
+ value = rw_lock_stats.rw_x_os_wait_count;
break;
case MONITOR_OVLD_RWLOCK_S_SPIN_ROUNDS:
- value = rw_s_spin_round_count;
+ value = rw_lock_stats.rw_s_spin_round_count;
break;
case MONITOR_OVLD_RWLOCK_X_SPIN_ROUNDS:
- value = rw_x_spin_round_count;
+ value = rw_lock_stats.rw_x_spin_round_count;
break;
case MONITOR_OVLD_RWLOCK_S_OS_WAITS:
- value = rw_s_os_wait_count;
+ value = rw_lock_stats.rw_s_os_wait_count;
break;
case MONITOR_OVLD_RWLOCK_X_OS_WAITS:
- value = rw_x_os_wait_count;
+ value = rw_lock_stats.rw_x_os_wait_count;
break;
case MONITOR_OVLD_BUFFER_POOL_SIZE:
@@ -1568,44 +1641,44 @@ srv_mon_process_existing_counter(
/* innodb_rows_read */
case MONITOR_OLVD_ROW_READ:
- value = srv_n_rows_read;
+ value = srv_stats.n_rows_read;
break;
/* innodb_rows_inserted */
case MONITOR_OLVD_ROW_INSERTED:
- value = srv_n_rows_inserted;
+ value = srv_stats.n_rows_inserted;
break;
/* innodb_rows_deleted */
case MONITOR_OLVD_ROW_DELETED:
- value = srv_n_rows_deleted;
+ value = srv_stats.n_rows_deleted;
break;
/* innodb_rows_updated */
case MONITOR_OLVD_ROW_UPDTATED:
- value = srv_n_rows_updated;
+ value = srv_stats.n_rows_updated;
break;
/* innodb_row_lock_current_waits */
case MONITOR_OVLD_ROW_LOCK_CURRENT_WAIT:
- value = srv_n_lock_wait_current_count;
+ value = srv_stats.n_lock_wait_current_count;
break;
/* innodb_row_lock_time */
case MONITOR_OVLD_LOCK_WAIT_TIME:
- value = srv_n_lock_wait_time / 1000;
+ value = srv_stats.n_lock_wait_time / 1000;
break;
/* innodb_row_lock_time_max */
case MONITOR_OVLD_LOCK_MAX_WAIT_TIME:
- value = srv_n_lock_max_wait_time / 1000;
+ value = lock_sys->n_lock_max_wait_time / 1000;
break;
/* innodb_row_lock_time_avg */
case MONITOR_OVLD_LOCK_AVG_WAIT_TIME:
- if (srv_n_lock_wait_count > 0) {
- value = srv_n_lock_wait_time / 1000
- / srv_n_lock_wait_count;
+ if (srv_stats.n_lock_wait_count > 0) {
+ value = srv_stats.n_lock_wait_time / 1000
+ / srv_stats.n_lock_wait_count;
} else {
value = 0;
}
@@ -1613,7 +1686,7 @@ srv_mon_process_existing_counter(
/* innodb_row_lock_waits */
case MONITOR_OVLD_ROW_LOCK_WAIT:
- value = srv_n_lock_wait_count;
+ value = srv_stats.n_lock_wait_count;
break;
case MONITOR_RSEG_HISTORY_LEN:
diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc
index e64cc006f02..5c0ca903417 100644
--- a/storage/innobase/srv/srv0srv.cc
+++ b/storage/innobase/srv/srv0srv.cc
@@ -59,6 +59,7 @@ Created 10/8/1995 Heikki Tuuri
#include "btr0sea.h"
#include "dict0load.h"
#include "dict0boot.h"
+#include "dict0stats_bg.h" /* dict_stats_event */
#include "srv0start.h"
#include "row0mysql.h"
#include "ha_prototypes.h"
@@ -70,10 +71,6 @@ Created 10/8/1995 Heikki Tuuri
#include "mysql/plugin.h"
#include "mysql/service_thd_wait.h"
-/* The following counter is incremented whenever there is some user activity
-in the server */
-UNIV_INTERN ulint srv_activity_count = 0;
-
/* The following is the maximum allowed duration of a lock wait. */
UNIV_INTERN ulint srv_fatal_semaphore_wait_threshold = 600;
@@ -86,6 +83,8 @@ UNIV_INTERN ibool srv_error_monitor_active = FALSE;
UNIV_INTERN ibool srv_buf_dump_thread_active = FALSE;
+UNIV_INTERN ibool srv_dict_stats_thread_active = FALSE;
+
UNIV_INTERN const char* srv_main_thread_op_info = "";
/** Prefix used by MySQL to indicate pre-5.1 table name encoding */
@@ -104,6 +103,9 @@ UNIV_INTERN char* srv_undo_dir = NULL;
/** The number of tablespaces to use for rollback segments. */
UNIV_INTERN ulong srv_undo_tablespaces = 8;
+/** The number of UNDO tablespaces that are open and ready to use. */
+UNIV_INTERN ulint srv_undo_tablespaces_open = 8;
+
/* The number of rollback segments to use */
UNIV_INTERN ulong srv_undo_logs = 1;
@@ -111,6 +113,10 @@ UNIV_INTERN ulong srv_undo_logs = 1;
UNIV_INTERN char* srv_arch_dir = NULL;
#endif /* UNIV_LOG_ARCHIVE */
+/** Set if InnoDB must operate in read-only mode. We don't do any
+recovery and open all tables in RO mode instead of RW mode. We don't
+sync the max trx id to disk either. */
+UNIV_INTERN my_bool srv_read_only_mode;
/** store to its own file each table created by an user; data
dictionary tables are in the system tablespace 0 */
UNIV_INTERN my_bool srv_file_per_table;
@@ -128,6 +134,10 @@ UNIV_INTERN ulint srv_max_file_format_at_startup = UNIV_FORMAT_MAX;
/** Place locks to records only i.e. do not use next-key locking except
on duplicate key checking and foreign key checking */
UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE;
+/** Sort buffer size in index creation */
+UNIV_INTERN ulong srv_sort_buf_size = 1048576;
+/** Maximum modification log file size for online index creation */
+UNIV_INTERN unsigned long long srv_online_max_size;
/* If this flag is TRUE, then we will use the native aio of the
OS (provided we compiled Innobase with it in), otherwise we will
@@ -170,15 +180,16 @@ the user from forgetting the 'newraw' keyword to my.cnf */
UNIV_INTERN ibool srv_created_new_raw = FALSE;
-UNIV_INTERN char** srv_log_group_home_dirs = NULL;
+UNIV_INTERN char* srv_log_group_home_dir = NULL;
-UNIV_INTERN ulint srv_n_log_groups = ULINT_MAX;
-UNIV_INTERN ulint srv_n_log_files = ULINT_MAX;
+UNIV_INTERN ulong srv_n_log_files = SRV_N_LOG_FILES_MAX;
/* size in database pages */
UNIV_INTERN ib_uint64_t srv_log_file_size = IB_UINT64_MAX;
+UNIV_INTERN ib_uint64_t srv_log_file_size_requested;
/* size in database pages */
UNIV_INTERN ulint srv_log_buffer_size = ULINT_MAX;
UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1;
+UNIV_INTERN uint srv_flush_log_at_timeout = 1;
UNIV_INTERN ulong srv_page_size = UNIV_PAGE_SIZE_DEF;
UNIV_INTERN ulong srv_page_size_shift = UNIV_PAGE_SIZE_SHIFT_DEF;
@@ -211,7 +222,7 @@ UNIV_INTERN ulong srv_n_page_hash_locks = 16;
/** Scan depth for LRU flush batch i.e.: number of blocks scanned*/
UNIV_INTERN ulong srv_LRU_scan_depth = 1024;
/** whether or not to flush neighbors of a block */
-UNIV_INTERN my_bool srv_flush_neighbors = TRUE;
+UNIV_INTERN ulong srv_flush_neighbors = 1;
/* previously requested size */
UNIV_INTERN ulint srv_buf_pool_old_size;
/* current size in kilobytes */
@@ -256,7 +267,8 @@ UNIV_INTERN ulint srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
UNIV_INTERN ulint srv_max_n_open_files = 300;
/* Number of IO operations per second the server can do */
-UNIV_INTERN ulong srv_io_capacity = 400;
+UNIV_INTERN ulong srv_io_capacity = 200;
+UNIV_INTERN ulong srv_max_io_capacity = 400;
/* The InnoDB main thread tries to keep the ratio of modified pages
in the buffer pool to all database pages in the buffer pool smaller than
@@ -264,76 +276,49 @@ the following number. But it is not guaranteed that the value stays below
that during a time of heavy update/insert activity. */
UNIV_INTERN ulong srv_max_buf_pool_modified_pct = 75;
+UNIV_INTERN ulong srv_max_dirty_pages_pct_lwm = 50;
+
+/* This is the percentage of log capacity at which adaptive flushing,
+if enabled, will kick in. */
+UNIV_INTERN ulong srv_adaptive_flushing_lwm = 10;
+
+/* Number of iterations over which adaptive flushing is averaged. */
+UNIV_INTERN ulong srv_flushing_avg_loops = 30;
/* The number of purge threads to use.*/
-UNIV_INTERN ulong srv_n_purge_threads = 1;
+UNIV_INTERN ulong srv_n_purge_threads = 1;
/* the number of pages to purge in one batch */
-UNIV_INTERN ulong srv_purge_batch_size = 20;
-
-/* variable counts amount of data read in total (in bytes) */
-UNIV_INTERN ulint srv_data_read = 0;
+UNIV_INTERN ulong srv_purge_batch_size = 20;
/* Internal setting for "innodb_stats_method". Decides how InnoDB treats
NULL value when collecting statistics. By default, it is set to
SRV_STATS_NULLS_EQUAL(0), ie. all NULL value are treated equal */
-ulong srv_innodb_stats_method = SRV_STATS_NULLS_EQUAL;
-
-/* here we count the amount of data written in total (in bytes) */
-UNIV_INTERN ulint srv_data_written = 0;
-
-/* the number of the log write requests done */
-UNIV_INTERN ulint srv_log_write_requests = 0;
-
-/* the number of physical writes to the log performed */
-UNIV_INTERN ulint srv_log_writes = 0;
-
-/* amount of data written to the log files in bytes */
-UNIV_INTERN lsn_t srv_os_log_written = 0;
+UNIV_INTERN ulong srv_innodb_stats_method = SRV_STATS_NULLS_EQUAL;
-/* amount of writes being done to the log files */
-UNIV_INTERN ulint srv_os_log_pending_writes = 0;
-
-/* we increase this counter, when there we don't have enough space in the
-log buffer and have to flush it */
-UNIV_INTERN ulint srv_log_waits = 0;
-
-/* this variable counts the amount of times, when the doublewrite buffer
-was flushed */
-UNIV_INTERN ulint srv_dblwr_writes = 0;
-
-/* here we store the number of pages that have been flushed to the
-doublewrite buffer */
-UNIV_INTERN ulint srv_dblwr_pages_written = 0;
-
-/* in this variable we store the number of write requests issued */
-UNIV_INTERN ulint srv_buf_pool_write_requests = 0;
-
-/* here we store the number of times when we had to wait for a free page
-in the buffer pool. It happens when the buffer pool is full and we need
-to make a flush, in order to be able to read or create a page. */
-UNIV_INTERN ulint srv_buf_pool_wait_free = 0;
-
-/* variable to count the number of pages that were written from buffer
-pool to the disk */
-UNIV_INTERN ulint srv_buf_pool_flushed = 0;
-
-/** Number of buffer pool reads that led to the
-reading of a disk page */
-UNIV_INTERN ulint srv_buf_pool_reads = 0;
+UNIV_INTERN srv_stats_t srv_stats;
/* structure to pass status variables to MySQL */
-UNIV_INTERN export_struc export_vars;
-
-/* If the following is != 0 we do not allow inserts etc. This protects
-the user from forgetting the innodb_force_recovery keyword to my.cnf */
-
-UNIV_INTERN ulint srv_force_recovery = 0;
+UNIV_INTERN export_var_t export_vars;
+
+/** Normally 0. When nonzero, skip some phases of crash recovery,
+starting from SRV_FORCE_IGNORE_CORRUPT, so that data can be recovered
+by SELECT or mysqldump. When this is nonzero, we do not allow any user
+modifications to the data. */
+UNIV_INTERN ulong srv_force_recovery;
+#ifndef DBUG_OFF
+/** Inject a crash at different steps of the recovery process.
+This is for testing and debugging only. */
+UNIV_INTERN ulong srv_force_recovery_crash;
+#endif /* !DBUG_OFF */
/** Print all user-level transactions deadlocks to mysqld stderr */
UNIV_INTERN my_bool srv_print_all_deadlocks = FALSE;
+/** Enable INFORMATION_SCHEMA.innodb_cmp_per_index */
+UNIV_INTERN my_bool srv_cmp_per_index_enabled = FALSE;
+
/* If the following is set to 1 then we do not run purge and insert buffer
merge to completion before shutdown. If it is set to 2, do not even flush the
buffer pool to data files at the shutdown: we effectively 'crash'
@@ -350,7 +335,9 @@ this many index pages, there are 2 ways to calculate statistics:
* quick transient stats, that are used if persistent stats for the given
table/index are not found in the innodb database */
UNIV_INTERN unsigned long long srv_stats_transient_sample_pages = 8;
+UNIV_INTERN my_bool srv_stats_persistent = TRUE;
UNIV_INTERN unsigned long long srv_stats_persistent_sample_pages = 20;
+UNIV_INTERN my_bool srv_stats_auto_recalc = TRUE;
UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE;
@@ -375,11 +362,6 @@ UNIV_INTERN ibool srv_print_log_io = FALSE;
UNIV_INTERN ibool srv_print_latch_waits = FALSE;
#endif /* UNIV_DEBUG */
-UNIV_INTERN ulint srv_n_rows_inserted = 0;
-UNIV_INTERN ulint srv_n_rows_updated = 0;
-UNIV_INTERN ulint srv_n_rows_deleted = 0;
-UNIV_INTERN ulint srv_n_rows_read = 0;
-
static ulint srv_n_rows_inserted_old = 0;
static ulint srv_n_rows_updated_old = 0;
static ulint srv_n_rows_deleted_old = 0;
@@ -404,58 +386,58 @@ UNIV_INTERN const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS];
UNIV_INTERN time_t srv_last_monitor_time;
-UNIV_INTERN mutex_t srv_innodb_monitor_mutex;
+UNIV_INTERN ib_mutex_t srv_innodb_monitor_mutex;
-/* Mutex for locking srv_monitor_file */
-UNIV_INTERN mutex_t srv_monitor_file_mutex;
+/* Mutex for locking srv_monitor_file. Not created if srv_read_only_mode */
+UNIV_INTERN ib_mutex_t srv_monitor_file_mutex;
#ifdef UNIV_PFS_MUTEX
# ifndef HAVE_ATOMIC_BUILTINS
/* Key to register server_mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t server_mutex_key;
# endif /* !HAVE_ATOMIC_BUILTINS */
-/* Key to register srv_innodb_monitor_mutex with performance schema */
+/** Key to register srv_innodb_monitor_mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t srv_innodb_monitor_mutex_key;
-/* Key to register srv_monitor_file_mutex with performance schema */
+/** Key to register srv_monitor_file_mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t srv_monitor_file_mutex_key;
-/* Key to register srv_dict_tmpfile_mutex with performance schema */
+/** Key to register srv_dict_tmpfile_mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t srv_dict_tmpfile_mutex_key;
-/* Key to register the mutex with performance schema */
+/** Key to register the mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t srv_misc_tmpfile_mutex_key;
-/* Key to register srv_sys_t::mutex with performance schema */
+/** Key to register srv_sys_t::mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t srv_sys_mutex_key;
-/* Key to register srv_sys_t::tasks_mutex with performance schema */
+/** Key to register srv_sys_t::tasks_mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t srv_sys_tasks_mutex_key;
#endif /* UNIV_PFS_MUTEX */
-/* Temporary file for innodb monitor output */
+/** Temporary file for innodb monitor output */
UNIV_INTERN FILE* srv_monitor_file;
-/* Mutex for locking srv_dict_tmpfile.
+/** Mutex for locking srv_dict_tmpfile. Not created if srv_read_only_mode.
This mutex has a very high rank; threads reserving it should not
be holding any InnoDB latches. */
-UNIV_INTERN mutex_t srv_dict_tmpfile_mutex;
-/* Temporary file for output from the data dictionary */
+UNIV_INTERN ib_mutex_t srv_dict_tmpfile_mutex;
+/** Temporary file for output from the data dictionary */
UNIV_INTERN FILE* srv_dict_tmpfile;
-/* Mutex for locking srv_misc_tmpfile.
+/** Mutex for locking srv_misc_tmpfile. Not created if srv_read_only_mode.
This mutex has a very low rank; threads reserving it should not
acquire any further latches or sleep before releasing this one. */
-UNIV_INTERN mutex_t srv_misc_tmpfile_mutex;
-/* Temporary file for miscellanous diagnostic output */
+UNIV_INTERN ib_mutex_t srv_misc_tmpfile_mutex;
+/** Temporary file for miscellanous diagnostic output */
UNIV_INTERN FILE* srv_misc_tmpfile;
UNIV_INTERN ulint srv_main_thread_process_no = 0;
UNIV_INTERN ulint srv_main_thread_id = 0;
-/* The following count work done by srv_master_thread. */
+/* The following counts are used by the srv_master_thread. */
-/* Iterations of the loop bounded by 'srv_active' label. */
-static ulint srv_main_active_loops = 0;
-/* Iterations of the loop bounded by the 'srv_idle' label. */
-static ulint srv_main_idle_loops = 0;
-/* Iterations of the loop bounded by the 'srv_shutdown' label. */
-static ulint srv_main_shutdown_loops = 0;
-/* Log writes involving flush. */
-static ulint srv_log_writes_and_flush = 0;
+/** Iterations of the loop bounded by 'srv_active' label. */
+static ulint srv_main_active_loops = 0;
+/** Iterations of the loop bounded by the 'srv_idle' label. */
+static ulint srv_main_idle_loops = 0;
+/** Iterations of the loop bounded by the 'srv_shutdown' label. */
+static ulint srv_main_shutdown_loops = 0;
+/** Log writes involving flush. */
+static ulint srv_log_writes_and_flush = 0;
/* This is only ever touched by the master thread. It records the
time when the last flush of log file has happened. The master
@@ -484,7 +466,8 @@ current_time % 5 != 0. */
} while (0)
/** Test if the system mutex is owned. */
-#define srv_sys_mutex_own() mutex_own(&srv_sys->mutex)
+#define srv_sys_mutex_own() (mutex_own(&srv_sys->mutex) \
+ && !srv_read_only_mode)
/** Release the system mutex. */
#define srv_sys_mutex_exit() do { \
@@ -492,7 +475,7 @@ current_time % 5 != 0. */
} while (0)
#define fetch_lock_wait_timeout(trx) \
- ((trx)->lock.allowed_to_wait \
+ ((trx)->lock.allowed_to_wait \
? thd_lock_wait_timeout((trx)->mysql_thd) \
: 0)
@@ -568,35 +551,32 @@ suspending the master thread and utility threads when they have nothing
to do. The thread table can be seen as an analogue to the process table
in a traditional Unix implementation. */
-/** The server system */
-typedef struct srv_sys_struct srv_sys_t;
-
/** The server system struct */
-struct srv_sys_struct{
- mutex_t tasks_mutex; /*!< variable protecting the
+struct srv_sys_t{
+ ib_mutex_t tasks_mutex; /*!< variable protecting the
tasks queue */
UT_LIST_BASE_NODE_T(que_thr_t)
tasks; /*!< task queue */
- mutex_t mutex; /*!< variable protecting the
-
+ ib_mutex_t mutex; /*!< variable protecting the
fields below. */
ulint n_sys_threads; /*!< size of the sys_threads
array */
- srv_table_t* sys_threads; /*!< server thread table */
+ srv_slot_t* sys_threads; /*!< server thread table */
ulint n_threads_active[SRV_MASTER + 1];
/*!< number of threads active
in a thread class */
- ulint activity_count; /*!< For tracking server
+ srv_stats_t::ulint_ctr_1_t
+ activity_count; /*!< For tracking server
activity */
};
#ifndef HAVE_ATOMIC_BUILTINS
/** Mutex protecting some server global variables. */
-UNIV_INTERN mutex_t server_mutex;
+UNIV_INTERN ib_mutex_t server_mutex;
#endif /* !HAVE_ATOMIC_BUILTINS */
static srv_sys_t* srv_sys = NULL;
@@ -656,6 +636,18 @@ srv_set_io_thread_op_info(
srv_io_thread_op_info[i] = str;
}
+/*********************************************************************//**
+Resets the info describing an i/o thread current state. */
+UNIV_INTERN
+void
+srv_reset_io_thread_op_info()
+/*=========================*/
+{
+ for (ulint i = 0; i < UT_ARR_SIZE(srv_io_thread_op_info); ++i) {
+ srv_io_thread_op_info[i] = "not started yet";
+ }
+}
+
#ifdef UNIV_DEBUG
/*********************************************************************//**
Validates the type of a thread table slot.
@@ -756,6 +748,8 @@ srv_suspend_thread_low(
/*===================*/
srv_slot_t* slot) /*!< in/out: thread slot */
{
+
+ ut_ad(!srv_read_only_mode);
ut_ad(srv_sys_mutex_own());
ut_ad(slot->in_use);
@@ -915,9 +909,8 @@ void
srv_init(void)
/*==========*/
{
- ulint i;
- ulint srv_sys_sz;
- ulint n_sys_threads;
+ ulint n_sys_threads = 0;
+ ulint srv_sys_sz = sizeof(*srv_sys);
#ifndef HAVE_ATOMIC_BUILTINS
mutex_create(server_mutex_key, &server_mutex, SYNC_ANY_LATCH);
@@ -926,38 +919,55 @@ srv_init(void)
mutex_create(srv_innodb_monitor_mutex_key,
&srv_innodb_monitor_mutex, SYNC_NO_ORDER_CHECK);
- /* Number of purge threads + master thread */
- n_sys_threads = srv_n_purge_threads + 1;
+ if (!srv_read_only_mode) {
- srv_sys_sz = sizeof(*srv_sys) + (n_sys_threads * sizeof(srv_slot_t));
+ /* Number of purge threads + master thread */
+ n_sys_threads = srv_n_purge_threads + 1;
+
+ srv_sys_sz += n_sys_threads * sizeof(*srv_sys->sys_threads);
+ }
srv_sys = static_cast<srv_sys_t*>(mem_zalloc(srv_sys_sz));
- mutex_create(srv_sys_mutex_key, &srv_sys->mutex, SYNC_THREADS);
+ srv_sys->n_sys_threads = n_sys_threads;
- mutex_create(srv_sys_tasks_mutex_key,
- &srv_sys->tasks_mutex, SYNC_ANY_LATCH);
+ if (!srv_read_only_mode) {
- srv_sys->n_sys_threads = n_sys_threads;
- srv_sys->sys_threads = (srv_slot_t*) &srv_sys[1];
+ mutex_create(srv_sys_mutex_key, &srv_sys->mutex, SYNC_THREADS);
- for (i = 0; i < srv_sys->n_sys_threads; i++) {
- srv_slot_t* slot;
+ mutex_create(srv_sys_tasks_mutex_key,
+ &srv_sys->tasks_mutex, SYNC_ANY_LATCH);
- slot = srv_sys->sys_threads + i;
+ srv_sys->sys_threads = (srv_slot_t*) &srv_sys[1];
- slot->event = os_event_create(NULL);
+ for (ulint i = 0; i < srv_sys->n_sys_threads; ++i) {
+ srv_slot_t* slot = &srv_sys->sys_threads[i];
- ut_a(slot->event);
- }
+ slot->event = os_event_create();
+
+ ut_a(slot->event);
+ }
+
+ srv_error_event = os_event_create();
- srv_error_event = os_event_create(NULL);
+ srv_monitor_event = os_event_create();
- srv_monitor_event = os_event_create(NULL);
+ srv_buf_dump_event = os_event_create();
- srv_buf_dump_event = os_event_create("buf_dump_event");
+ UT_LIST_INIT(srv_sys->tasks);
+ }
+
+ /* page_zip_stat_per_index_mutex is acquired from:
+ 1. page_zip_compress() (after SYNC_FSP)
+ 2. page_zip_decompress()
+ 3. i_s_cmp_per_index_fill_low() (where SYNC_DICT is acquired)
+ 4. innodb_cmp_per_index_update(), no other latches
+ since we do not acquire any other latches while holding this mutex,
+ it can have very low level. We pick SYNC_ANY_LATCH for it. */
- UT_LIST_INIT(srv_sys->tasks);
+ mutex_create(
+ page_zip_stat_per_index_mutex_key,
+ &page_zip_stat_per_index_mutex, SYNC_ANY_LATCH);
/* Create dummy indexes for infimum and supremum records */
@@ -987,8 +997,10 @@ srv_free(void)
trx_i_s_cache_free(trx_i_s_cache);
- os_event_free(srv_buf_dump_event);
- srv_buf_dump_event = NULL;
+ if (!srv_read_only_mode) {
+ os_event_free(srv_buf_dump_event);
+ srv_buf_dump_event = NULL;
+ }
}
/*********************************************************************//**
@@ -1010,10 +1022,9 @@ srv_general_init(void)
}
/*********************************************************************//**
-Normalizes init parameter values to use units we use inside InnoDB.
-@return DB_SUCCESS or error code */
+Normalizes init parameter values to use units we use inside InnoDB. */
static
-ulint
+void
srv_normalize_init_values(void)
/*===========================*/
{
@@ -1035,28 +1046,19 @@ srv_normalize_init_values(void)
srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE;
srv_lock_table_size = 5 * (srv_buf_pool_size / UNIV_PAGE_SIZE);
-
- return(DB_SUCCESS);
}
/*********************************************************************//**
-Boots the InnoDB server.
-@return DB_SUCCESS or error code */
+Boots the InnoDB server. */
UNIV_INTERN
-ulint
+void
srv_boot(void)
/*==========*/
{
- ulint err;
-
/* Transform the init parameter values given by MySQL to
use units we use inside InnoDB: */
- err = srv_normalize_init_values();
-
- if (err != DB_SUCCESS) {
- return(err);
- }
+ srv_normalize_init_values();
/* Initialize synchronization primitives, memory management, and thread
local storage */
@@ -1066,8 +1068,7 @@ srv_boot(void)
/* Initialize this module */
srv_init();
-
- return(DB_SUCCESS);
+ srv_mon_create();
}
/******************************************************************//**
@@ -1090,10 +1091,10 @@ srv_refresh_innodb_monitor_stats(void)
buf_refresh_io_stats_all();
- srv_n_rows_inserted_old = srv_n_rows_inserted;
- srv_n_rows_updated_old = srv_n_rows_updated;
- srv_n_rows_deleted_old = srv_n_rows_deleted;
- srv_n_rows_read_old = srv_n_rows_read;
+ srv_n_rows_inserted_old = srv_stats.n_rows_inserted;
+ srv_n_rows_updated_old = srv_stats.n_rows_updated;
+ srv_n_rows_deleted_old = srv_stats.n_rows_deleted;
+ srv_n_rows_read_old = srv_stats.n_rows_read;
mutex_exit(&srv_innodb_monitor_mutex);
}
@@ -1158,7 +1159,7 @@ srv_printf_innodb_monitor(
mutex_enter(&dict_foreign_err_mutex);
- if (ftell(dict_foreign_err_file) != 0L) {
+ if (!srv_read_only_mode && ftell(dict_foreign_err_file) != 0L) {
fputs("------------------------\n"
"LATEST FOREIGN KEY ERROR\n"
"------------------------\n", file);
@@ -1271,26 +1272,26 @@ srv_printf_innodb_monitor(
"Number of rows inserted " ULINTPF
", updated " ULINTPF ", deleted " ULINTPF
", read " ULINTPF "\n",
- srv_n_rows_inserted,
- srv_n_rows_updated,
- srv_n_rows_deleted,
- srv_n_rows_read);
+ (ulint) srv_stats.n_rows_inserted,
+ (ulint) srv_stats.n_rows_updated,
+ (ulint) srv_stats.n_rows_deleted,
+ (ulint) srv_stats.n_rows_read);
fprintf(file,
"%.2f inserts/s, %.2f updates/s,"
" %.2f deletes/s, %.2f reads/s\n",
- (srv_n_rows_inserted - srv_n_rows_inserted_old)
+ ((ulint) srv_stats.n_rows_inserted - srv_n_rows_inserted_old)
/ time_elapsed,
- (srv_n_rows_updated - srv_n_rows_updated_old)
+ ((ulint) srv_stats.n_rows_updated - srv_n_rows_updated_old)
/ time_elapsed,
- (srv_n_rows_deleted - srv_n_rows_deleted_old)
+ ((ulint) srv_stats.n_rows_deleted - srv_n_rows_deleted_old)
/ time_elapsed,
- (srv_n_rows_read - srv_n_rows_read_old)
+ ((ulint) srv_stats.n_rows_read - srv_n_rows_read_old)
/ time_elapsed);
- srv_n_rows_inserted_old = srv_n_rows_inserted;
- srv_n_rows_updated_old = srv_n_rows_updated;
- srv_n_rows_deleted_old = srv_n_rows_deleted;
- srv_n_rows_read_old = srv_n_rows_read;
+ srv_n_rows_inserted_old = srv_stats.n_rows_inserted;
+ srv_n_rows_updated_old = srv_stats.n_rows_updated;
+ srv_n_rows_deleted_old = srv_stats.n_rows_deleted;
+ srv_n_rows_read_old = srv_stats.n_rows_read;
fputs("----------------------------\n"
"END OF INNODB MONITOR OUTPUT\n"
@@ -1308,89 +1309,168 @@ void
srv_export_innodb_status(void)
/*==========================*/
{
- buf_pool_stat_t stat;
- ulint LRU_len;
- ulint free_len;
- ulint flush_list_len;
+ buf_pool_stat_t stat;
+ buf_pools_list_size_t buf_pools_list_size;
+ ulint LRU_len;
+ ulint free_len;
+ ulint flush_list_len;
buf_get_total_stat(&stat);
buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
+ buf_get_total_list_size_in_bytes(&buf_pools_list_size);
mutex_enter(&srv_innodb_monitor_mutex);
- export_vars.innodb_data_pending_reads
- = os_n_pending_reads;
- export_vars.innodb_data_pending_writes
- = os_n_pending_writes;
- export_vars.innodb_data_pending_fsyncs
- = fil_n_pending_log_flushes
+ export_vars.innodb_data_pending_reads =
+ os_n_pending_reads;
+
+ export_vars.innodb_data_pending_writes =
+ os_n_pending_writes;
+
+ export_vars.innodb_data_pending_fsyncs =
+ fil_n_pending_log_flushes
+ fil_n_pending_tablespace_flushes;
+
export_vars.innodb_data_fsyncs = os_n_fsyncs;
- export_vars.innodb_data_read = srv_data_read;
+
+ export_vars.innodb_data_read = srv_stats.data_read;
+
export_vars.innodb_data_reads = os_n_file_reads;
+
export_vars.innodb_data_writes = os_n_file_writes;
- export_vars.innodb_data_written = srv_data_written;
+
+ export_vars.innodb_data_written = srv_stats.data_written;
+
export_vars.innodb_buffer_pool_read_requests = stat.n_page_gets;
- export_vars.innodb_buffer_pool_write_requests
- = srv_buf_pool_write_requests;
- export_vars.innodb_buffer_pool_wait_free = srv_buf_pool_wait_free;
- export_vars.innodb_buffer_pool_pages_flushed = srv_buf_pool_flushed;
- export_vars.innodb_buffer_pool_reads = srv_buf_pool_reads;
- export_vars.innodb_buffer_pool_read_ahead_rnd
- = stat.n_ra_pages_read_rnd;
- export_vars.innodb_buffer_pool_read_ahead
- = stat.n_ra_pages_read;
- export_vars.innodb_buffer_pool_read_ahead_evicted
- = stat.n_ra_pages_evicted;
+
+ export_vars.innodb_buffer_pool_write_requests =
+ srv_stats.buf_pool_write_requests;
+
+ export_vars.innodb_buffer_pool_wait_free =
+ srv_stats.buf_pool_wait_free;
+
+ export_vars.innodb_buffer_pool_pages_flushed =
+ srv_stats.buf_pool_flushed;
+
+ export_vars.innodb_buffer_pool_reads = srv_stats.buf_pool_reads;
+
+ export_vars.innodb_buffer_pool_read_ahead_rnd =
+ stat.n_ra_pages_read_rnd;
+
+ export_vars.innodb_buffer_pool_read_ahead =
+ stat.n_ra_pages_read;
+
+ export_vars.innodb_buffer_pool_read_ahead_evicted =
+ stat.n_ra_pages_evicted;
+
export_vars.innodb_buffer_pool_pages_data = LRU_len;
+
+ export_vars.innodb_buffer_pool_bytes_data =
+ buf_pools_list_size.LRU_bytes
+ + buf_pools_list_size.unzip_LRU_bytes;
+
export_vars.innodb_buffer_pool_pages_dirty = flush_list_len;
+
+ export_vars.innodb_buffer_pool_bytes_dirty =
+ buf_pools_list_size.flush_list_bytes;
+
export_vars.innodb_buffer_pool_pages_free = free_len;
+
#ifdef UNIV_DEBUG
- export_vars.innodb_buffer_pool_pages_latched
- = buf_get_latched_pages_number();
+ export_vars.innodb_buffer_pool_pages_latched =
+ buf_get_latched_pages_number();
#endif /* UNIV_DEBUG */
export_vars.innodb_buffer_pool_pages_total = buf_pool_get_n_pages();
- export_vars.innodb_buffer_pool_pages_misc
- = buf_pool_get_n_pages() - LRU_len - free_len;
+ export_vars.innodb_buffer_pool_pages_misc =
+ buf_pool_get_n_pages() - LRU_len - free_len;
+
#ifdef HAVE_ATOMIC_BUILTINS
export_vars.innodb_have_atomic_builtins = 1;
#else
export_vars.innodb_have_atomic_builtins = 0;
#endif
export_vars.innodb_page_size = UNIV_PAGE_SIZE;
- export_vars.innodb_log_waits = srv_log_waits;
- export_vars.innodb_os_log_written = srv_os_log_written;
+
+ export_vars.innodb_log_waits = srv_stats.log_waits;
+
+ export_vars.innodb_os_log_written = srv_stats.os_log_written;
+
export_vars.innodb_os_log_fsyncs = fil_n_log_flushes;
+
export_vars.innodb_os_log_pending_fsyncs = fil_n_pending_log_flushes;
- export_vars.innodb_os_log_pending_writes = srv_os_log_pending_writes;
- export_vars.innodb_log_write_requests = srv_log_write_requests;
- export_vars.innodb_log_writes = srv_log_writes;
- export_vars.innodb_dblwr_pages_written = srv_dblwr_pages_written;
- export_vars.innodb_dblwr_writes = srv_dblwr_writes;
+
+ export_vars.innodb_os_log_pending_writes =
+ srv_stats.os_log_pending_writes;
+
+ export_vars.innodb_log_write_requests = srv_stats.log_write_requests;
+
+ export_vars.innodb_log_writes = srv_stats.log_writes;
+
+ export_vars.innodb_dblwr_pages_written =
+ srv_stats.dblwr_pages_written;
+
+ export_vars.innodb_dblwr_writes = srv_stats.dblwr_writes;
+
export_vars.innodb_pages_created = stat.n_pages_created;
+
export_vars.innodb_pages_read = stat.n_pages_read;
+
export_vars.innodb_pages_written = stat.n_pages_written;
- export_vars.innodb_row_lock_waits = srv_n_lock_wait_count;
- export_vars.innodb_row_lock_current_waits
- = srv_n_lock_wait_current_count;
- export_vars.innodb_row_lock_time = srv_n_lock_wait_time / 1000;
- if (srv_n_lock_wait_count > 0) {
+
+ export_vars.innodb_row_lock_waits = srv_stats.n_lock_wait_count;
+
+ export_vars.innodb_row_lock_current_waits =
+ srv_stats.n_lock_wait_current_count;
+
+ export_vars.innodb_row_lock_time = srv_stats.n_lock_wait_time / 1000;
+
+ if (srv_stats.n_lock_wait_count > 0) {
+
export_vars.innodb_row_lock_time_avg = (ulint)
- (srv_n_lock_wait_time / 1000 / srv_n_lock_wait_count);
+ (srv_stats.n_lock_wait_time
+ / 1000 / srv_stats.n_lock_wait_count);
+
} else {
export_vars.innodb_row_lock_time_avg = 0;
}
- export_vars.innodb_row_lock_time_max
- = srv_n_lock_max_wait_time / 1000;
- export_vars.innodb_rows_read = srv_n_rows_read;
- export_vars.innodb_rows_inserted = srv_n_rows_inserted;
- export_vars.innodb_rows_updated = srv_n_rows_updated;
- export_vars.innodb_rows_deleted = srv_n_rows_deleted;
+
+ export_vars.innodb_row_lock_time_max =
+ lock_sys->n_lock_max_wait_time / 1000;
+
+ export_vars.innodb_rows_read = srv_stats.n_rows_read;
+
+ export_vars.innodb_rows_inserted = srv_stats.n_rows_inserted;
+
+ export_vars.innodb_rows_updated = srv_stats.n_rows_updated;
+
+ export_vars.innodb_rows_deleted = srv_stats.n_rows_deleted;
+
export_vars.innodb_num_open_files = fil_n_file_opened;
- export_vars.innodb_truncated_status_writes = srv_truncated_status_writes;
+
+ export_vars.innodb_truncated_status_writes =
+ srv_truncated_status_writes;
+
export_vars.innodb_available_undo_logs = srv_available_undo_logs;
+#ifdef UNIV_DEBUG
+ if (purge_sys->done.trx_no == 0
+ || trx_sys->rw_max_trx_id < purge_sys->done.trx_no - 1) {
+ export_vars.innodb_purge_trx_id_age = 0;
+ } else {
+ export_vars.innodb_purge_trx_id_age =
+ trx_sys->rw_max_trx_id - purge_sys->done.trx_no + 1;
+ }
+
+ if (!purge_sys->view
+ || trx_sys->rw_max_trx_id < purge_sys->view->up_limit_id) {
+ export_vars.innodb_purge_view_trx_id_age = 0;
+ } else {
+ export_vars.innodb_purge_view_trx_id_age =
+ trx_sys->rw_max_trx_id - purge_sys->view->up_limit_id;
+ }
+#endif /* UNIV_DEBUG */
+
mutex_exit(&srv_innodb_monitor_mutex);
}
@@ -1414,14 +1494,16 @@ DECLARE_THREAD(srv_monitor_thread)(
ulint mutex_skipped;
ibool last_srv_print_monitor;
+ ut_ad(!srv_read_only_mode);
+
#ifdef UNIV_DEBUG_THREAD_CREATION
fprintf(stderr, "Lock timeout thread starts, id %lu\n",
os_thread_pf(os_thread_get_curr_id()));
-#endif
+#endif /* UNIV_DEBUG_THREAD_CREATION */
#ifdef UNIV_PFS_THREAD
pfs_register_thread(srv_monitor_thread_key);
-#endif
+#endif /* UNIV_PFS_THREAD */
srv_monitor_active = TRUE;
UT_NOT_USED(arg);
@@ -1470,7 +1552,10 @@ loop:
}
- if (srv_innodb_status) {
+ /* We don't create the temp files or associated
+ mutexes in read-only-mode */
+
+ if (!srv_read_only_mode && srv_innodb_status) {
mutex_enter(&srv_monitor_file_mutex);
rewind(srv_monitor_file);
if (!srv_printf_innodb_monitor(srv_monitor_file,
@@ -1587,16 +1672,18 @@ DECLARE_THREAD(srv_error_monitor_thread)(
const void* sema = NULL;
const void* old_sema = NULL;
+ ut_ad(!srv_read_only_mode);
+
old_lsn = srv_start_lsn;
#ifdef UNIV_DEBUG_THREAD_CREATION
fprintf(stderr, "Error monitor thread starts, id %lu\n",
os_thread_pf(os_thread_get_curr_id()));
-#endif
+#endif /* UNIV_DEBUG_THREAD_CREATION */
#ifdef UNIV_PFS_THREAD
pfs_register_thread(srv_error_monitor_thread_key);
-#endif
+#endif /* UNIV_PFS_THREAD */
srv_error_monitor_active = TRUE;
loop:
@@ -1630,9 +1717,6 @@ loop:
eviction policy. */
buf_LRU_stat_update();
- /* Update the statistics collected for flush rate policy. */
- buf_flush_stat_update();
-
/* In case mutex_exit is not a memory barrier, it is
theoretically possible some threads are left waiting though
the semaphore is already released. Wake up those threads: */
@@ -1690,7 +1774,7 @@ void
srv_inc_activity_count(void)
/*========================*/
{
- ++srv_sys->activity_count;
+ srv_sys->activity_count.inc();
}
/**********************************************************************//**
@@ -1703,12 +1787,15 @@ srv_thread_type
srv_get_active_thread_type(void)
/*============================*/
{
- ulint i;
srv_thread_type ret = SRV_NONE;
+ if (srv_read_only_mode) {
+ return(SRV_NONE);
+ }
+
srv_sys_mutex_enter();
- for (i = SRV_WORKER; i <= SRV_MASTER; ++i) {
+ for (ulint i = SRV_WORKER; i <= SRV_MASTER; ++i) {
if (srv_sys->n_threads_active[i] != 0) {
ret = static_cast<srv_thread_type>(i);
break;
@@ -1720,6 +1807,7 @@ srv_get_active_thread_type(void)
/* Check only on shutdown. */
if (ret == SRV_NONE
&& srv_shutdown_state != SRV_SHUTDOWN_NONE
+ && trx_purge_state() != PURGE_STATE_DISABLED
&& trx_purge_state() != PURGE_STATE_EXIT) {
ret = SRV_PURGE;
@@ -1739,20 +1827,25 @@ srv_any_background_threads_are_active(void)
{
const char* thread_active = NULL;
- if (srv_error_monitor_active) {
+ if (srv_read_only_mode) {
+ return(NULL);
+ } else if (srv_error_monitor_active) {
thread_active = "srv_error_monitor_thread";
- } else if (srv_lock_timeout_active) {
+ } else if (lock_sys->timeout_thread_active) {
thread_active = "srv_lock_timeout thread";
} else if (srv_monitor_active) {
thread_active = "srv_monitor_thread";
} else if (srv_buf_dump_thread_active) {
thread_active = "buf_dump_thread";
+ } else if (srv_dict_stats_thread_active) {
+ thread_active = "dict_stats_thread";
}
os_event_set(srv_error_event);
os_event_set(srv_monitor_event);
- os_event_set(srv_timeout_event);
os_event_set(srv_buf_dump_event);
+ os_event_set(lock_sys->timeout_event);
+ os_event_set(dict_stats_event);
return(thread_active);
}
@@ -1768,6 +1861,10 @@ void
srv_active_wake_master_thread(void)
/*===============================*/
{
+ if (srv_read_only_mode) {
+ return;
+ }
+
ut_ad(!srv_sys_mutex_own());
srv_inc_activity_count();
@@ -1869,7 +1966,8 @@ srv_sync_log_buffer_in_background(void)
time_t current_time = time(NULL);
srv_main_thread_op_info = "flushing log";
- if (difftime(current_time, srv_last_log_flush_time) >= 1) {
+ if (difftime(current_time, srv_last_log_flush_time)
+ >= srv_flush_log_at_timeout) {
log_buffer_sync_in_background(TRUE);
srv_last_log_flush_time = current_time;
srv_log_writes_and_flush++;
@@ -1986,7 +2084,7 @@ srv_master_do_active_tasks(void)
/* Do an ibuf merge */
srv_main_thread_op_info = "doing insert buffer merge";
counter_time = ut_time_us(NULL);
- ibuf_contract_in_background(FALSE);
+ ibuf_contract_in_background(0, FALSE);
MONITOR_INC_TIME_IN_MICRO_SECS(
MONITOR_SRV_IBUF_MERGE_MICROSECOND, counter_time);
@@ -2078,7 +2176,7 @@ srv_master_do_idle_tasks(void)
/* Do an ibuf merge */
counter_time = ut_time_us(NULL);
srv_main_thread_op_info = "doing insert buffer merge";
- ibuf_contract_in_background(TRUE);
+ ibuf_contract_in_background(0, TRUE);
MONITOR_INC_TIME_IN_MICRO_SECS(
MONITOR_SRV_IBUF_MERGE_MICROSECOND, counter_time);
@@ -2125,6 +2223,8 @@ srv_master_do_shutdown_tasks(
ulint n_bytes_merged = 0;
ulint n_tables_to_drop = 0;
+ ut_ad(!srv_read_only_mode);
+
++srv_main_shutdown_loops;
ut_a(srv_shutdown_state > 0);
@@ -2152,7 +2252,7 @@ srv_master_do_shutdown_tasks(
/* Do an ibuf merge */
srv_main_thread_op_info = "doing insert buffer merge";
- n_bytes_merged = ibuf_contract_in_background(TRUE);
+ n_bytes_merged = ibuf_contract_in_background(0, TRUE);
/* Flush logs if needed */
srv_sync_log_buffer_in_background();
@@ -2200,14 +2300,16 @@ DECLARE_THREAD(srv_master_thread)(
ulint old_activity_count = srv_get_activity_count();
ib_time_t last_print_time;
+ ut_ad(!srv_read_only_mode);
+
#ifdef UNIV_DEBUG_THREAD_CREATION
fprintf(stderr, "Master thread starts, id %lu\n",
os_thread_pf(os_thread_get_curr_id()));
-#endif
+#endif /* UNIV_DEBUG_THREAD_CREATION */
#ifdef UNIV_PFS_THREAD
pfs_register_thread(srv_master_thread_key);
-#endif
+#endif /* UNIV_PFS_THREAD */
srv_main_thread_process_no = os_proc_get_number();
srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());
@@ -2300,6 +2402,7 @@ srv_task_execute(void)
{
que_thr_t* thr = NULL;
+ ut_ad(!srv_read_only_mode);
ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
mutex_enter(&srv_sys->tasks_mutex);
@@ -2338,6 +2441,7 @@ DECLARE_THREAD(srv_worker_thread)(
{
srv_slot_t* slot;
+ ut_ad(!srv_read_only_mode);
ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
#ifdef UNIV_DEBUG_THREAD_CREATION
@@ -2418,6 +2522,7 @@ srv_do_purge(
ulint old_activity_count = srv_get_activity_count();
ut_a(n_threads > 0);
+ ut_ad(!srv_read_only_mode);
/* Purge until there are no more records to purge and there is
no change in configuration or server state. If the user has
@@ -2464,7 +2569,7 @@ srv_do_purge(
n_pages_purged = trx_purge(
n_use_threads, srv_purge_batch_size, false);
- if (!(count++ % TRX_SYS_N_RSEGS) || n_pages_purged == 0) {
+ if (!(count++ % TRX_SYS_N_RSEGS)) {
/* Force a truncate of the history list. */
trx_purge(1, srv_purge_batch_size, true);
}
@@ -2487,14 +2592,9 @@ srv_purge_coordinator_suspend(
ulint rseg_history_len) /*!< in: history list length
before last purge */
{
+ ut_ad(!srv_read_only_mode);
ut_a(slot->type == SRV_PURGE);
- rw_lock_x_lock(&purge_sys->latch);
-
- purge_sys->running = false;
-
- rw_lock_x_unlock(&purge_sys->latch);
-
bool stop = false;
/** Maximum wait time on the purge event, in micro-seconds. */
@@ -2504,6 +2604,12 @@ srv_purge_coordinator_suspend(
ulint ret;
ib_int64_t sig_count = srv_suspend_thread(slot);
+ rw_lock_x_lock(&purge_sys->latch);
+
+ purge_sys->running = false;
+
+ rw_lock_x_unlock(&purge_sys->latch);
+
/* We don't wait right away on the the non-timed wait because
we want to signal the thread that wants to suspend purge. */
@@ -2514,8 +2620,8 @@ srv_purge_coordinator_suspend(
ret = os_event_wait_time_low(
slot->event, SRV_PURGE_MAX_TIMEOUT, sig_count);
} else {
- /* We don't want to waste time waiting if the
- history list has increased by the time we get here
+ /* We don't want to waste time waiting, if the
+ history list increased by the time we got here,
unless purge has been stopped. */
ret = 0;
}
@@ -2582,6 +2688,7 @@ DECLARE_THREAD(srv_purge_coordinator_thread)(
srv_slot_t* slot;
ulint n_total_purged = ULINT_UNDEFINED;
+ ut_ad(!srv_read_only_mode);
ut_a(srv_n_purge_threads >= 1);
ut_a(trx_purge_state() == PURGE_STATE_INIT);
ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
@@ -2689,6 +2796,7 @@ srv_que_task_enqueue_low(
/*=====================*/
que_thr_t* thr) /*!< in: query thread */
{
+ ut_ad(!srv_read_only_mode);
mutex_enter(&srv_sys->tasks_mutex);
UT_LIST_ADD_LAST(queue, srv_sys->tasks, thr);
@@ -2708,6 +2816,8 @@ srv_get_task_queue_length(void)
{
ulint n_tasks;
+ ut_ad(!srv_read_only_mode);
+
mutex_enter(&srv_sys->tasks_mutex);
n_tasks = UT_LIST_GET_LEN(srv_sys->tasks);
@@ -2724,6 +2834,8 @@ void
srv_purge_wakeup(void)
/*==================*/
{
+ ut_ad(!srv_read_only_mode);
+
if (srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
srv_release_threads(SRV_PURGE, 1);
diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc
index 9d1600cff23..efe9f094c0d 100644
--- a/storage/innobase/srv/srv0start.cc
+++ b/storage/innobase/srv/srv0start.cc
@@ -71,6 +71,7 @@ Created 2/16/1996 Heikki Tuuri
# include "buf0rea.h"
# include "dict0boot.h"
# include "dict0load.h"
+# include "dict0stats_bg.h"
# include "que0que.h"
# include "usr0sess.h"
# include "lock0lock.h"
@@ -87,9 +88,9 @@ Created 2/16/1996 Heikki Tuuri
# include "row0row.h"
# include "row0mysql.h"
# include "btr0pcur.h"
-# include "os0sync.h" /* for INNODB_RW_LOCKS_USE_ATOMICS */
-# include "zlib.h" /* for ZLIB_VERSION */
-# include "buf0dblwr.h"
+# include "os0sync.h"
+# include "zlib.h"
+# include "ut0crc32.h"
/** Log sequence number immediately after startup */
UNIV_INTERN lsn_t srv_start_lsn;
@@ -188,6 +189,63 @@ srv_parse_megabytes(
}
/*********************************************************************//**
+Check if a file can be opened in read-write mode.
+@return true if it doesn't exist or can be opened in rw mode. */
+static
+bool
+srv_file_check_mode(
+/*================*/
+ const char* name) /*!< in: filename to check */
+{
+ os_file_stat_t stat;
+
+ memset(&stat, 0x0, sizeof(stat));
+
+ dberr_t err = os_file_get_status(name, &stat, true);
+
+ if (err == DB_FAIL) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "os_file_get_status() failed on '%s'. Can't determine "
+ "file permissions", name);
+
+ return(false);
+
+ } else if (err == DB_SUCCESS) {
+
+		/* Note: stat.rw_perm is only valid for files */
+
+ if (stat.type == OS_FILE_TYPE_FILE) {
+ if (!stat.rw_perm) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "%s can't be opened in %s mode",
+ srv_read_only_mode
+ ? "read-write" : "read",
+ name);
+
+ return(false);
+ }
+ } else {
+ /* Not a regular file, bail out. */
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "'%s' not a regular file.", name);
+
+ return(false);
+ }
+ } else {
+
+ /* This is OK. If the file create fails on RO media, there
+ is nothing we can do. */
+
+ ut_a(err == DB_NOT_FOUND);
+ }
+
+ return(true);
+}
+
+/*********************************************************************//**
Reads the data files and their sizes from a character string given in
the .cnf file.
@return TRUE if ok, FALSE on parse error */
@@ -376,79 +434,6 @@ srv_parse_data_file_paths_and_sizes(
}
/*********************************************************************//**
-Reads log group home directories from a character string given in
-the .cnf file.
-@return TRUE if ok, FALSE on parse error */
-UNIV_INTERN
-ibool
-srv_parse_log_group_home_dirs(
-/*==========================*/
- char* str) /*!< in/out: character string */
-{
- char* input_str;
- char* path;
- ulint i = 0;
-
- srv_log_group_home_dirs = NULL;
-
- input_str = str;
-
- /* First calculate the number of directories and check syntax:
- path;path;... */
-
- while (*str != '\0') {
- path = str;
-
- while (*str != ';' && *str != '\0') {
- str++;
- }
-
- i++;
-
- if (*str == ';') {
- str++;
- } else if (*str != '\0') {
-
- return(FALSE);
- }
- }
-
- if (i != 1) {
- /* If innodb_log_group_home_dir was defined it must
- contain exactly one path definition under current MySQL */
-
- return(FALSE);
- }
-
- srv_log_group_home_dirs = static_cast<char**>(
- malloc(i * sizeof *srv_log_group_home_dirs));
-
- /* Then store the actual values to our array */
-
- str = input_str;
- i = 0;
-
- while (*str != '\0') {
- path = str;
-
- while (*str != ';' && *str != '\0') {
- str++;
- }
-
- if (*str == ';') {
- *str = '\0';
- str++;
- }
-
- srv_log_group_home_dirs[i] = path;
-
- i++;
- }
-
- return(TRUE);
-}
-
-/*********************************************************************//**
Frees the memory allocated by srv_parse_data_file_paths_and_sizes()
and srv_parse_log_group_home_dirs(). */
UNIV_INTERN
@@ -462,8 +447,6 @@ srv_free_paths_and_sizes(void)
srv_data_file_sizes = NULL;
free(srv_data_file_is_raw_partition);
srv_data_file_is_raw_partition = NULL;
- free(srv_log_group_home_dirs);
- srv_log_group_home_dirs = NULL;
}
#ifndef UNIV_HOTBACKUP
@@ -526,175 +509,230 @@ srv_normalize_path_for_win(
#ifndef UNIV_HOTBACKUP
/*********************************************************************//**
-Creates or opens the log files and closes them.
+Creates a log file.
@return DB_SUCCESS or error code */
-static
-ulint
-open_or_create_log_file(
-/*====================*/
- ibool create_new_db, /*!< in: TRUE if we should create a
- new database */
- ibool* log_file_created, /*!< out: TRUE if new log file
- created */
- ibool log_file_has_been_opened,/*!< in: TRUE if a log file has been
- opened before: then it is an error
- to try to create another log file */
- ulint k, /*!< in: log group number */
- ulint i) /*!< in: log file number in group */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+create_log_file(
+/*============*/
+ os_file_t* file, /*!< out: file handle */
+ const char* name) /*!< in: log file name */
{
ibool ret;
- os_offset_t size;
- char name[10000];
- ulint dirnamelen;
- UT_NOT_USED(create_new_db);
+ *file = os_file_create(
+ innodb_file_log_key, name,
+ OS_FILE_CREATE, OS_FILE_NORMAL, OS_LOG_FILE, &ret);
- *log_file_created = FALSE;
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Setting log file %s size to %lu MB",
+ name, (ulong) srv_log_file_size
+ >> (20 - UNIV_PAGE_SIZE_SHIFT));
- srv_normalize_path_for_win(srv_log_group_home_dirs[k]);
+ ret = os_file_set_size(name, *file,
+ (os_offset_t) srv_log_file_size
+ << UNIV_PAGE_SIZE_SHIFT);
+ if (!ret) {
+ ib_logf(IB_LOG_LEVEL_ERROR, "Error in creating %s", name);
+ return(DB_ERROR);
+ }
- dirnamelen = strlen(srv_log_group_home_dirs[k]);
- ut_a(dirnamelen < (sizeof name) - 10 - sizeof "ib_logfile");
- memcpy(name, srv_log_group_home_dirs[k], dirnamelen);
+ ret = os_file_close(*file);
+ ut_a(ret);
- /* Add a path separator if needed. */
- if (dirnamelen && name[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
- name[dirnamelen++] = SRV_PATH_SEPARATOR;
+ return(DB_SUCCESS);
+}
+
+/** Initial number of the first redo log file */
+#define INIT_LOG_FILE0 (SRV_N_LOG_FILES_MAX + 1)
+
+#ifdef DBUG_OFF
+# define RECOVERY_CRASH(x) do {} while(0)
+#else
+# define RECOVERY_CRASH(x) do { \
+ if (srv_force_recovery_crash == x) { \
+ fprintf(stderr, "innodb_force_recovery_crash=%lu\n", \
+ srv_force_recovery_crash); \
+ fflush(stderr); \
+ exit(3); \
+ } \
+} while (0)
+#endif
+
+/*********************************************************************//**
+Creates all log files.
+@return DB_SUCCESS or error code */
+static
+dberr_t
+create_log_files(
+/*=============*/
+ char* logfilename, /*!< in/out: buffer for log file name */
+ size_t dirnamelen, /*!< in: length of the directory path */
+ lsn_t lsn, /*!< in: FIL_PAGE_FILE_FLUSH_LSN value */
+ char*& logfile0) /*!< out: name of the first log file */
+{
+ if (srv_read_only_mode) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot create log files in read-only mode");
+ return(DB_READ_ONLY);
}
- sprintf(name + dirnamelen, "%s%lu", "ib_logfile", (ulong) i);
+ /* Remove any old log files. */
+ for (unsigned i = 0; i <= INIT_LOG_FILE0; i++) {
+ sprintf(logfilename + dirnamelen, "ib_logfile%u", i);
- files[i] = os_file_create(innodb_file_log_key, name,
- OS_FILE_CREATE, OS_FILE_NORMAL,
- OS_LOG_FILE, &ret);
- if (ret == FALSE) {
- if (os_file_get_last_error(FALSE) != OS_FILE_ALREADY_EXISTS
-#ifdef UNIV_AIX
- /* AIX 5.1 after security patch ML7 may have errno set
- to 0 here, which causes our function to return 100;
- work around that AIX problem */
- && os_file_get_last_error(FALSE) != 100
+ /* Ignore errors about non-existent files or files
+ that cannot be removed. The create_log_file() will
+ return an error when the file exists. */
+#ifdef __WIN__
+ DeleteFile((LPCTSTR) logfilename);
+#else
+ unlink(logfilename);
#endif
- ) {
- fprintf(stderr,
- "InnoDB: Error in creating"
- " or opening %s\n", name);
+ /* Crashing after deleting the first
+ file should be recoverable. The buffer
+ pool was clean, and we can simply create
+ all log files from the scratch. */
+ RECOVERY_CRASH(6);
+ }
- return(DB_ERROR);
- }
+ ut_ad(!buf_pool_check_no_pending_io());
- files[i] = os_file_create(innodb_file_log_key, name,
- OS_FILE_OPEN, OS_FILE_AIO,
- OS_LOG_FILE, &ret);
- if (!ret) {
- fprintf(stderr,
- "InnoDB: Error in opening %s\n", name);
+ RECOVERY_CRASH(7);
- return(DB_ERROR);
+ for (unsigned i = 0; i < srv_n_log_files; i++) {
+ sprintf(logfilename + dirnamelen,
+ "ib_logfile%u", i ? i : INIT_LOG_FILE0);
+
+ dberr_t err = create_log_file(&files[i], logfilename);
+
+ if (err != DB_SUCCESS) {
+ return(err);
}
+ }
- size = os_file_get_size(files[i]);
- ut_a(size != (os_offset_t) -1);
+ RECOVERY_CRASH(8);
- if (UNIV_UNLIKELY(size != (os_offset_t) srv_log_file_size
- << UNIV_PAGE_SIZE_SHIFT)) {
+ /* We did not create the first log file initially as
+ ib_logfile0, so that crash recovery cannot find it until it
+ has been completed and renamed. */
+ sprintf(logfilename + dirnamelen, "ib_logfile%u", INIT_LOG_FILE0);
- fprintf(stderr,
- "InnoDB: Error: log file %s is"
- " of different size "UINT64PF" bytes\n"
- "InnoDB: than specified in the .cnf"
- " file "UINT64PF" bytes!\n",
- name, size,
- (os_offset_t) srv_log_file_size
- << UNIV_PAGE_SIZE_SHIFT);
+ fil_space_create(
+ logfilename, SRV_LOG_SPACE_FIRST_ID,
+ fsp_flags_set_page_size(0, UNIV_PAGE_SIZE),
+ FIL_LOG);
+ ut_a(fil_validate());
- return(DB_ERROR);
+ logfile0 = fil_node_create(
+ logfilename, (ulint) srv_log_file_size,
+ SRV_LOG_SPACE_FIRST_ID, FALSE);
+ ut_a(logfile0);
+
+ for (unsigned i = 1; i < srv_n_log_files; i++) {
+ sprintf(logfilename + dirnamelen, "ib_logfile%u", i);
+
+ if (!fil_node_create(
+ logfilename,
+ (ulint) srv_log_file_size,
+ SRV_LOG_SPACE_FIRST_ID, FALSE)) {
+ ut_error;
}
- } else {
- *log_file_created = TRUE;
+ }
- ut_print_timestamp(stderr);
+ log_group_init(0, srv_n_log_files,
+ srv_log_file_size * UNIV_PAGE_SIZE,
+ SRV_LOG_SPACE_FIRST_ID,
+ SRV_LOG_SPACE_FIRST_ID + 1);
- fprintf(stderr,
- " InnoDB: Log file %s did not exist:"
- " new to be created\n",
- name);
- if (log_file_has_been_opened) {
+ fil_open_log_and_system_tablespace_files();
- return(DB_ERROR);
- }
+ /* Create a log checkpoint. */
+ mutex_enter(&log_sys->mutex);
+ ut_d(recv_no_log_write = FALSE);
+ recv_reset_logs(lsn);
+ mutex_exit(&log_sys->mutex);
- fprintf(stderr, "InnoDB: Setting log file %s size to %lu MB\n",
- name, (ulong) srv_log_file_size
- >> (20 - UNIV_PAGE_SIZE_SHIFT));
+ return(DB_SUCCESS);
+}
- fprintf(stderr,
- "InnoDB: Database physically writes the file"
- " full: wait...\n");
+/*********************************************************************//**
+Renames the first log file. */
+static
+void
+create_log_files_rename(
+/*====================*/
+ char* logfilename, /*!< in/out: buffer for log file name */
+ size_t dirnamelen, /*!< in: length of the directory path */
+ lsn_t lsn, /*!< in: FIL_PAGE_FILE_FLUSH_LSN value */
+ char* logfile0) /*!< in/out: name of the first log file */
+{
+ /* If innodb_flush_method=O_DSYNC,
+ we need to explicitly flush the log buffers. */
+ fil_flush(SRV_LOG_SPACE_FIRST_ID);
+ /* Close the log files, so that we can rename
+ the first one. */
+ fil_close_log_files(false);
- ret = os_file_set_size(name, files[i],
- (os_offset_t) srv_log_file_size
- << UNIV_PAGE_SIZE_SHIFT);
- if (!ret) {
- fprintf(stderr,
- "InnoDB: Error in creating %s:"
- " probably out of disk space\n",
- name);
+ /* Rename the first log file, now that a log
+ checkpoint has been created. */
+ sprintf(logfilename + dirnamelen, "ib_logfile%u", 0);
- return(DB_ERROR);
- }
- }
+ RECOVERY_CRASH(9);
- ret = os_file_close(files[i]);
- ut_a(ret);
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Renaming log file %s to %s", logfile0, logfilename);
- if (i == 0) {
- /* Create in memory the file space object
- which is for this log group */
+ mutex_enter(&log_sys->mutex);
+ ut_ad(strlen(logfile0) == 2 + strlen(logfilename));
+ ibool success = os_file_rename(
+ innodb_file_log_key, logfile0, logfilename);
+ ut_a(success);
- fil_space_create(name,
- 2 * k + SRV_LOG_SPACE_FIRST_ID,
- fsp_flags_set_page_size(0, UNIV_PAGE_SIZE),
- FIL_LOG);
- }
+ RECOVERY_CRASH(10);
- ut_a(fil_validate());
+ /* Replace the first file with ib_logfile0. */
+ strcpy(logfile0, logfilename);
+ mutex_exit(&log_sys->mutex);
- /* srv_log_file_size is measured in pages; if page size is 16KB,
- then we have a limit of 64TB on 32 bit systems */
- ut_a(srv_log_file_size <= ULINT_MAX);
+ fil_open_log_and_system_tablespace_files();
- fil_node_create(name, (ulint) srv_log_file_size,
- 2 * k + SRV_LOG_SPACE_FIRST_ID, FALSE);
-#ifdef UNIV_LOG_ARCHIVE
- /* If this is the first log group, create the file space object
- for archived logs.
- Under MySQL, no archiving ever done. */
+ ib_logf(IB_LOG_LEVEL_WARN, "New log files created, LSN=" LSN_PF, lsn);
+}
- if (k == 0 && i == 0) {
- arch_space_id = 2 * k + 1 + SRV_LOG_SPACE_FIRST_ID;
+/*********************************************************************//**
+Opens a log file.
+@return DB_SUCCESS or error code */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+open_log_file(
+/*==========*/
+ os_file_t* file, /*!< out: file handle */
+ const char* name, /*!< in: log file name */
+ os_offset_t* size) /*!< out: file size */
+{
+ ibool ret;
- fil_space_create("arch_log_space", arch_space_id, 0, FIL_LOG);
- } else {
- arch_space_id = ULINT_UNDEFINED;
- }
-#endif /* UNIV_LOG_ARCHIVE */
- if (i == 0) {
- log_group_init(k, srv_n_log_files,
- srv_log_file_size * UNIV_PAGE_SIZE,
- 2 * k + SRV_LOG_SPACE_FIRST_ID,
- SRV_LOG_SPACE_FIRST_ID + 1); /* dummy arch
- space id */
+ *file = os_file_create(innodb_file_log_key, name,
+ OS_FILE_OPEN, OS_FILE_AIO,
+ OS_LOG_FILE, &ret);
+ if (!ret) {
+ ib_logf(IB_LOG_LEVEL_ERROR, "Unable to open '%s'", name);
+ return(DB_ERROR);
}
+ *size = os_file_get_size(*file);
+
+ ret = os_file_close(*file);
+ ut_a(ret);
return(DB_SUCCESS);
}
/*********************************************************************//**
Creates or opens database data files and closes them.
@return DB_SUCCESS or error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
open_or_create_data_files(
/*======================*/
ibool* create_new_db, /*!< out: TRUE if new database should be
@@ -718,13 +756,16 @@ open_or_create_data_files(
ibool one_created = FALSE;
os_offset_t size;
ulint flags;
+ ulint space;
ulint rounded_size_pages;
char name[10000];
if (srv_n_data_files >= 1000) {
- fprintf(stderr, "InnoDB: can only have < 1000 data files\n"
- "InnoDB: you have defined %lu\n",
- (ulong) srv_n_data_files);
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Can only have < 1000 data files, you have "
+ "defined %lu", (ulong) srv_n_data_files);
+
return(DB_ERROR);
}
@@ -742,7 +783,9 @@ open_or_create_data_files(
ut_a(dirnamelen + strlen(srv_data_file_names[i])
< (sizeof name) - 1);
+
memcpy(name, srv_data_home, dirnamelen);
+
/* Add a path separator if needed. */
if (dirnamelen && name[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
name[dirnamelen++] = SRV_PATH_SEPARATOR;
@@ -750,46 +793,67 @@ open_or_create_data_files(
strcpy(name + dirnamelen, srv_data_file_names[i]);
- if (srv_data_file_is_raw_partition[i] == 0) {
+		/* Note: It will return true if the file doesn't exist. */
+
+ if (!srv_file_check_mode(name)) {
+
+ return(DB_FAIL);
+
+ } else if (srv_data_file_is_raw_partition[i] == 0) {
/* First we try to create the file: if it already
exists, ret will get value FALSE */
- files[i] = os_file_create(innodb_file_data_key,
- name, OS_FILE_CREATE,
- OS_FILE_NORMAL,
- OS_DATA_FILE, &ret);
+ files[i] = os_file_create(
+ innodb_file_data_key, name, OS_FILE_CREATE,
+ OS_FILE_NORMAL, OS_DATA_FILE, &ret);
+
+ if (srv_read_only_mode) {
+
+ if (ret) {
+ goto size_check;
+ }
- if (ret == FALSE && os_file_get_last_error(FALSE)
- != OS_FILE_ALREADY_EXISTS
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Opening %s failed!", name);
+
+ return(DB_ERROR);
+
+ } else if (!ret
+ && os_file_get_last_error(false)
+ != OS_FILE_ALREADY_EXISTS
#ifdef UNIV_AIX
- /* AIX 5.1 after security patch ML7 may have
- errno set to 0 here, which causes our function
- to return 100; work around that AIX problem */
- && os_file_get_last_error(FALSE) != 100
-#endif
+ /* AIX 5.1 after security patch ML7 may have
+ errno set to 0 here, which causes our
+ function to return 100; work around that
+ AIX problem */
+ && os_file_get_last_error(false) != 100
+#endif /* UNIV_AIX */
) {
- fprintf(stderr,
- "InnoDB: Error in creating"
- " or opening %s\n",
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Creating or opening %s failed!",
name);
return(DB_ERROR);
}
+
} else if (srv_data_file_is_raw_partition[i] == SRV_NEW_RAW) {
+
+ ut_a(!srv_read_only_mode);
+
/* The partition is opened, not created; then it is
written over */
srv_start_raw_disk_in_use = TRUE;
srv_created_new_raw = TRUE;
- files[i] = os_file_create(innodb_file_data_key,
- name, OS_FILE_OPEN_RAW,
- OS_FILE_NORMAL,
- OS_DATA_FILE, &ret);
+ files[i] = os_file_create(
+ innodb_file_data_key, name, OS_FILE_OPEN_RAW,
+ OS_FILE_NORMAL, OS_DATA_FILE, &ret);
+
if (!ret) {
- fprintf(stderr,
- "InnoDB: Error in opening %s\n", name);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Error in opening %s", name);
return(DB_ERROR);
}
@@ -805,17 +869,15 @@ open_or_create_data_files(
/* We open the data file */
if (one_created) {
- fprintf(stderr,
- "InnoDB: Error: data files can only"
- " be added at the end\n");
- fprintf(stderr,
- "InnoDB: of a tablespace, but"
- " data file %s existed beforehand.\n",
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Data files can only be added at "
+ "the end of a tablespace, but "
+ "data file %s existed beforehand.",
name);
return(DB_ERROR);
}
-
if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
+ ut_a(!srv_read_only_mode);
files[i] = os_file_create(
innodb_file_data_key,
name, OS_FILE_OPEN_RAW,
@@ -833,9 +895,11 @@ open_or_create_data_files(
}
if (!ret) {
- fprintf(stderr,
- "InnoDB: Error in opening %s\n", name);
- os_file_get_last_error(TRUE);
+
+ os_file_get_last_error(true);
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Can't open '%s'", name);
return(DB_ERROR);
}
@@ -845,6 +909,7 @@ open_or_create_data_files(
goto skip_size_check;
}
+size_check:
size = os_file_get_size(files[i]);
ut_a(size != (os_offset_t) -1);
/* Round size downward to megabytes */
@@ -860,16 +925,16 @@ open_or_create_data_files(
&& srv_last_file_size_max
< rounded_size_pages)) {
- fprintf(stderr,
- "InnoDB: Error: auto-extending"
- " data file %s is"
- " of a different size\n"
- "InnoDB: %lu pages (rounded"
- " down to MB) than specified"
- " in the .cnf file:\n"
- "InnoDB: initial %lu pages,"
- " max %lu (relevant if"
- " non-zero) pages!\n",
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "auto-extending "
+ "data file %s is "
+ "of a different size "
+ "%lu pages (rounded "
+ "down to MB) than specified "
+ "in the .cnf file: "
+ "initial %lu pages, "
+ "max %lu (relevant if "
+ "non-zero) pages!",
name,
(ulong) rounded_size_pages,
(ulong) srv_data_file_sizes[i],
@@ -884,13 +949,11 @@ open_or_create_data_files(
if (rounded_size_pages != srv_data_file_sizes[i]) {
- fprintf(stderr,
- "InnoDB: Error: data file %s"
- " is of a different size\n"
- "InnoDB: %lu pages"
- " (rounded down to MB)\n"
- "InnoDB: than specified"
- " in the .cnf file %lu pages!\n",
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Data file %s is of a different "
+ "size %lu pages (rounded down to MB) "
+ "than specified in the .cnf file "
+ "%lu pages!",
name,
(ulong) rounded_size_pages,
(ulong) srv_data_file_sizes[i]);
@@ -899,63 +962,65 @@ open_or_create_data_files(
}
skip_size_check:
fil_read_first_page(
- files[i], one_opened, &flags,
+ files[i], one_opened, &flags, &space,
#ifdef UNIV_LOG_ARCHIVE
min_arch_log_no, max_arch_log_no,
#endif /* UNIV_LOG_ARCHIVE */
min_flushed_lsn, max_flushed_lsn);
+ /* The first file of the system tablespace must
+ have space ID = TRX_SYS_SPACE. The FSP_SPACE_ID
+ field in files greater than ibdata1 are unreliable. */
+ ut_a(one_opened || space == TRX_SYS_SPACE);
+
+ /* Check the flags for the first system tablespace
+ file only. */
if (!one_opened
&& UNIV_PAGE_SIZE
!= fsp_flags_get_page_size(flags)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: data file %s"
- " uses page size %lu,\n",
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Data file \"%s\" uses page size %lu,"
+ "but the start-up parameter "
+ "is --innodb-page-size=%lu",
name,
- fsp_flags_get_page_size(flags));
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: but the start-up parameter"
- " is innodb-page-size=%lu\n",
+ fsp_flags_get_page_size(flags),
UNIV_PAGE_SIZE);
return(DB_ERROR);
}
one_opened = TRUE;
- } else {
+ } else if (!srv_read_only_mode) {
/* We created the data file and now write it full of
zeros */
one_created = TRUE;
if (i > 0) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Data file %s did not"
- " exist: new to be created\n",
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Data file %s did not"
+ " exist: new to be created",
name);
} else {
- fprintf(stderr,
- "InnoDB: The first specified"
- " data file %s did not exist:\n"
- "InnoDB: a new database"
- " to be created!\n", name);
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "The first specified "
+ "data file %s did not exist: "
+ "a new database to be created!",
+ name);
+
*create_new_db = TRUE;
}
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Setting file %s size to %lu MB\n",
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Setting file %s size to %lu MB",
name,
(ulong) (srv_data_file_sizes[i]
>> (20 - UNIV_PAGE_SIZE_SHIFT)));
- fprintf(stderr,
- "InnoDB: Database physically writes the"
- " file full: wait...\n");
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Database physically writes the"
+ " file full: wait...");
ret = os_file_set_size(
name, files[i],
@@ -963,9 +1028,10 @@ skip_size_check:
<< UNIV_PAGE_SIZE_SHIFT);
if (!ret) {
- fprintf(stderr,
- "InnoDB: Error in creating %s:"
- " probably out of disk space\n", name);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Error in creating %s: "
+ "probably out of disk space",
+ name);
return(DB_ERROR);
}
@@ -983,8 +1049,10 @@ skip_size_check:
ut_a(fil_validate());
- fil_node_create(name, srv_data_file_sizes[i], 0,
- srv_data_file_is_raw_partition[i] != 0);
+ if (!fil_node_create(name, srv_data_file_sizes[i], 0,
+ srv_data_file_is_raw_partition[i] != 0)) {
+ return(DB_ERROR);
+ }
}
return(DB_SUCCESS);
@@ -994,7 +1062,7 @@ skip_size_check:
Create undo tablespace.
@return DB_SUCCESS or error code */
static
-enum db_err
+dberr_t
srv_undo_tablespace_create(
/*=======================*/
const char* name, /*!< in: tablespace name */
@@ -1002,48 +1070,55 @@ srv_undo_tablespace_create(
{
os_file_t fh;
ibool ret;
- enum db_err err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
os_file_create_subdirs_if_needed(name);
fh = os_file_create(
- innodb_file_data_key, name, OS_FILE_CREATE,
+ innodb_file_data_key,
+ name,
+ srv_read_only_mode ? OS_FILE_OPEN : OS_FILE_CREATE,
OS_FILE_NORMAL, OS_DATA_FILE, &ret);
- if (ret == FALSE
- && os_file_get_last_error(FALSE) != OS_FILE_ALREADY_EXISTS
+ if (srv_read_only_mode && ret) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "%s opened in read-only mode", name);
+ } else if (ret == FALSE
+ && os_file_get_last_error(false) != OS_FILE_ALREADY_EXISTS
#ifdef UNIV_AIX
- /* AIX 5.1 after security patch ML7 may have
- errno set to 0 here, which causes our function
- to return 100; work around that AIX problem */
- && os_file_get_last_error(FALSE) != 100
-#endif
+ /* AIX 5.1 after security patch ML7 may have
+ errno set to 0 here, which causes our function
+ to return 100; work around that AIX problem */
+ && os_file_get_last_error(false) != 100
+#endif /* UNIV_AIX */
) {
- fprintf(stderr, "InnoDB: Error in creating %s\n", name);
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Can't create UNDO tablespace %s", name);
err = DB_ERROR;
} else {
+ ut_a(!srv_read_only_mode);
+
/* We created the data file and now write it full of zeros */
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Data file %s did not"
- " exist: new to be created\n", name);
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Data file %s did not exist: new to be created",
+ name);
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Setting file %s size to %lu MB\n",
- name, size >> (20 - UNIV_PAGE_SIZE_SHIFT));
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Setting file %s size to %lu MB",
+ name, size >> (20 - UNIV_PAGE_SIZE_SHIFT));
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Database physically writes the"
- " file full: wait...\n");
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Database physically writes the file full: wait...");
ret = os_file_set_size(name, fh, size << UNIV_PAGE_SIZE_SHIFT);
if (!ret) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error in creating %s:"
- " probably out of disk space\n", name);
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Error in creating %s: probably out of "
+ "disk space", name);
err = DB_ERROR;
}
@@ -1058,17 +1133,25 @@ srv_undo_tablespace_create(
Open an undo tablespace.
@return DB_SUCCESS or error code */
static
-enum db_err
+dberr_t
srv_undo_tablespace_open(
/*=====================*/
const char* name, /*!< in: tablespace name */
ulint space) /*!< in: tablespace id */
{
os_file_t fh;
- enum db_err err;
+ dberr_t err = DB_ERROR;
ibool ret;
ulint flags;
+ if (!srv_file_check_mode(name)) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "UNDO tablespaces must be %s!",
+ srv_read_only_mode ? "writable" : "readable");
+
+ return(DB_ERROR);
+ }
+
fh = os_file_create(
innodb_file_data_key, name,
OS_FILE_OPEN_RETRY
@@ -1082,7 +1165,6 @@ srv_undo_tablespace_open(
if (ret) {
os_offset_t size;
- os_offset_t n_pages;
size = os_file_get_size(fh);
ut_a(size != (os_offset_t) -1);
@@ -1105,17 +1187,15 @@ srv_undo_tablespace_open(
ut_a(fil_validate());
- n_pages = size / UNIV_PAGE_SIZE;
+ os_offset_t n_pages = size / UNIV_PAGE_SIZE;
/* On 64 bit Windows ulint can be 32 bit and os_offset_t
is 64 bit. It is OK to cast the n_pages to ulint because
the unit has been scaled to pages and they are always
32 bit. */
- fil_node_create(name, (ulint) n_pages, space, FALSE);
-
- err = DB_SUCCESS;
- } else {
- err = DB_ERROR;
+ if (fil_node_create(name, (ulint) n_pages, space, FALSE)) {
+ err = DB_SUCCESS;
+ }
}
return(err);
@@ -1125,20 +1205,25 @@ srv_undo_tablespace_open(
Opens the configured number of undo tablespaces.
@return DB_SUCCESS or error code */
static
-enum db_err
+dberr_t
srv_undo_tablespaces_init(
/*======================*/
ibool create_new_db, /*!< in: TRUE if new db being
created */
- const ulint n_conf_tablespaces) /*!< in: configured undo
+ const ulint n_conf_tablespaces, /*!< in: configured undo
tablespaces */
+ ulint* n_opened) /*!< out: number of UNDO
+ tablespaces successfully
+ discovered and opened */
{
ulint i;
- enum db_err err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
ulint prev_space_id = 0;
ulint n_undo_tablespaces;
ulint undo_tablespace_ids[TRX_SYS_N_RSEGS + 1];
+ *n_opened = 0;
+
ut_a(n_conf_tablespaces <= TRX_SYS_N_RSEGS);
memset(undo_tablespace_ids, 0x0, sizeof(undo_tablespace_ids));
@@ -1164,10 +1249,10 @@ srv_undo_tablespaces_init(
name, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES);
if (err != DB_SUCCESS) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Could not create "
- "undo tablespace '%s'.\n", name);
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Could not create undo tablespace '%s'.",
+ name);
return(err);
}
@@ -1217,15 +1302,16 @@ srv_undo_tablespaces_init(
err = srv_undo_tablespace_open(name, undo_tablespace_ids[i]);
if (err != DB_SUCCESS) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error opening undo "
- "tablespace %s.\n", name);
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unable to open undo tablespace '%s'.", name);
return(err);
}
prev_space_id = undo_tablespace_ids[i];
+
+ ++*n_opened;
}
/* Open any extra unused undo tablespaces. These must be contiguous.
@@ -1248,6 +1334,8 @@ srv_undo_tablespaces_init(
}
++n_undo_tablespaces;
+
+ ++*n_opened;
}
/* If the user says that there are fewer than what we find we
@@ -1275,13 +1363,17 @@ srv_undo_tablespaces_init(
"value is %lu\n", n_undo_tablespaces);
return(err != DB_SUCCESS ? err : DB_ERROR);
- }
- if (n_undo_tablespaces > 0) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Opened %lu undo tablespaces\n",
- n_conf_tablespaces);
+ } else if (n_undo_tablespaces > 0) {
+
+ ib_logf(IB_LOG_LEVEL_INFO, "Opened %lu undo tablespaces",
+ n_undo_tablespaces);
+
+ if (n_conf_tablespaces == 0) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Using the system tablespace for all UNDO "
+ "logging because innodb_undo_tablespaces=0");
+ }
}
if (create_new_db) {
@@ -1303,18 +1395,51 @@ srv_undo_tablespaces_init(
}
/********************************************************************
+Wait for the purge thread(s) to start up. */
+static
+void
+srv_start_wait_for_purge_to_start()
+/*===============================*/
+{
+ /* Wait for the purge coordinator and master thread to startup. */
+
+ purge_state_t state = trx_purge_state();
+
+ ut_a(state != PURGE_STATE_DISABLED);
+
+ while (srv_shutdown_state == SRV_SHUTDOWN_NONE
+ && srv_force_recovery < SRV_FORCE_NO_BACKGROUND
+ && state == PURGE_STATE_INIT) {
+
+ switch (state = trx_purge_state()) {
+ case PURGE_STATE_RUN:
+ case PURGE_STATE_STOP:
+ break;
+
+ case PURGE_STATE_INIT:
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Waiting for purge to start");
+
+ os_thread_sleep(50000);
+ break;
+
+ case PURGE_STATE_EXIT:
+ case PURGE_STATE_DISABLED:
+ ut_error;
+ }
+ }
+}
+
+/********************************************************************
Starts InnoDB and creates a new database if database files
are not found and the user wants.
@return DB_SUCCESS or error code */
UNIV_INTERN
-int
+dberr_t
innobase_start_or_create_for_mysql(void)
/*====================================*/
{
ibool create_new_db;
- ibool log_file_created;
- ibool log_created = FALSE;
- ibool log_opened = FALSE;
lsn_t min_flushed_lsn;
lsn_t max_flushed_lsn;
#ifdef UNIV_LOG_ARCHIVE
@@ -1324,11 +1449,19 @@ innobase_start_or_create_for_mysql(void)
ulint sum_of_new_sizes;
ulint sum_of_data_file_sizes;
ulint tablespace_size_in_header;
- ulint err;
- ulint i;
+ dberr_t err;
+ unsigned i;
+ ulint srv_n_log_files_found = srv_n_log_files;
ulint io_limit;
mtr_t mtr;
ib_bh_t* ib_bh;
+ char logfilename[10000];
+ char* logfile0 = NULL;
+ size_t dirnamelen;
+
+ if (srv_read_only_mode) {
+ ib_logf(IB_LOG_LEVEL_INFO, "Started in read only mode");
+ }
#ifdef HAVE_DARWIN_THREADS
# ifdef F_FULLFSYNC
@@ -1422,31 +1555,34 @@ innobase_start_or_create_for_mysql(void)
" InnoDB: !!!!!!!! UNIV_MEM_DEBUG switched on !!!!!!!!!\n");
#endif
- if (UNIV_LIKELY(srv_use_sys_malloc)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: The InnoDB memory heap is disabled\n");
+ if (srv_use_sys_malloc) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "The InnoDB memory heap is disabled");
}
#if defined(COMPILER_HINTS_ENABLED)
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Compiler hints enabled.\n");
+ ib_logf(IB_LOG_LEVEL_INFO,
+ " InnoDB: Compiler hints enabled.");
#endif /* defined(COMPILER_HINTS_ENABLED) */
- ut_print_timestamp(stderr);
- fputs(" InnoDB: " IB_ATOMICS_STARTUP_MSG "\n", stderr);
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "" IB_ATOMICS_STARTUP_MSG "");
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Compressed tables use zlib " ZLIB_VERSION
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Compressed tables use zlib " ZLIB_VERSION
#ifdef UNIV_ZIP_DEBUG
" with validation"
#endif /* UNIV_ZIP_DEBUG */
- "\n" , stderr);
+ );
#ifdef UNIV_ZIP_COPY
- ut_print_timestamp(stderr);
- fputs(" InnoDB: and extra copying\n", stderr);
+ ib_logf(IB_LOG_LEVEL_INFO, "and extra copying");
#endif /* UNIV_ZIP_COPY */
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "CPU %s crc32 instructions",
+ ut_crc32_sse2_enabled ? "supports" : "does not support");
+
/* Since InnoDB does not currently clean up all its internal data
structures in MySQL Embedded Server Library server_end(), we
print an error message if someone tries to start up InnoDB a
@@ -1505,17 +1641,14 @@ innobase_start_or_create_for_mysql(void)
#elif defined(LINUX_NATIVE_AIO)
if (srv_use_native_aio) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Using Linux native AIO\n");
+ ib_logf(IB_LOG_LEVEL_INFO, "Using Linux native AIO");
}
#else
/* Currently native AIO is supported only on windows and linux
and that also when the support is compiled in. In all other
cases, we ignore the setting of innodb_use_native_aio. */
srv_use_native_aio = FALSE;
-
-#endif
+#endif /* __WIN__ */
if (srv_file_flush_method_str == NULL) {
/* These are the default options */
@@ -1533,6 +1666,9 @@ innobase_start_or_create_for_mysql(void)
} else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) {
srv_unix_file_flush_method = SRV_UNIX_O_DIRECT;
+ } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT_NO_FSYNC")) {
+ srv_unix_file_flush_method = SRV_UNIX_O_DIRECT_NO_FSYNC;
+
} else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) {
srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
@@ -1550,12 +1686,10 @@ innobase_start_or_create_for_mysql(void)
} else if (0 == ut_strcmp(srv_file_flush_method_str,
"async_unbuffered")) {
srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
-#endif
+#endif /* __WIN__ */
} else {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Unrecognized value %s for"
- " innodb_flush_method\n",
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unrecognized value %s for innodb_flush_method",
srv_file_flush_method_str);
return(DB_ERROR);
}
@@ -1580,74 +1714,93 @@ innobase_start_or_create_for_mysql(void)
srv_max_n_threads = 10000;
} else {
srv_buf_pool_instances = 1;
- srv_max_n_threads = 1000; /* saves several MB of memory,
- especially in 64-bit
- computers */
+
+ /* Saves several MB of memory, especially in
+ 64-bit computers */
+
+ srv_max_n_threads = 1000;
}
- err = srv_boot();
+ srv_boot();
- if (err != DB_SUCCESS) {
+ if (!srv_read_only_mode) {
- return((int) err);
- }
+ mutex_create(srv_monitor_file_mutex_key,
+ &srv_monitor_file_mutex, SYNC_NO_ORDER_CHECK);
- mutex_create(srv_monitor_file_mutex_key,
- &srv_monitor_file_mutex, SYNC_NO_ORDER_CHECK);
+ if (srv_innodb_status) {
- if (srv_innodb_status) {
+ srv_monitor_file_name = static_cast<char*>(
+ mem_alloc(
+ strlen(fil_path_to_mysql_datadir)
+ + 20 + sizeof "/innodb_status."));
- srv_monitor_file_name = static_cast<char*>(
- mem_alloc(
- strlen(fil_path_to_mysql_datadir)
- + 20 + sizeof "/innodb_status."));
+ sprintf(srv_monitor_file_name, "%s/innodb_status.%lu",
+ fil_path_to_mysql_datadir,
+ os_proc_get_number());
- sprintf(srv_monitor_file_name, "%s/innodb_status.%lu",
- fil_path_to_mysql_datadir, os_proc_get_number());
- srv_monitor_file = fopen(srv_monitor_file_name, "w+");
- if (!srv_monitor_file) {
- fprintf(stderr, "InnoDB: unable to create %s: %s\n",
- srv_monitor_file_name, strerror(errno));
- return(DB_ERROR);
+ srv_monitor_file = fopen(srv_monitor_file_name, "w+");
+
+ if (!srv_monitor_file) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unable to create %s: %s",
+ srv_monitor_file_name,
+ strerror(errno));
+
+ return(DB_ERROR);
+ }
+ } else {
+ srv_monitor_file_name = NULL;
+ srv_monitor_file = os_file_create_tmpfile();
+
+ if (!srv_monitor_file) {
+ return(DB_ERROR);
+ }
}
- } else {
- srv_monitor_file_name = NULL;
- srv_monitor_file = os_file_create_tmpfile();
- if (!srv_monitor_file) {
+
+ mutex_create(srv_dict_tmpfile_mutex_key,
+ &srv_dict_tmpfile_mutex, SYNC_DICT_OPERATION);
+
+ srv_dict_tmpfile = os_file_create_tmpfile();
+
+ if (!srv_dict_tmpfile) {
return(DB_ERROR);
}
- }
- mutex_create(srv_dict_tmpfile_mutex_key,
- &srv_dict_tmpfile_mutex, SYNC_DICT_OPERATION);
+ mutex_create(srv_misc_tmpfile_mutex_key,
+ &srv_misc_tmpfile_mutex, SYNC_ANY_LATCH);
- srv_dict_tmpfile = os_file_create_tmpfile();
- if (!srv_dict_tmpfile) {
- return(DB_ERROR);
- }
+ srv_misc_tmpfile = os_file_create_tmpfile();
- mutex_create(srv_misc_tmpfile_mutex_key,
- &srv_misc_tmpfile_mutex, SYNC_ANY_LATCH);
-
- srv_misc_tmpfile = os_file_create_tmpfile();
- if (!srv_misc_tmpfile) {
- return(DB_ERROR);
+ if (!srv_misc_tmpfile) {
+ return(DB_ERROR);
+ }
}
/* If user has set the value of innodb_file_io_threads then
we'll emit a message telling the user that this parameter
is now deprecated. */
if (srv_n_file_io_threads != 4) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Warning:"
- " innodb_file_io_threads is deprecated."
- " Please use innodb_read_io_threads and"
- " innodb_write_io_threads instead\n");
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "innodb_file_io_threads is deprecated. Please use "
+ "innodb_read_io_threads and innodb_write_io_threads "
+ "instead");
}
/* Now overwrite the value on srv_n_file_io_threads */
- srv_n_file_io_threads = 2 + srv_n_read_io_threads
- + srv_n_write_io_threads;
+ srv_n_file_io_threads = srv_n_read_io_threads;
+
+ if (!srv_read_only_mode) {
+ /* Add the log and ibuf IO threads. */
+ srv_n_file_io_threads += 2;
+ srv_n_file_io_threads += srv_n_write_io_threads;
+ } else {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Disabling background IO write threads.");
+
+ srv_n_write_io_threads = 0;
+ }
ut_a(srv_n_file_io_threads <= SRV_MAX_N_IO_THREADS);
@@ -1662,56 +1815,59 @@ innobase_start_or_create_for_mysql(void)
}
# endif /* __WIN__ */
- os_aio_init(io_limit,
- srv_n_read_io_threads,
- srv_n_write_io_threads,
- SRV_MAX_N_PENDING_SYNC_IOS);
+ if (!os_aio_init(io_limit,
+ srv_n_read_io_threads,
+ srv_n_write_io_threads,
+ SRV_MAX_N_PENDING_SYNC_IOS)) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Fatal : Cannot initialize AIO sub-system");
+
+ return(DB_ERROR);
+ }
fil_init(srv_file_per_table ? 50000 : 5000, srv_max_n_open_files);
- /* Print time to initialize the buffer pool */
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Initializing buffer pool, size =");
+ double size;
+ char unit;
if (srv_buf_pool_size >= 1024 * 1024 * 1024) {
- fprintf(stderr,
- " %.1fG\n",
- ((double) srv_buf_pool_size) / (1024 * 1024 * 1024));
+ size = ((double) srv_buf_pool_size) / (1024 * 1024 * 1024);
+ unit = 'G';
} else {
- fprintf(stderr,
- " %.1fM\n",
- ((double) srv_buf_pool_size) / (1024 * 1024));
+ size = ((double) srv_buf_pool_size) / (1024 * 1024);
+ unit = 'M';
}
- err = buf_pool_init(srv_buf_pool_size, srv_buf_pool_instances);
+ /* Print time to initialize the buffer pool */
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Initializing buffer pool, size = %.1f%c", size, unit);
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Completed initialization of buffer pool\n");
+ err = buf_pool_init(srv_buf_pool_size, srv_buf_pool_instances);
if (err != DB_SUCCESS) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Fatal error: cannot allocate memory"
- " for the buffer pool\n");
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot allocate memory for the buffer pool");
return(DB_ERROR);
}
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Completed initialization of buffer pool");
+
#ifdef UNIV_DEBUG
/* We have observed deadlocks with a 5MB buffer pool but
the actual lower limit could very well be a little higher. */
if (srv_buf_pool_size <= 5 * 1024 * 1024) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Warning: Small buffer pool size "
- "(%luM), the flst_validate() debug function "
- "can cause a deadlock if the buffer pool fills up.\n",
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Small buffer pool size (%luM), the flst_validate() "
+ "debug function can cause a deadlock if the "
+ "buffer pool fills up.",
srv_buf_pool_size / 1024 / 1024);
}
-#endif
+#endif /* UNIV_DEBUG */
fsp_init();
log_init();
@@ -1720,14 +1876,15 @@ innobase_start_or_create_for_mysql(void)
/* Create i/o-handler threads: */
- for (i = 0; i < srv_n_file_io_threads; i++) {
+ for (ulint i = 0; i < srv_n_file_io_threads; ++i) {
+
n[i] = i;
os_thread_create(io_handler_thread, n + i, thread_ids + i);
}
#ifdef UNIV_LOG_ARCHIVE
- if (0 != ut_strcmp(srv_log_group_home_dirs[0], srv_arch_dir)) {
+ if (0 != ut_strcmp(srv_log_group_home_dir, srv_arch_dir)) {
ut_print_timestamp(stderr);
fprintf(stderr, " InnoDB: Error: you must set the log group home dir in my.cnf\n");
ut_print_timestamp(stderr);
@@ -1738,16 +1895,14 @@ innobase_start_or_create_for_mysql(void)
#endif /* UNIV_LOG_ARCHIVE */
if (srv_n_log_files * srv_log_file_size * UNIV_PAGE_SIZE
- >= 549755813888ULL /* 512G */) {
+ >= 512ULL * 1024ULL * 1024ULL * 1024ULL) {
/* log_block_convert_lsn_to_no() limits the returned block
number to 1G and given that OS_FILE_LOG_BLOCK_SIZE is 512
bytes, then we have a limit of 512 GB. If that limit is to
be raised, then log_block_convert_lsn_to_no() must be
modified. */
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: combined size of log files"
- " must be < 512 GB\n");
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Combined size of log files must be < 512 GB");
return(DB_ERROR);
}
@@ -1759,7 +1914,6 @@ innobase_start_or_create_for_mysql(void)
So next_offset must be < ULINT_MAX * UNIV_PAGE_SIZE. This
means that we are limited to ULINT_MAX * UNIV_PAGE_SIZE which
is 64 TB on 32 bit systems. */
- ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Error: combined size of log files"
" must be < %lu GB\n",
@@ -1791,10 +1945,8 @@ innobase_start_or_create_for_mysql(void)
}
if (sum_of_new_sizes < 10485760 / UNIV_PAGE_SIZE) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: tablespace size must be"
- " at least 10 MB\n");
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Tablespace size must be at least 10 MB");
return(DB_ERROR);
}
@@ -1805,36 +1957,27 @@ innobase_start_or_create_for_mysql(void)
#endif /* UNIV_LOG_ARCHIVE */
&min_flushed_lsn, &max_flushed_lsn,
&sum_of_new_sizes);
- if (err != DB_SUCCESS) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Could not open or create data files.\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: If you tried to add new data files,"
- " and it failed here,\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: you should now edit innodb_data_file_path"
- " in my.cnf back\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: to what it was, and remove the"
- " new ibdata files InnoDB created\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: in this failed attempt. InnoDB only wrote"
- " those files full of\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: zeros, but did not yet use them in any way."
- " But be careful: do not\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: remove old data files"
- " which contain your precious data!\n");
+ if (err == DB_FAIL) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "The system tablespace must be writable!");
+
+ return(DB_ERROR);
- return((int) err);
+ } else if (err != DB_SUCCESS) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Could not open or create the system tablespace. If "
+ "you tried to add new data files to the system "
+ "tablespace, and it failed here, you should now "
+ "edit innodb_data_file_path in my.cnf back to what "
+ "it was, and remove the new ibdata files InnoDB "
+ "created in this failed attempt. InnoDB only wrote "
+ "those files full of zeros, but did not yet use "
+ "them in any way. But be careful: do not remove "
+ "old data files which contain your precious data!");
+
+ return(err);
}
#ifdef UNIV_LOG_ARCHIVE
@@ -1842,125 +1985,199 @@ innobase_start_or_create_for_mysql(void)
srv_arch_dir = srv_add_path_separator_if_needed(srv_arch_dir);
#endif /* UNIV_LOG_ARCHIVE */
- for (i = 0; i < srv_n_log_files; i++) {
- err = open_or_create_log_file(create_new_db, &log_file_created,
- log_opened, 0, i);
- if (err != DB_SUCCESS) {
+ dirnamelen = strlen(srv_log_group_home_dir);
+ ut_a(dirnamelen < (sizeof logfilename) - 10 - sizeof "ib_logfile");
+ memcpy(logfilename, srv_log_group_home_dir, dirnamelen);
- return((int) err);
- }
+ /* Add a path separator if needed. */
+ if (dirnamelen && logfilename[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
+ logfilename[dirnamelen++] = SRV_PATH_SEPARATOR;
+ }
- if (log_file_created) {
- log_created = TRUE;
- } else {
- log_opened = TRUE;
+ srv_log_file_size_requested = srv_log_file_size;
+
+ if (create_new_db) {
+ bool success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL);
+ ut_a(success);
+
+ min_flushed_lsn = max_flushed_lsn = log_get_lsn();
+
+ buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
+
+ err = create_log_files(logfilename, dirnamelen,
+ max_flushed_lsn, logfile0);
+
+ if (err != DB_SUCCESS) {
+ return(err);
}
- if ((log_opened && create_new_db)
- || (log_opened && log_created)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: all log files must be"
- " created at the same time.\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: All log files must be"
- " created also in database creation.\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: If you want bigger or smaller"
- " log files, shut down the\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: database and make sure there"
- " were no errors in shutdown.\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Then delete the existing log files."
- " Edit the .cnf file\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: and start the database again.\n");
+ } else {
+ for (i = 0; i < SRV_N_LOG_FILES_MAX; i++) {
+ os_offset_t size;
+ os_file_stat_t stat_info;
+
+ sprintf(logfilename + dirnamelen,
+ "ib_logfile%u", i);
+
+ err = os_file_get_status(
+ logfilename, &stat_info, false);
+
+ if (err == DB_NOT_FOUND) {
+ if (i == 0) {
+ if (max_flushed_lsn
+ != min_flushed_lsn) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot create"
+ " log files because"
+ " data files are"
+ " corrupt or"
+ " not in sync"
+ " with each other");
+ return(DB_ERROR);
+ }
+
+ if (max_flushed_lsn < (lsn_t) 1000) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot create"
+ " log files because"
+ " data files are"
+ " corrupt or the"
+ " database was not"
+ " shut down cleanly"
+ " after creating"
+ " the data files.");
+ return(DB_ERROR);
+ }
+
+ err = create_log_files(
+ logfilename, dirnamelen,
+ max_flushed_lsn, logfile0);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+
+ create_log_files_rename(
+ logfilename, dirnamelen,
+ max_flushed_lsn, logfile0);
+
+ /* Suppress the message about
+ crash recovery. */
+ max_flushed_lsn = min_flushed_lsn
+ = log_get_lsn();
+ goto files_checked;
+ } else if (i < 2) {
+ /* must have at least 2 log files */
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Only one log file found.");
+ return(err);
+ }
- return(DB_ERROR);
+ /* opened all files */
+ break;
+ }
+
+ if (!srv_file_check_mode(logfilename)) {
+ return(DB_ERROR);
+ }
+
+ err = open_log_file(&files[i], logfilename, &size);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+
+ ut_a(size != (os_offset_t) -1);
+
+ if (size & ((1 << UNIV_PAGE_SIZE_SHIFT) - 1)) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Log file %s size "
+ UINT64PF " is not a multiple of"
+ " innodb_page_size",
+ logfilename, size);
+ return(DB_ERROR);
+ }
+
+ size >>= UNIV_PAGE_SIZE_SHIFT;
+
+ if (i == 0) {
+ srv_log_file_size = size;
+ } else if (size != srv_log_file_size) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Log file %s is"
+ " of different size "UINT64PF" bytes"
+ " than other log"
+ " files "UINT64PF" bytes!",
+ logfilename,
+ size << UNIV_PAGE_SIZE_SHIFT,
+ (os_offset_t) srv_log_file_size
+ << UNIV_PAGE_SIZE_SHIFT);
+ return(DB_ERROR);
+ }
}
- }
- /* Open all log files and data files in the system tablespace: we
- keep them open until database shutdown */
+ srv_n_log_files_found = i;
- fil_open_log_and_system_tablespace_files();
+ /* Create the in-memory file space objects. */
- err = srv_undo_tablespaces_init(create_new_db, srv_undo_tablespaces);
+ sprintf(logfilename + dirnamelen, "ib_logfile%u", 0);
- /* If the force recovery is set very high then we carry on regardless
- of all errors. Basically this is fingers crossed mode. */
+ fil_space_create(logfilename,
+ SRV_LOG_SPACE_FIRST_ID,
+ fsp_flags_set_page_size(0, UNIV_PAGE_SIZE),
+ FIL_LOG);
- if (err != DB_SUCCESS
- && srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
+ ut_a(fil_validate());
- return((int) err);
- }
+ /* srv_log_file_size is measured in pages; if page size is 16KB,
+ then we have a limit of 64TB on 32 bit systems */
+ ut_a(srv_log_file_size <= ULINT_MAX);
+
+ for (unsigned j = 0; j < i; j++) {
+ sprintf(logfilename + dirnamelen, "ib_logfile%u", j);
+
+ if (!fil_node_create(logfilename,
+ (ulint) srv_log_file_size,
+ SRV_LOG_SPACE_FIRST_ID, FALSE)) {
+ return(DB_ERROR);
+ }
+ }
- if (log_created && !create_new_db
-#ifdef UNIV_LOG_ARCHIVE
- && !srv_archive_recovery
-#endif /* UNIV_LOG_ARCHIVE */
- ) {
- if (max_flushed_lsn != min_flushed_lsn
#ifdef UNIV_LOG_ARCHIVE
- || max_arch_log_no != min_arch_log_no
+ /* Create the file space object for archived logs. Under
+ MySQL, no archiving ever done. */
+ fil_space_create("arch_log_space", SRV_LOG_SPACE_FIRST_ID + 1,
+ 0, FIL_LOG);
#endif /* UNIV_LOG_ARCHIVE */
- ) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Cannot initialize created"
- " log files because\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: data files were not in sync"
- " with each other\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: or the data files are corrupt.\n");
+ log_group_init(0, i, srv_log_file_size * UNIV_PAGE_SIZE,
+ SRV_LOG_SPACE_FIRST_ID,
+ SRV_LOG_SPACE_FIRST_ID + 1);
+ }
- return(DB_ERROR);
- }
+files_checked:
+ /* Open all log files and data files in the system
+ tablespace: we keep them open until database
+ shutdown */
- if (max_flushed_lsn < (lsn_t) 1000) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Cannot initialize created"
- " log files because\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: data files are corrupt,"
- " or new data files were\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: created when the database"
- " was started previous\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: time but the database"
- " was not shut down\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: normally after that.\n");
+ fil_open_log_and_system_tablespace_files();
- return(DB_ERROR);
- }
+ err = srv_undo_tablespaces_init(
+ create_new_db,
+ srv_undo_tablespaces,
+ &srv_undo_tablespaces_open);
- mutex_enter(&(log_sys->mutex));
+ /* If the force recovery is set very high then we carry on regardless
+ of all errors. Basically this is fingers crossed mode. */
-#ifdef UNIV_LOG_ARCHIVE
- /* Do not + 1 arch_log_no because we do not use log
- archiving */
- recv_reset_logs(max_flushed_lsn, max_arch_log_no, TRUE);
-#else
- recv_reset_logs(max_flushed_lsn, TRUE);
-#endif /* UNIV_LOG_ARCHIVE */
+ if (err != DB_SUCCESS
+ && srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
- mutex_exit(&(log_sys->mutex));
+ return(err);
+ }
+
+ /* Initialize objects used by dict stats gathering thread, which
+ can also be used by recovery if it tries to drop some table */
+ if (!srv_read_only_mode) {
+ dict_stats_thread_init();
}
trx_sys_file_format_init();
@@ -1968,6 +2185,9 @@ innobase_start_or_create_for_mysql(void)
trx_sys_create();
if (create_new_db) {
+
+ ut_a(!srv_read_only_mode);
+
mtr_start(&mtr);
fsp_header_init(0, sum_of_new_sizes, &mtr);
@@ -1987,16 +2207,34 @@ innobase_start_or_create_for_mysql(void)
trx_purge_sys_create(srv_n_purge_threads, ib_bh);
- dict_create();
+ err = dict_create();
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
srv_startup_is_before_trx_rollback_phase = FALSE;
+ bool success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL);
+ ut_a(success);
+
+ min_flushed_lsn = max_flushed_lsn = log_get_lsn();
+
+ buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
+
+ /* Stamp the LSN to the data files. */
+ fil_write_flushed_lsn_to_data_files(max_flushed_lsn, 0);
+
+ fil_flush_file_spaces(FIL_TABLESPACE);
+
+ create_log_files_rename(logfilename, dirnamelen,
+ max_flushed_lsn, logfile0);
#ifdef UNIV_LOG_ARCHIVE
} else if (srv_archive_recovery) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Starting archive"
- " recovery from a backup...\n");
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ " Starting archive recovery from a backup...");
+
err = recv_recovery_from_archive_start(
min_flushed_lsn, srv_archive_recovery_limit_lsn,
min_arch_log_no);
@@ -2007,7 +2245,11 @@ innobase_start_or_create_for_mysql(void)
/* Since ibuf init is in dict_boot, and ibuf is needed
in any disk i/o, first call dict_boot */
- dict_boot();
+ err = dict_boot();
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
ib_bh = trx_sys_init_at_db_start();
@@ -2051,10 +2293,10 @@ innobase_start_or_create_for_mysql(void)
/* We always try to do a recovery, even if the database had
been shut down normally: this is the normal startup path */
- err = recv_recovery_from_checkpoint_start(LOG_CHECKPOINT,
- IB_ULONGLONG_MAX,
- min_flushed_lsn,
- max_flushed_lsn);
+ err = recv_recovery_from_checkpoint_start(
+ LOG_CHECKPOINT, IB_ULONGLONG_MAX,
+ min_flushed_lsn, max_flushed_lsn);
+
if (err != DB_SUCCESS) {
return(DB_ERROR);
@@ -2066,7 +2308,11 @@ innobase_start_or_create_for_mysql(void)
to access space 0, and the insert buffer at this stage already
works for space 0. */
- dict_boot();
+ err = dict_boot();
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
ib_bh = trx_sys_init_at_db_start();
@@ -2079,6 +2325,7 @@ innobase_start_or_create_for_mysql(void)
are initialized in trx_sys_init_at_db_start(). */
recv_recovery_from_checkpoint_finish();
+
if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) {
/* The following call is necessary for the insert
buffer to work with multiple tablespaces. We must
@@ -2100,6 +2347,90 @@ innobase_start_or_create_for_mysql(void)
recv_needed_recovery);
}
+ if (!srv_force_recovery
+ && !recv_sys->found_corrupt_log
+ && (srv_log_file_size_requested != srv_log_file_size
+ || srv_n_log_files_found != srv_n_log_files)) {
+ /* Prepare to replace the redo log files. */
+
+ if (srv_read_only_mode) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot resize log files "
+ "in read-only mode.");
+ return(DB_READ_ONLY);
+ }
+
+ /* Clean the buffer pool. */
+ bool success = buf_flush_list(
+ ULINT_MAX, LSN_MAX, NULL);
+ ut_a(success);
+
+ RECOVERY_CRASH(1);
+
+ min_flushed_lsn = max_flushed_lsn = log_get_lsn();
+
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Resizing redo log from %u*%u to %u*%u pages"
+ ", LSN=" LSN_PF,
+ (unsigned) i,
+ (unsigned) srv_log_file_size,
+ (unsigned) srv_n_log_files,
+ (unsigned) srv_log_file_size_requested,
+ max_flushed_lsn);
+
+ buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
+
+ RECOVERY_CRASH(2);
+
+ /* Flush the old log files. */
+ log_buffer_flush_to_disk();
+ /* If innodb_flush_method=O_DSYNC,
+ we need to explicitly flush the log buffers. */
+ fil_flush(SRV_LOG_SPACE_FIRST_ID);
+
+ ut_ad(max_flushed_lsn == log_get_lsn());
+
+ /* Prohibit redo log writes from any other
+ threads until creating a log checkpoint at the
+ end of create_log_files(). */
+ ut_d(recv_no_log_write = TRUE);
+ ut_ad(!buf_pool_check_no_pending_io());
+
+ RECOVERY_CRASH(3);
+
+ /* Stamp the LSN to the data files. */
+ fil_write_flushed_lsn_to_data_files(
+ max_flushed_lsn, 0);
+
+ fil_flush_file_spaces(FIL_TABLESPACE);
+
+ RECOVERY_CRASH(4);
+
+ /* Close and free the redo log files, so that
+ we can replace them. */
+ fil_close_log_files(true);
+
+ RECOVERY_CRASH(5);
+
+ /* Free the old log file space. */
+ log_group_close_all();
+
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Starting to delete and rewrite log files.");
+
+ srv_log_file_size = srv_log_file_size_requested;
+
+ err = create_log_files(logfilename, dirnamelen,
+ max_flushed_lsn, logfile0);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+
+ create_log_files_rename(logfilename, dirnamelen,
+ max_flushed_lsn, logfile0);
+ }
+
srv_startup_is_before_trx_rollback_phase = FALSE;
recv_recovery_rollback_active();
@@ -2181,31 +2512,39 @@ innobase_start_or_create_for_mysql(void)
if (srv_available_undo_logs == ULINT_UNDEFINED) {
/* Can only happen if force recovery is set. */
- ut_a(srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
+ ut_a(srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO
+ || srv_read_only_mode);
srv_undo_logs = ULONG_UNDEFINED;
}
- /* Create the thread which watches the timeouts for lock waits */
- os_thread_create(
- lock_wait_timeout_thread,
- NULL, thread_ids + 2 + SRV_MAX_N_IO_THREADS);
-
- /* Create the thread which warns of long semaphore waits */
- os_thread_create(
- srv_error_monitor_thread,
- NULL, thread_ids + 3 + SRV_MAX_N_IO_THREADS);
+ if (!srv_read_only_mode) {
+ /* Create the thread which watches the timeouts
+ for lock waits */
+ os_thread_create(
+ lock_wait_timeout_thread,
+ NULL, thread_ids + 2 + SRV_MAX_N_IO_THREADS);
- /* Create the thread which prints InnoDB monitor info */
- os_thread_create(
- srv_monitor_thread,
- NULL, thread_ids + 4 + SRV_MAX_N_IO_THREADS);
+ /* Create the thread which warns of long semaphore waits */
+ os_thread_create(
+ srv_error_monitor_thread,
+ NULL, thread_ids + 3 + SRV_MAX_N_IO_THREADS);
- srv_is_being_started = FALSE;
+ /* Create the thread which prints InnoDB monitor info */
+ os_thread_create(
+ srv_monitor_thread,
+ NULL, thread_ids + 4 + SRV_MAX_N_IO_THREADS);
+ }
/* Create the SYS_FOREIGN and SYS_FOREIGN_COLS system tables */
err = dict_create_or_check_foreign_constraint_tables();
if (err != DB_SUCCESS) {
- return((int)DB_ERROR);
+ return(err);
+ }
+
+ /* Create the SYS_TABLESPACES system table */
+ err = dict_create_or_check_sys_tablespace();
+ if (err != DB_SUCCESS) {
+ return(err);
}
srv_is_being_started = FALSE;
@@ -2215,11 +2554,15 @@ innobase_start_or_create_for_mysql(void)
/* Create the master thread which does purge and other utility
operations */
- os_thread_create(
- srv_master_thread,
- NULL, thread_ids + (1 + SRV_MAX_N_IO_THREADS));
+ if (!srv_read_only_mode) {
+
+ os_thread_create(
+ srv_master_thread,
+ NULL, thread_ids + (1 + SRV_MAX_N_IO_THREADS));
+ }
- if (srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
+ if (!srv_read_only_mode
+ && srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
os_thread_create(
srv_purge_coordinator_thread,
@@ -2234,35 +2577,15 @@ innobase_start_or_create_for_mysql(void)
srv_worker_thread, NULL,
thread_ids + 5 + i + SRV_MAX_N_IO_THREADS);
}
- }
-
- os_thread_create(buf_flush_page_cleaner_thread, NULL, NULL);
-
- /* Wait for the purge coordinator and master thread to startup. */
-
- purge_state_t state = trx_purge_state();
- while (srv_shutdown_state == SRV_SHUTDOWN_NONE
- && srv_force_recovery < SRV_FORCE_NO_BACKGROUND
- && state == PURGE_STATE_INIT) {
-
- switch (state = trx_purge_state()) {
- case PURGE_STATE_RUN:
- case PURGE_STATE_STOP:
- break;
-
- case PURGE_STATE_INIT:
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: "
- "Waiting for the background threads to "
- "start\n");
+ srv_start_wait_for_purge_to_start();
- os_thread_sleep(50000);
- break;
+ } else {
+ purge_sys->state = PURGE_STATE_DISABLED;
+ }
- case PURGE_STATE_EXIT:
- ut_error;
- }
+ if (!srv_read_only_mode) {
+ os_thread_create(buf_flush_page_cleaner_thread, NULL, NULL);
}
#ifdef UNIV_DEBUG
@@ -2276,7 +2599,8 @@ innobase_start_or_create_for_mysql(void)
tablespace_size_in_header = fsp_header_get_tablespace_size();
- if (!srv_auto_extend_last_data_file
+ if (!srv_read_only_mode
+ && !srv_auto_extend_last_data_file
&& sum_of_data_file_sizes != tablespace_size_in_header) {
ut_print_timestamp(stderr);
@@ -2319,7 +2643,8 @@ innobase_start_or_create_for_mysql(void)
}
}
- if (srv_auto_extend_last_data_file
+ if (!srv_read_only_mode
+ && srv_auto_extend_last_data_file
&& sum_of_data_file_sizes < tablespace_size_in_header) {
ut_print_timestamp(stderr);
@@ -2383,23 +2708,17 @@ innobase_start_or_create_for_mysql(void)
os_fast_mutex_free(&srv_os_test_mutex);
if (srv_print_verbose_log) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: %s started; "
- "log sequence number " LSN_PF "\n",
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "%s started; log sequence number " LSN_PF "",
INNODB_VERSION_STR, srv_start_lsn);
}
if (srv_force_recovery > 0) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: !!! innodb_force_recovery"
- " is set to %lu !!!\n",
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "!!! innodb_force_recovery is set to %lu !!!",
(ulong) srv_force_recovery);
}
- fflush(stderr);
-
if (srv_force_recovery == 0) {
/* In the insert buffer we may have even bigger tablespace
id's, because we may have dropped those tablespaces, but
@@ -2409,16 +2728,20 @@ innobase_start_or_create_for_mysql(void)
ibuf_update_max_tablespace_id();
}
- /* Create the buffer pool dump/load thread */
- os_thread_create(buf_dump_thread, NULL, NULL);
+ if (!srv_read_only_mode) {
+ /* Create the buffer pool dump/load thread */
+ os_thread_create(buf_dump_thread, NULL, NULL);
- srv_was_started = TRUE;
+ /* Create the dict stats gathering thread */
+ os_thread_create(dict_stats_thread, NULL, NULL);
- /* Create the thread that will optimize the FTS sub-system
- in a separate background thread. */
- fts_optimize_init();
+ /* Create the thread that will optimize the FTS sub-system. */
+ fts_optimize_init();
+ }
- return((int) DB_SUCCESS);
+ srv_was_started = TRUE;
+
+ return(DB_SUCCESS);
}
#if 0
@@ -2455,27 +2778,28 @@ srv_fts_close(void)
Shuts down the InnoDB database.
@return DB_SUCCESS or error code */
UNIV_INTERN
-int
+dberr_t
innobase_shutdown_for_mysql(void)
/*=============================*/
{
ulint i;
+
if (!srv_was_started) {
if (srv_is_being_started) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: shutting down"
- " a not properly started\n"
- "InnoDB: or created database!\n");
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Shutting down an improperly started, "
+ "or created database!");
}
return(DB_SUCCESS);
}
- /* Shutdown the FTS optimize sub system. */
- fts_optimize_start_shutdown();
+ if (!srv_read_only_mode) {
+ /* Shutdown the FTS optimize sub system. */
+ fts_optimize_start_shutdown();
- fts_optimize_end();
+ fts_optimize_end();
+ }
/* 1. Flush the buffer pool to disk, write the current lsn to
the tablespace header(s), and copy all log data to archive.
@@ -2485,18 +2809,12 @@ innobase_shutdown_for_mysql(void)
logs_empty_and_mark_files_at_shutdown();
if (srv_conc_get_active_threads() != 0) {
- fprintf(stderr,
- "InnoDB: Warning: query counter shows %ld queries"
- " still\n"
- "InnoDB: inside InnoDB at shutdown\n",
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Query counter shows %ld queries still "
+ "inside InnoDB at shutdown",
srv_conc_get_active_threads());
}
- /* This functionality will be used by WL#5522. */
- ut_a(trx_purge_state() == PURGE_STATE_RUN
- || trx_purge_state() == PURGE_STATE_EXIT
- || srv_force_recovery >= SRV_FORCE_NO_BACKGROUND);
-
/* 2. Make all threads created by InnoDB to exit */
srv_shutdown_state = SRV_SHUTDOWN_EXIT_THREADS;
@@ -2509,22 +2827,28 @@ innobase_shutdown_for_mysql(void)
/* NOTE: IF YOU CREATE THREADS IN INNODB, YOU MUST EXIT THEM
HERE OR EARLIER */
- /* a. Let the lock timeout thread exit */
- os_event_set(srv_timeout_event);
+ if (!srv_read_only_mode) {
+ /* a. Let the lock timeout thread exit */
+ os_event_set(lock_sys->timeout_event);
- /* b. srv error monitor thread exits automatically, no need
- to do anything here */
+ /* b. srv error monitor thread exits automatically,
+ no need to do anything here */
- /* c. We wake the master thread so that it exits */
- srv_wake_master_thread();
+ /* c. We wake the master thread so that it exits */
+ srv_wake_master_thread();
- /* d. Wakeup purge threads. */
- srv_purge_wakeup();
+ /* d. Wakeup purge threads. */
+ srv_purge_wakeup();
+ }
/* e. Exit the i/o threads */
os_aio_wake_all_threads_at_shutdown();
+ /* f. dict_stats_thread is signaled from
+ logs_empty_and_mark_files_at_shutdown() and should have
+ already quit or is quitting right now. */
+
os_mutex_enter(os_sync_mutex);
if (os_thread_count == 0) {
@@ -2549,9 +2873,9 @@ innobase_shutdown_for_mysql(void)
}
if (i == 1000) {
- fprintf(stderr,
- "InnoDB: Warning: %lu threads created by InnoDB"
- " had not exited at shutdown!\n",
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "%lu threads created by InnoDB"
+ " had not exited at shutdown!",
(ulong) os_thread_count);
}
@@ -2563,6 +2887,7 @@ innobase_shutdown_for_mysql(void)
mem_free(srv_monitor_file_name);
}
}
+
if (srv_dict_tmpfile) {
fclose(srv_dict_tmpfile);
srv_dict_tmpfile = 0;
@@ -2573,6 +2898,10 @@ innobase_shutdown_for_mysql(void)
srv_misc_tmpfile = 0;
}
+ if (!srv_read_only_mode) {
+ dict_stats_thread_deinit();
+ }
+
/* This must be disabled before closing the buffer pool
and closing the data dictionary. */
btr_search_disable();
@@ -2583,9 +2912,14 @@ innobase_shutdown_for_mysql(void)
trx_sys_file_format_close();
trx_sys_close();
- mutex_free(&srv_monitor_file_mutex);
- mutex_free(&srv_dict_tmpfile_mutex);
- mutex_free(&srv_misc_tmpfile_mutex);
+	/* We don't create these mutexes in RO mode because we don't create
+	the temp files that they cover. */
+ if (!srv_read_only_mode) {
+ mutex_free(&srv_monitor_file_mutex);
+ mutex_free(&srv_dict_tmpfile_mutex);
+ mutex_free(&srv_misc_tmpfile_mutex);
+ }
+
dict_close();
btr_search_sys_free();
@@ -2594,6 +2928,7 @@ innobase_shutdown_for_mysql(void)
os_aio_free();
que_close();
row_mysql_close();
+ srv_mon_free();
sync_close();
srv_free();
fil_close();
@@ -2618,11 +2953,10 @@ innobase_shutdown_for_mysql(void)
|| os_event_count != 0
|| os_mutex_count != 0
|| os_fast_mutex_count != 0) {
- fprintf(stderr,
- "InnoDB: Warning: some resources were not"
- " cleaned up in shutdown:\n"
- "InnoDB: threads %lu, events %lu,"
- " os_mutexes %lu, os_fast_mutexes %lu\n",
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Some resources were not cleaned up in shutdown: "
+ "threads %lu, events %lu, os_mutexes %lu, "
+ "os_fast_mutexes %lu",
(ulong) os_thread_count, (ulong) os_event_count,
(ulong) os_mutex_count, (ulong) os_fast_mutex_count);
}
@@ -2632,17 +2966,15 @@ innobase_shutdown_for_mysql(void)
}
if (srv_print_verbose_log) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Shutdown completed;"
- " log sequence number " LSN_PF "\n",
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Shutdown completed; log sequence number " LSN_PF "",
srv_shutdown_lsn);
}
srv_was_started = FALSE;
srv_start_has_been_called = FALSE;
- return((int) DB_SUCCESS);
+ return(DB_SUCCESS);
}
#endif /* !UNIV_HOTBACKUP */
@@ -2650,7 +2982,7 @@ innobase_shutdown_for_mysql(void)
/********************************************************************
Signal all per-table background threads to shutdown, and wait for them to do
so. */
-
+UNIV_INTERN
void
srv_shutdown_table_bg_threads(void)
/*===============================*/
@@ -2723,3 +3055,48 @@ srv_shutdown_table_bg_threads(void)
table = next;
}
}
+
+/*****************************************************************//**
+Get the meta-data filename from the table name. */
+UNIV_INTERN
+void
+srv_get_meta_data_filename(
+/*=======================*/
+ dict_table_t* table, /*!< in: table */
+ char* filename, /*!< out: filename */
+ ulint max_len) /*!< in: filename max length */
+{
+ ulint len;
+ char* path;
+ char* suffix;
+ static const ulint suffix_len = strlen(".cfg");
+
+ if (DICT_TF_HAS_DATA_DIR(table->flags)) {
+ dict_get_and_save_data_dir_path(table, false);
+ ut_a(table->data_dir_path);
+
+ path = os_file_make_remote_pathname(
+ table->data_dir_path, table->name, "cfg");
+ } else {
+ path = fil_make_ibd_name(table->name, false);
+ }
+
+ ut_a(path);
+ len = ut_strlen(path);
+ ut_a(max_len >= len);
+
+ suffix = path + (len - suffix_len);
+ if (strncmp(suffix, ".cfg", suffix_len) == 0) {
+ strcpy(filename, path);
+ } else {
+ ut_ad(strncmp(suffix, ".ibd", suffix_len) == 0);
+
+ strncpy(filename, path, len - suffix_len);
+ suffix = filename + (len - suffix_len);
+ strcpy(suffix, ".cfg");
+ }
+
+ mem_free(path);
+
+ srv_normalize_path_for_win(filename);
+}
diff --git a/storage/innobase/sync/sync0arr.cc b/storage/innobase/sync/sync0arr.cc
index b90a5f29589..749258021f7 100644
--- a/storage/innobase/sync/sync0arr.cc
+++ b/storage/innobase/sync/sync0arr.cc
@@ -39,6 +39,7 @@ Created 9/5/1995 Heikki Tuuri
#include "sync0rw.h"
#include "os0sync.h"
#include "os0file.h"
+#include "lock0lock.h"
#include "srv0srv.h"
#include "ha_prototypes.h"
@@ -78,11 +79,11 @@ any waiting threads who have missed the signal. */
/** A cell where an individual thread may wait suspended
until a resource is released. The suspending is implemented
using an operating system event semaphore. */
-struct sync_cell_struct {
+struct sync_cell_t {
void* wait_object; /*!< pointer to the object the
thread is waiting for; if NULL
the cell is free for use */
- mutex_t* old_wait_mutex; /*!< the latest wait mutex in cell */
+ ib_mutex_t* old_wait_mutex; /*!< the latest wait mutex in cell */
rw_lock_t* old_wait_rw_lock;
/*!< the latest wait rw-lock
in cell */
@@ -116,15 +117,15 @@ all changes (set or reset) to the state of the event must be made
while owning the mutex. */
/** Synchronization array */
-struct sync_array_struct {
+struct sync_array_t {
ulint n_reserved; /*!< number of currently reserved
cells in the wait array */
ulint n_cells; /*!< number of cells in the
wait array */
sync_cell_t* array; /*!< pointer to wait array */
- mutex_t mutex; /*!< possible database mutex
+ ib_mutex_t mutex; /*!< possible database mutex
protecting this data structure */
- os_mutex_t os_mutex; /*!< Possible operating system mutex
+ os_ib_mutex_t os_mutex; /*!< Possible operating system mutex
protecting the data structure.
As this data structure is used in
constructing the database mutex,
@@ -293,7 +294,7 @@ sync_cell_get_event(
ulint type = cell->request_type;
if (type == SYNC_MUTEX) {
- return(((mutex_t*) cell->wait_object)->event);
+ return(((ib_mutex_t*) cell->wait_object)->event);
} else if (type == RW_LOCK_WAIT_EX) {
return(((rw_lock_t*) cell->wait_object)->wait_ex_event);
} else { /* RW_LOCK_SHARED and RW_LOCK_EX wait on the same event */
@@ -434,7 +435,7 @@ sync_array_cell_print(
FILE* file, /*!< in: file where to print */
sync_cell_t* cell) /*!< in: sync cell */
{
- mutex_t* mutex;
+ ib_mutex_t* mutex;
rw_lock_t* rwlock;
ulint type;
ulint writer;
@@ -600,7 +601,7 @@ sync_array_detect_deadlock(
sync_cell_t* cell, /*!< in: cell to search */
ulint depth) /*!< in: recursion depth */
{
- mutex_t* mutex;
+ ib_mutex_t* mutex;
rw_lock_t* lock;
os_thread_id_t thread;
ibool ret;
@@ -622,7 +623,7 @@ sync_array_detect_deadlock(
if (cell->request_type == SYNC_MUTEX) {
- mutex = static_cast<mutex_t*>(cell->wait_object);
+ mutex = static_cast<ib_mutex_t*>(cell->wait_object);
if (mutex_get_lock_word(mutex) != 0) {
@@ -736,7 +737,7 @@ sync_arr_cell_can_wake_up(
/*======================*/
sync_cell_t* cell) /*!< in: cell to search */
{
- mutex_t* mutex;
+ ib_mutex_t* mutex;
rw_lock_t* lock;
if (cell->request_type == SYNC_MUTEX) {
@@ -902,6 +903,11 @@ sync_array_print_long_waits_low(
ibool fatal = FALSE;
double longest_diff = 0;
+ /* For huge tables, skip the check during CHECK TABLE etc... */
+ if (fatal_timeout > SRV_SEMAPHORE_WAIT_EXTENSION) {
+ return(FALSE);
+ }
+
#ifdef UNIV_DEBUG_VALGRIND
/* Increase the timeouts if running under valgrind because it executes
extremely slowly. UNIV_DEBUG_VALGRIND does not necessary mean that
@@ -1000,7 +1006,7 @@ sync_array_print_long_waits(
(ulong) os_file_n_pending_pwrites);
srv_print_innodb_monitor = TRUE;
- os_event_set(srv_timeout_event);
+ os_event_set(lock_sys->timeout_event);
os_thread_sleep(30000000);
diff --git a/storage/innobase/sync/sync0rw.cc b/storage/innobase/sync/sync0rw.cc
index dc6c510a3ed..823efecaf6b 100644
--- a/storage/innobase/sync/sync0rw.cc
+++ b/storage/innobase/sync/sync0rw.cc
@@ -57,11 +57,11 @@ lock_word == 0: Write locked
(-lock_word) is the number of readers
that hold the lock.
lock_word <= -X_LOCK_DECR: Recursively write locked. lock_word has been
- decremented by X_LOCK_DECR once for each lock,
- so the number of locks is:
- ((-lock_word) / X_LOCK_DECR) + 1
-When lock_word <= -X_LOCK_DECR, we also know that lock_word % X_LOCK_DECR == 0:
-other values of lock_word are invalid.
+ decremented by X_LOCK_DECR for the first lock
+ and the first recursive lock, then by 1 for
+ each recursive lock thereafter.
+ So the number of locks is:
+ (lock_copy == 0) ? 1 : 2 - (lock_copy + X_LOCK_DECR)
The lock_word is always read and updated atomically and consistently, so that
it always represents the state of the lock, and the state of the lock changes
@@ -124,50 +124,21 @@ wait_ex_event: A thread may only wait on the wait_ex_event after it has
performed the following actions in order:
(1) Decrement lock_word by X_LOCK_DECR.
(2) Record counter value of wait_ex_event (os_event_reset,
- called from sync_array_reserve_cell).
+ called from sync_array_reserve_cell).
(3) Verify that lock_word < 0.
(1) must come first to ensures no other threads become reader
- or next writer, and notifies unlocker that signal must be sent.
- (2) must come before (3) to ensure the signal is not missed.
+ or next writer, and notifies unlocker that signal must be sent.
+ (2) must come before (3) to ensure the signal is not missed.
These restrictions force the above ordering.
Immediately before sending the wake-up signal, we should:
Verify lock_word == 0 (waiting thread holds x_lock)
*/
-
-/** number of spin waits on rw-latches,
-resulted during shared (read) locks */
-UNIV_INTERN ib_int64_t rw_s_spin_wait_count = 0;
-/** number of spin loop rounds on rw-latches,
-resulted during shared (read) locks */
-UNIV_INTERN ib_int64_t rw_s_spin_round_count = 0;
-
-/** number of OS waits on rw-latches,
-resulted during shared (read) locks */
-UNIV_INTERN ib_int64_t rw_s_os_wait_count = 0;
-
-/** number of unlocks (that unlock shared locks),
-set only when UNIV_SYNC_PERF_STAT is defined */
-UNIV_INTERN ib_int64_t rw_s_exit_count = 0;
-
-/** number of spin waits on rw-latches,
-resulted during exclusive (write) locks */
-UNIV_INTERN ib_int64_t rw_x_spin_wait_count = 0;
-/** number of spin loop rounds on rw-latches,
-resulted during exclusive (write) locks */
-UNIV_INTERN ib_int64_t rw_x_spin_round_count = 0;
-
-/** number of OS waits on rw-latches,
-resulted during exclusive (write) locks */
-UNIV_INTERN ib_int64_t rw_x_os_wait_count = 0;
-
-/** number of unlocks (that unlock exclusive locks),
-set only when UNIV_SYNC_PERF_STAT is defined */
-UNIV_INTERN ib_int64_t rw_x_exit_count = 0;
+UNIV_INTERN rw_lock_stats_t rw_lock_stats;
/* The global list of rw-locks */
UNIV_INTERN rw_lock_list_t rw_lock_list;
-UNIV_INTERN mutex_t rw_lock_list_mutex;
+UNIV_INTERN ib_mutex_t rw_lock_list_mutex;
#ifdef UNIV_PFS_MUTEX
UNIV_INTERN mysql_pfs_key_t rw_lock_list_mutex_key;
@@ -179,7 +150,7 @@ UNIV_INTERN mysql_pfs_key_t rw_lock_mutex_key;
To modify the debug info list of an rw-lock, this mutex has to be
acquired in addition to the mutex protecting the lock. */
-UNIV_INTERN mutex_t rw_lock_debug_mutex;
+UNIV_INTERN ib_mutex_t rw_lock_debug_mutex;
# ifdef UNIV_PFS_MUTEX
UNIV_INTERN mysql_pfs_key_t rw_lock_debug_mutex_key;
@@ -258,7 +229,7 @@ rw_lock_create_func(
lock->mutex.cline = cline;
ut_d(lock->mutex.cmutex_name = cmutex_name);
- ut_d(lock->mutex.mutex_type = 1);
+ ut_d(lock->mutex.ib_mutex_type = 1);
#else /* INNODB_RW_LOCKS_USE_ATOMICS */
# ifdef UNIV_DEBUG
UT_NOT_USED(cmutex_name);
@@ -292,8 +263,8 @@ rw_lock_create_func(
lock->last_x_file_name = "not yet reserved";
lock->last_s_line = 0;
lock->last_x_line = 0;
- lock->event = os_event_create(NULL);
- lock->wait_ex_event = os_event_create(NULL);
+ lock->event = os_event_create();
+ lock->wait_ex_event = os_event_create();
mutex_enter(&rw_lock_list_mutex);
@@ -316,7 +287,7 @@ rw_lock_free_func(
rw_lock_t* lock) /*!< in: rw-lock */
{
#ifndef INNODB_RW_LOCKS_USE_ATOMICS
- mutex_t* mutex;
+ ib_mutex_t* mutex;
#endif /* !INNODB_RW_LOCKS_USE_ATOMICS */
ut_ad(rw_lock_validate(lock));
@@ -364,14 +335,15 @@ rw_lock_validate(
ulint waiters;
lint lock_word;
- ut_a(lock);
+ ut_ad(lock);
waiters = rw_lock_get_waiters(lock);
lock_word = lock->lock_word;
ut_ad(lock->magic_n == RW_LOCK_MAGIC_N);
- ut_a(waiters == 0 || waiters == 1);
- ut_a(lock_word > -X_LOCK_DECR ||(-lock_word) % X_LOCK_DECR == 0);
+ ut_ad(waiters == 0 || waiters == 1);
+ ut_ad(lock_word > -(2 * X_LOCK_DECR));
+ ut_ad(lock_word <= X_LOCK_DECR);
return(TRUE);
}
@@ -395,10 +367,16 @@ rw_lock_s_lock_spin(
ulint index; /* index of the reserved wait cell */
ulint i = 0; /* spin round count */
sync_array_t* sync_arr;
+ size_t counter_index;
+
+ /* We reuse the thread id to index into the counter, cache
+ it here for efficiency. */
+
+ counter_index = (size_t) os_thread_get_curr_id();
ut_ad(rw_lock_validate(lock));
- rw_s_spin_wait_count++; /*!< Count calls to this function */
+ rw_lock_stats.rw_s_spin_wait_count.add(counter_index, 1);
lock_loop:
/* Spin waiting for the writer field to become free */
@@ -414,19 +392,9 @@ lock_loop:
os_thread_yield();
}
- if (srv_print_latch_waits) {
- fprintf(stderr,
- "Thread %lu spin wait rw-s-lock at %p"
- " cfile %s cline %lu rnds %lu\n",
- (ulong) os_thread_pf(os_thread_get_curr_id()),
- (void*) lock,
- innobase_basename(lock->cfile_name),
- (ulong) lock->cline, (ulong) i);
- }
-
/* We try once again to obtain the lock */
if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) {
- rw_s_spin_round_count += i;
+ rw_lock_stats.rw_s_spin_round_count.add(counter_index, i);
return; /* Success */
} else {
@@ -435,7 +403,7 @@ lock_loop:
goto lock_loop;
}
- rw_s_spin_round_count += i;
+ rw_lock_stats.rw_s_spin_round_count.add(counter_index, i);
sync_arr = sync_array_get();
@@ -444,7 +412,7 @@ lock_loop:
file_name, line, &index);
/* Set waiters before checking lock_word to ensure wake-up
- signal is sent. This may lead to some unnecessary signals. */
+ signal is sent. This may lead to some unnecessary signals. */
rw_lock_set_waiter_flag(lock);
if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) {
@@ -452,19 +420,9 @@ lock_loop:
return; /* Success */
}
- if (srv_print_latch_waits) {
- fprintf(stderr,
- "Thread %lu OS wait rw-s-lock at %p"
- " cfile %s cline %lu\n",
- os_thread_pf(os_thread_get_curr_id()),
- (void*) lock,
- innobase_basename(lock->cfile_name),
- (ulong) lock->cline);
- }
-
/* these stats may not be accurate */
lock->count_os_wait++;
- rw_s_os_wait_count++;
+ rw_lock_stats.rw_s_os_wait_count.add(counter_index, 1);
sync_array_wait_event(sync_arr, index);
@@ -511,6 +469,12 @@ rw_lock_x_lock_wait(
ulint index;
ulint i = 0;
sync_array_t* sync_arr;
+ size_t counter_index;
+
+ /* We reuse the thread id to index into the counter, cache
+ it here for efficiency. */
+
+ counter_index = (size_t) os_thread_get_curr_id();
ut_ad(lock->lock_word <= 0);
@@ -524,7 +488,7 @@ rw_lock_x_lock_wait(
}
/* If there is still a reader, then go to sleep.*/
- rw_x_spin_round_count += i;
+ rw_lock_stats.rw_x_spin_round_count.add(counter_index, i);
sync_arr = sync_array_get();
@@ -539,11 +503,11 @@ rw_lock_x_lock_wait(
/* these stats may not be accurate */
lock->count_os_wait++;
- rw_x_os_wait_count++;
+ rw_lock_stats.rw_x_os_wait_count.add(counter_index, 1);
- /* Add debug info as it is needed to detect possible
- deadlock. We must add info for WAIT_EX thread for
- deadlock detection to work properly. */
+ /* Add debug info as it is needed to detect possible
+ deadlock. We must add info for WAIT_EX thread for
+ deadlock detection to work properly. */
#ifdef UNIV_SYNC_DEBUG
rw_lock_add_debug_info(lock, pass, RW_LOCK_WAIT_EX,
file_name, line);
@@ -551,16 +515,16 @@ rw_lock_x_lock_wait(
sync_array_wait_event(sync_arr, index);
#ifdef UNIV_SYNC_DEBUG
- rw_lock_remove_debug_info(lock, pass,
- RW_LOCK_WAIT_EX);
+ rw_lock_remove_debug_info(
+ lock, pass, RW_LOCK_WAIT_EX);
#endif
- /* It is possible to wake when lock_word < 0.
- We must pass the while-loop check to proceed.*/
+ /* It is possible to wake when lock_word < 0.
+ We must pass the while-loop check to proceed.*/
} else {
sync_array_free_cell(sync_arr, index);
}
}
- rw_x_spin_round_count += i;
+ rw_lock_stats.rw_x_spin_round_count.add(counter_index, i);
}
/******************************************************************//**
@@ -576,8 +540,6 @@ rw_lock_x_lock_low(
const char* file_name,/*!< in: file name where lock requested */
ulint line) /*!< in: line where requested */
{
- os_thread_id_t curr_thread = os_thread_get_curr_id();
-
if (rw_lock_lock_word_decr(lock, X_LOCK_DECR)) {
/* lock->recursive also tells us if the writer_thread
@@ -587,8 +549,8 @@ rw_lock_x_lock_low(
ut_a(!lock->recursive);
/* Decrement occurred: we are writer or next-writer. */
- rw_lock_set_writer_id_and_recursion_flag(lock,
- pass ? FALSE : TRUE);
+ rw_lock_set_writer_id_and_recursion_flag(
+ lock, pass ? FALSE : TRUE);
rw_lock_x_lock_wait(lock,
#ifdef UNIV_SYNC_DEBUG
@@ -597,19 +559,25 @@ rw_lock_x_lock_low(
file_name, line);
} else {
+ os_thread_id_t thread_id = os_thread_get_curr_id();
+
/* Decrement failed: relock or failed lock */
if (!pass && lock->recursive
- && os_thread_eq(lock->writer_thread, curr_thread)) {
+ && os_thread_eq(lock->writer_thread, thread_id)) {
/* Relock */
- lock->lock_word -= X_LOCK_DECR;
+ if (lock->lock_word == 0) {
+ lock->lock_word -= X_LOCK_DECR;
+ } else {
+ --lock->lock_word;
+ }
+
} else {
/* Another thread locked before us */
return(FALSE);
}
}
#ifdef UNIV_SYNC_DEBUG
- rw_lock_add_debug_info(lock, pass, RW_LOCK_EX,
- file_name, line);
+ rw_lock_add_debug_info(lock, pass, RW_LOCK_EX, file_name, line);
#endif
lock->last_x_file_name = file_name;
lock->last_x_line = (unsigned int) line;
@@ -640,6 +608,12 @@ rw_lock_x_lock_func(
ulint index; /*!< index of the reserved wait cell */
sync_array_t* sync_arr;
ibool spinning = FALSE;
+ size_t counter_index;
+
+ /* We reuse the thread id to index into the counter, cache
+ it here for efficiency. */
+
+ counter_index = (size_t) os_thread_get_curr_id();
ut_ad(rw_lock_validate(lock));
#ifdef UNIV_SYNC_DEBUG
@@ -651,15 +625,17 @@ rw_lock_x_lock_func(
lock_loop:
if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
- rw_x_spin_round_count += i;
+ rw_lock_stats.rw_x_spin_round_count.add(counter_index, i);
return; /* Locking succeeded */
} else {
- if (!spinning) {
- spinning = TRUE;
- rw_x_spin_wait_count++;
+ if (!spinning) {
+ spinning = TRUE;
+
+ rw_lock_stats.rw_x_spin_wait_count.add(
+ counter_index, 1);
}
/* Spin waiting for the lock_word to become free */
@@ -679,16 +655,7 @@ lock_loop:
}
}
- rw_x_spin_round_count += i;
-
- if (srv_print_latch_waits) {
- fprintf(stderr,
- "Thread %lu spin wait rw-x-lock at %p"
- " cfile %s cline %lu rnds %lu\n",
- os_thread_pf(os_thread_get_curr_id()), (void*) lock,
- innobase_basename(lock->cfile_name),
- (ulong) lock->cline, (ulong) i);
- }
+ rw_lock_stats.rw_x_spin_round_count.add(counter_index, i);
sync_arr = sync_array_get();
@@ -704,18 +671,9 @@ lock_loop:
return; /* Locking succeeded */
}
- if (srv_print_latch_waits) {
- fprintf(stderr,
- "Thread %lu OS wait for rw-x-lock at %p"
- " cfile %s cline %lu\n",
- os_thread_pf(os_thread_get_curr_id()), (void*) lock,
- innobase_basename(lock->cfile_name),
- (ulong) lock->cline);
- }
-
/* these stats may not be accurate */
lock->count_os_wait++;
- rw_x_os_wait_count++;
+ rw_lock_stats.rw_x_os_wait_count.add(counter_index, 1);
sync_array_wait_event(sync_arr, index);
diff --git a/storage/innobase/sync/sync0sync.cc b/storage/innobase/sync/sync0sync.cc
index af64d011db2..d6f7325e2a3 100644
--- a/storage/innobase/sync/sync0sync.cc
+++ b/storage/innobase/sync/sync0sync.cc
@@ -171,25 +171,25 @@ Q.E.D. */
/** The number of iterations in the mutex_spin_wait() spin loop.
Intended for performance monitoring. */
-static ib_int64_t mutex_spin_round_count = 0;
+static ib_counter_t<ib_int64_t, IB_N_SLOTS> mutex_spin_round_count;
/** The number of mutex_spin_wait() calls. Intended for
performance monitoring. */
-static ib_int64_t mutex_spin_wait_count = 0;
+static ib_counter_t<ib_int64_t, IB_N_SLOTS> mutex_spin_wait_count;
/** The number of OS waits in mutex_spin_wait(). Intended for
performance monitoring. */
-static ib_int64_t mutex_os_wait_count = 0;
+static ib_counter_t<ib_int64_t, IB_N_SLOTS> mutex_os_wait_count;
/** The number of mutex_exit() calls. Intended for performance
monitoring. */
-UNIV_INTERN ib_int64_t mutex_exit_count = 0;
+UNIV_INTERN ib_int64_t mutex_exit_count;
/** This variable is set to TRUE when sync_init is called */
UNIV_INTERN ibool sync_initialized = FALSE;
#ifdef UNIV_SYNC_DEBUG
/** An acquired mutex or rw-lock and its level in the latching order */
-typedef struct sync_level_struct sync_level_t;
+struct sync_level_t;
/** Mutexes or rw-locks held by a thread */
-typedef struct sync_thread_struct sync_thread_t;
+struct sync_thread_t;
/** The latch levels currently owned by threads are stored in this data
structure; the size of this array is OS_THREAD_MAX_N */
@@ -197,7 +197,7 @@ structure; the size of this array is OS_THREAD_MAX_N */
UNIV_INTERN sync_thread_t* sync_thread_level_arrays;
/** Mutex protecting sync_thread_level_arrays */
-UNIV_INTERN mutex_t sync_thread_mutex;
+UNIV_INTERN ib_mutex_t sync_thread_mutex;
# ifdef UNIV_PFS_MUTEX
UNIV_INTERN mysql_pfs_key_t sync_thread_mutex_key;
@@ -208,7 +208,7 @@ UNIV_INTERN mysql_pfs_key_t sync_thread_mutex_key;
UNIV_INTERN ut_list_base_node_t mutex_list;
/** Mutex protecting the mutex_list variable */
-UNIV_INTERN mutex_t mutex_list_mutex;
+UNIV_INTERN ib_mutex_t mutex_list_mutex;
#ifdef UNIV_PFS_MUTEX
UNIV_INTERN mysql_pfs_key_t mutex_list_mutex_key;
@@ -221,10 +221,8 @@ UNIV_INTERN ibool sync_order_checks_on = FALSE;
/** Number of slots reserved for each OS thread in the sync level array */
static const ulint SYNC_THREAD_N_LEVELS = 10000;
-typedef struct sync_arr_struct sync_arr_t;
-
/** Array for tracking sync levels per thread. */
-struct sync_arr_struct {
+struct sync_arr_t {
ulint in_use; /*!< Number of active cells */
ulint n_elems; /*!< Number of elements in the array */
ulint max_elems; /*!< Maximum elements */
@@ -234,14 +232,14 @@ struct sync_arr_struct {
};
/** Mutexes or rw-locks held by a thread */
-struct sync_thread_struct{
+struct sync_thread_t{
os_thread_id_t id; /*!< OS thread id */
sync_arr_t* levels; /*!< level array for this thread; if
this is NULL this slot is unused */
};
/** An acquired mutex or rw-lock and its level in the latching order */
-struct sync_level_struct{
+struct sync_level_t{
void* latch; /*!< pointer to a mutex or an
rw-lock; NULL means that
the slot is empty */
@@ -264,7 +262,7 @@ UNIV_INTERN
void
mutex_create_func(
/*==============*/
- mutex_t* mutex, /*!< in: pointer to memory */
+ ib_mutex_t* mutex, /*!< in: pointer to memory */
#ifdef UNIV_DEBUG
const char* cmutex_name, /*!< in: mutex name */
# ifdef UNIV_SYNC_DEBUG
@@ -280,7 +278,7 @@ mutex_create_func(
os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mutex->os_fast_mutex);
mutex->lock_word = 0;
#endif
- mutex->event = os_event_create(NULL);
+ mutex->event = os_event_create();
mutex_set_waiters(mutex, 0);
#ifdef UNIV_DEBUG
mutex->magic_n = MUTEX_MAGIC_N;
@@ -293,16 +291,6 @@ mutex_create_func(
mutex->cfile_name = cfile_name;
mutex->cline = cline;
mutex->count_os_wait = 0;
-#ifdef UNIV_DEBUG
- mutex->cmutex_name= cmutex_name;
- mutex->count_using= 0;
- mutex->mutex_type= 0;
- mutex->lspent_time= 0;
- mutex->lmax_spent_time= 0;
- mutex->count_spin_loop= 0;
- mutex->count_spin_rounds= 0;
- mutex->count_os_yield= 0;
-#endif /* UNIV_DEBUG */
/* Check that lock_word is aligned; this is important on Intel */
ut_ad(((ulint)(&(mutex->lock_word))) % 4 == 0);
@@ -337,7 +325,7 @@ UNIV_INTERN
void
mutex_free_func(
/*============*/
- mutex_t* mutex) /*!< in: mutex */
+ ib_mutex_t* mutex) /*!< in: mutex */
{
ut_ad(mutex_validate(mutex));
ut_a(mutex_get_lock_word(mutex) == 0);
@@ -397,7 +385,7 @@ UNIV_INTERN
ulint
mutex_enter_nowait_func(
/*====================*/
- mutex_t* mutex, /*!< in: pointer to mutex */
+ ib_mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name __attribute__((unused)),
/*!< in: file name where mutex
requested */
@@ -406,7 +394,7 @@ mutex_enter_nowait_func(
{
ut_ad(mutex_validate(mutex));
- if (!mutex_test_and_set(mutex)) {
+ if (!ib_mutex_test_and_set(mutex)) {
ut_d(mutex->thread_id = os_thread_get_curr_id());
#ifdef UNIV_SYNC_DEBUG
@@ -427,7 +415,7 @@ UNIV_INTERN
ibool
mutex_validate(
/*===========*/
- const mutex_t* mutex) /*!< in: mutex */
+ const ib_mutex_t* mutex) /*!< in: mutex */
{
ut_a(mutex);
ut_a(mutex->magic_n == MUTEX_MAGIC_N);
@@ -443,7 +431,7 @@ UNIV_INTERN
ibool
mutex_own(
/*======*/
- const mutex_t* mutex) /*!< in: mutex */
+ const ib_mutex_t* mutex) /*!< in: mutex */
{
ut_ad(mutex_validate(mutex));
@@ -458,7 +446,7 @@ UNIV_INTERN
void
mutex_set_waiters(
/*==============*/
- mutex_t* mutex, /*!< in: mutex */
+ ib_mutex_t* mutex, /*!< in: mutex */
ulint n) /*!< in: value to set */
{
volatile ulint* ptr; /* declared volatile to ensure that
@@ -479,7 +467,7 @@ UNIV_INTERN
void
mutex_spin_wait(
/*============*/
- mutex_t* mutex, /*!< in: pointer to mutex */
+ ib_mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name, /*!< in: file name where mutex
requested */
ulint line) /*!< in: line where requested */
@@ -487,6 +475,9 @@ mutex_spin_wait(
ulint i; /* spin round count */
ulint index; /* index of the reserved wait cell */
sync_array_t* sync_arr;
+ size_t counter_index;
+
+ counter_index = (size_t) os_thread_get_curr_id();
ut_ad(mutex);
@@ -494,7 +485,7 @@ mutex_spin_wait(
isn't exact. Moved out of ifdef that follows because we are willing
to sacrifice the cost of counting this as the data is valuable.
Count the number of calls to mutex_spin_wait. */
- mutex_spin_wait_count++;
+ mutex_spin_wait_count.add(counter_index, 1);
mutex_loop:
@@ -507,7 +498,6 @@ mutex_loop:
a memory word. */
spin_loop:
- ut_d(mutex->count_spin_loop++);
while (mutex_get_lock_word(mutex) != 0 && i < SYNC_SPIN_ROUNDS) {
if (srv_spin_wait_delay) {
@@ -518,26 +508,12 @@ spin_loop:
}
if (i == SYNC_SPIN_ROUNDS) {
-#ifdef UNIV_DEBUG
- mutex->count_os_yield++;
-#endif /* UNIV_DEBUG */
os_thread_yield();
}
-#ifdef UNIV_SRV_PRINT_LATCH_WAITS
- fprintf(stderr,
- "Thread %lu spin wait mutex at %p"
- " cfile %s cline %lu rnds %lu\n",
- (ulong) os_thread_pf(os_thread_get_curr_id()), (void*) mutex,
- innobase_basename(mutex->cfile_name),
- (ulong) mutex->cline, (ulong) i);
-#endif
-
- mutex_spin_round_count += i;
-
- ut_d(mutex->count_spin_rounds += i);
+ mutex_spin_round_count.add(counter_index, i);
- if (mutex_test_and_set(mutex) == 0) {
+ if (ib_mutex_test_and_set(mutex) == 0) {
/* Succeeded! */
ut_d(mutex->thread_id = os_thread_get_curr_id());
@@ -550,7 +526,7 @@ spin_loop:
/* We may end up with a situation where lock_word is 0 but the OS
fast mutex is still reserved. On FreeBSD the OS does not seem to
schedule a thread which is constantly calling pthread_mutex_trylock
- (in mutex_test_and_set implementation). Then we could end up
+ (in ib_mutex_test_and_set implementation). Then we could end up
spinning here indefinitely. The following 'i++' stops this infinite
spin. */
@@ -575,7 +551,7 @@ spin_loop:
/* Try to reserve still a few times */
for (i = 0; i < 4; i++) {
- if (mutex_test_and_set(mutex) == 0) {
+ if (ib_mutex_test_and_set(mutex) == 0) {
/* Succeeded! Free the reserved wait cell */
sync_array_free_cell(sync_arr, index);
@@ -585,13 +561,6 @@ spin_loop:
mutex_set_debug_info(mutex, file_name, line);
#endif
-#ifdef UNIV_SRV_PRINT_LATCH_WAITS
- fprintf(stderr, "Thread %lu spin wait succeeds at 2:"
- " mutex at %p\n",
- (ulong) os_thread_pf(os_thread_get_curr_id()),
- (void*) mutex);
-#endif
-
return;
/* Note that in this case we leave the waiters field
@@ -604,19 +573,12 @@ spin_loop:
after the change in the wait array and the waiters field was made.
Now there is no risk of infinite wait on the event. */
-#ifdef UNIV_SRV_PRINT_LATCH_WAITS
- fprintf(stderr,
- "Thread %lu OS wait mutex at %p cfile %s cline %lu rnds %lu\n",
- (ulong) os_thread_pf(os_thread_get_curr_id()), (void*) mutex,
- innobase_basename(mutex->cfile_name),
- (ulong) mutex->cline, (ulong) i);
-#endif
-
- mutex_os_wait_count++;
+ mutex_os_wait_count.add(counter_index, 1);
mutex->count_os_wait++;
sync_array_wait_event(sync_arr, index);
+
goto mutex_loop;
}
@@ -626,7 +588,7 @@ UNIV_INTERN
void
mutex_signal_object(
/*================*/
- mutex_t* mutex) /*!< in: mutex */
+ ib_mutex_t* mutex) /*!< in: mutex */
{
mutex_set_waiters(mutex, 0);
@@ -643,7 +605,7 @@ UNIV_INTERN
void
mutex_set_debug_info(
/*=================*/
- mutex_t* mutex, /*!< in: mutex */
+ ib_mutex_t* mutex, /*!< in: mutex */
const char* file_name, /*!< in: file where requested */
ulint line) /*!< in: line where requested */
{
@@ -662,7 +624,7 @@ UNIV_INTERN
void
mutex_get_debug_info(
/*=================*/
- mutex_t* mutex, /*!< in: mutex */
+ ib_mutex_t* mutex, /*!< in: mutex */
const char** file_name, /*!< out: file where requested */
ulint* line, /*!< out: line where requested */
os_thread_id_t* thread_id) /*!< out: id of the thread which owns
@@ -683,7 +645,7 @@ mutex_list_print_info(
/*==================*/
FILE* file) /*!< in: file where to print */
{
- mutex_t* mutex;
+ ib_mutex_t* mutex;
const char* file_name;
ulint line;
os_thread_id_t thread_id;
@@ -726,7 +688,7 @@ ulint
mutex_n_reserved(void)
/*==================*/
{
- mutex_t* mutex;
+ ib_mutex_t* mutex;
ulint count = 0;
mutex_enter(&mutex_list_mutex);
@@ -825,9 +787,9 @@ sync_print_warning(
const sync_level_t* slot) /*!< in: slot for which to
print warning */
{
- mutex_t* mutex;
+ ib_mutex_t* mutex;
- mutex = static_cast<mutex_t*>(slot->latch);
+ mutex = static_cast<ib_mutex_t*>(slot->latch);
if (mutex->magic_n == MUTEX_MAGIC_N) {
fprintf(stderr,
@@ -1200,6 +1162,8 @@ sync_thread_add_level(
case SYNC_TRX_I_S_RWLOCK:
case SYNC_TRX_I_S_LAST_READ:
case SYNC_IBUF_MUTEX:
+ case SYNC_INDEX_ONLINE_LOG:
+ case SYNC_STATS_AUTO_RECALC:
if (!sync_thread_levels_g(array, level, TRUE)) {
fprintf(stderr,
"InnoDB: sync_thread_levels_g(array, %lu)"
@@ -1448,7 +1412,7 @@ sync_thread_reset_level(
return(TRUE);
}
- if (((mutex_t*) latch)->magic_n != MUTEX_MAGIC_N) {
+ if (((ib_mutex_t*) latch)->magic_n != MUTEX_MAGIC_N) {
rw_lock_t* rw_lock;
rw_lock = (rw_lock_t*) latch;
@@ -1511,7 +1475,7 @@ sync_init(void)
mutex_create(rw_lock_debug_mutex_key, &rw_lock_debug_mutex,
SYNC_NO_ORDER_CHECK);
- rw_lock_debug_event = os_event_create(NULL);
+ rw_lock_debug_event = os_event_create();
rw_lock_debug_waiters = FALSE;
#endif /* UNIV_SYNC_DEBUG */
}
@@ -1552,7 +1516,7 @@ void
sync_close(void)
/*===========*/
{
- mutex_t* mutex;
+ ib_mutex_t* mutex;
sync_array_close();
@@ -1569,7 +1533,7 @@ sync_close(void)
mutex_free(mutex);
- mutex = UT_LIST_GET_FIRST(mutex_list);
+ mutex = UT_LIST_GET_FIRST(mutex_list);
}
mutex_free(&mutex_list_mutex);
@@ -1593,13 +1557,6 @@ sync_print_wait_info(
/*=================*/
FILE* file) /*!< in: file where to print */
{
-#ifdef UNIV_SYNC_DEBUG
- fprintf(file,
- "Mutex exits "UINT64PF", "
- "rws exits "UINT64PF", rwx exits "UINT64PF"\n",
- mutex_exit_count, rw_s_exit_count, rw_x_exit_count);
-#endif
-
fprintf(file,
"Mutex spin waits "UINT64PF", rounds "UINT64PF", "
"OS waits "UINT64PF"\n"
@@ -1607,25 +1564,27 @@ sync_print_wait_info(
"OS waits "UINT64PF"\n"
"RW-excl spins "UINT64PF", rounds "UINT64PF", "
"OS waits "UINT64PF"\n",
- mutex_spin_wait_count,
- mutex_spin_round_count,
- mutex_os_wait_count,
- rw_s_spin_wait_count,
- rw_s_spin_round_count,
- rw_s_os_wait_count,
- rw_x_spin_wait_count,
- rw_x_spin_round_count,
- rw_x_os_wait_count);
+ (ib_uint64_t) mutex_spin_wait_count,
+ (ib_uint64_t) mutex_spin_round_count,
+ (ib_uint64_t) mutex_os_wait_count,
+ (ib_uint64_t) rw_lock_stats.rw_s_spin_wait_count,
+ (ib_uint64_t) rw_lock_stats.rw_s_spin_round_count,
+ (ib_uint64_t) rw_lock_stats.rw_s_os_wait_count,
+ (ib_uint64_t) rw_lock_stats.rw_x_spin_wait_count,
+ (ib_uint64_t) rw_lock_stats.rw_x_spin_round_count,
+ (ib_uint64_t) rw_lock_stats.rw_x_os_wait_count);
fprintf(file,
"Spin rounds per wait: %.2f mutex, %.2f RW-shared, "
"%.2f RW-excl\n",
(double) mutex_spin_round_count /
(mutex_spin_wait_count ? mutex_spin_wait_count : 1),
- (double) rw_s_spin_round_count /
- (rw_s_spin_wait_count ? rw_s_spin_wait_count : 1),
- (double) rw_x_spin_round_count /
- (rw_x_spin_wait_count ? rw_x_spin_wait_count : 1));
+ (double) rw_lock_stats.rw_s_spin_round_count /
+ (rw_lock_stats.rw_s_spin_wait_count
+ ? rw_lock_stats.rw_s_spin_wait_count : 1),
+ (double) rw_lock_stats.rw_x_spin_round_count /
+ (rw_lock_stats.rw_x_spin_wait_count
+ ? rw_lock_stats.rw_x_spin_wait_count : 1));
}
/*******************************************************************//**
diff --git a/storage/innobase/trx/trx0i_s.cc b/storage/innobase/trx/trx0i_s.cc
index cbf90afae0d..f6360562ae7 100644
--- a/storage/innobase/trx/trx0i_s.cc
+++ b/storage/innobase/trx/trx0i_s.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -131,25 +131,25 @@ noop because it will be empty. */
/** Memory for each table in the intermediate buffer is allocated in
separate chunks. These chunks are considered to be concatenated to
represent one flat array of rows. */
-typedef struct i_s_mem_chunk_struct {
+struct i_s_mem_chunk_t {
ulint offset; /*!< offset, in number of rows */
ulint rows_allocd; /*!< the size of this chunk, in number
of rows */
void* base; /*!< start of the chunk */
-} i_s_mem_chunk_t;
+};
/** This represents one table's cache. */
-typedef struct i_s_table_cache_struct {
+struct i_s_table_cache_t {
ulint rows_used; /*!< number of used rows */
ulint rows_allocd; /*!< number of allocated rows */
ulint row_size; /*!< size of a single row */
i_s_mem_chunk_t chunks[MEM_CHUNKS_IN_TABLE_CACHE]; /*!< array of
memory chunks that stores the
rows */
-} i_s_table_cache_t;
+};
/** This structure describes the intermediate buffer */
-struct trx_i_s_cache_struct {
+struct trx_i_s_cache_t {
rw_lock_t rw_lock; /*!< read-write lock protecting
the rest of this structure */
ullint last_read; /*!< last time the cache was read;
@@ -501,8 +501,7 @@ fill_trx_row(
goto thd_done;
}
- row->trx_mysql_thread_id = thd_get_thread_id(
- static_cast<const THD*>(trx->mysql_thd));
+ row->trx_mysql_thread_id = thd_get_thread_id(trx->mysql_thd);
stmt = innobase_get_stmt(trx->mysql_thd, &stmt_len);
@@ -1290,7 +1289,10 @@ fetch_data_into_cache_low(
for (trx = UT_LIST_GET_FIRST(*trx_list);
trx != NULL;
- trx = UT_LIST_GET_NEXT(trx_list, trx)) {
+ trx =
+ (trx_list == &trx_sys->mysql_trx_list
+ ? UT_LIST_GET_NEXT(mysql_trx_list, trx)
+ : UT_LIST_GET_NEXT(trx_list, trx))) {
i_s_trx_row_t* trx_row;
i_s_locks_row_t* requested_lock_row;
diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc
index 62c140879aa..f6d8dfc6b40 100644
--- a/storage/innobase/trx/trx0purge.cc
+++ b/storage/innobase/trx/trx0purge.cc
@@ -69,19 +69,9 @@ UNIV_INTERN mysql_pfs_key_t trx_purge_latch_key;
UNIV_INTERN mysql_pfs_key_t purge_sys_bh_mutex_key;
#endif /* UNIV_PFS_MUTEX */
-/********************************************************************//**
-Fetches the next undo log record from the history list to purge. It must be
-released with the corresponding release function.
-@return copy of an undo log record or pointer to trx_purge_dummy_rec,
-if the whole undo log can skipped in purge; NULL if none left */
-static
-trx_undo_rec_t*
-trx_purge_fetch_next_rec(
-/*=====================*/
- roll_ptr_t* roll_ptr, /*!< out: roll pointer to undo record */
- ulint* n_pages_handled,/*!< in/out: number of UNDO log pages
- handled */
- mem_heap_t* heap); /*!< in: memory heap where copied */
+#ifdef UNIV_DEBUG
+UNIV_INTERN my_bool srv_purge_view_update_only_debug;
+#endif /* UNIV_DEBUG */
/****************************************************************//**
Builds a purge 'query' graph. The actual purge is performed by executing
@@ -129,7 +119,7 @@ trx_purge_sys_create(
purge_sys = static_cast<trx_purge_t*>(mem_zalloc(sizeof(*purge_sys)));
purge_sys->state = PURGE_STATE_INIT;
- purge_sys->event = os_event_create("purge");
+ purge_sys->event = os_event_create();
/* Take ownership of ib_bh, we are responsible for freeing it. */
purge_sys->ib_bh = ib_bh;
@@ -539,7 +529,6 @@ trx_purge_truncate_history(
}
}
-
/***********************************************************************//**
Updates the last not yet purged history log info in rseg when we have purged
a whole undo log. Advances also purge_sys->purge_trx_no past the purged log. */
@@ -703,7 +692,7 @@ trx_purge_get_rseg_with_min_trx_id(
/* We assume in purge of externally stored fields that space id is
in the range of UNDO tablespace space ids */
- ut_a(purge_sys->rseg->space <= srv_undo_tablespaces);
+ ut_a(purge_sys->rseg->space <= srv_undo_tablespaces_open);
zip_size = purge_sys->rseg->zip_size;
@@ -924,7 +913,7 @@ Fetches the next undo log record from the history list to purge. It must be
released with the corresponding release function.
@return copy of an undo log record or pointer to trx_purge_dummy_rec,
if the whole undo log can skipped in purge; NULL if none left */
-static
+static __attribute__((warn_unused_result, nonnull))
trx_undo_rec_t*
trx_purge_fetch_next_rec(
/*=====================*/
@@ -1215,6 +1204,12 @@ trx_purge(
rw_lock_x_unlock(&purge_sys->latch);
+#ifdef UNIV_DEBUG
+ if (srv_purge_view_update_only_debug) {
+ return(0);
+ }
+#endif
+
/* Fetch the UNDO recs that need to be purged. */
n_pages_handled = trx_purge_attach_undo_recs(
n_purge_threads, purge_sys, &purge_sys->limit, batch_size);
@@ -1260,6 +1255,14 @@ run_synchronously:
ut_a(purge_sys->n_submitted == purge_sys->n_completed);
+#ifdef UNIV_DEBUG
+ if (purge_sys->limit.trx_no == 0) {
+ purge_sys->done = purge_sys->iter;
+ } else {
+ purge_sys->done = purge_sys->limit;
+ }
+#endif /* UNIV_DEBUG */
+
if (truncate) {
trx_purge_truncate();
}
@@ -1305,14 +1308,14 @@ trx_purge_stop(void)
ut_a(purge_sys->state != PURGE_STATE_INIT);
ut_a(purge_sys->state != PURGE_STATE_EXIT);
+ ut_a(purge_sys->state != PURGE_STATE_DISABLED);
++purge_sys->n_stop;
state = purge_sys->state;
if (state == PURGE_STATE_RUN) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Stopping purge.\n");
+ ib_logf(IB_LOG_LEVEL_INFO, "Stopping purge");
/* We need to wakeup the purge thread in case it is suspended,
so that it can acknowledge the state change. */
@@ -1329,6 +1332,28 @@ trx_purge_stop(void)
/* Wait for purge coordinator to signal that it
is suspended. */
os_event_wait_low(purge_sys->event, sig_count);
+ } else {
+ bool once = true;
+
+ rw_lock_x_lock(&purge_sys->latch);
+
+ /* Wait for purge to signal that it has actually stopped. */
+ while (purge_sys->running) {
+
+ if (once) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Waiting for purge to stop");
+ once = false;
+ }
+
+ rw_lock_x_unlock(&purge_sys->latch);
+
+ os_thread_sleep(10000);
+
+ rw_lock_x_lock(&purge_sys->latch);
+ }
+
+ rw_lock_x_unlock(&purge_sys->latch);
}
MONITOR_INC_VALUE(MONITOR_PURGE_STOP_COUNT, 1);
@@ -1343,8 +1368,16 @@ trx_purge_run(void)
{
rw_lock_x_lock(&purge_sys->latch);
- ut_a(purge_sys->state != PURGE_STATE_INIT);
- ut_a(purge_sys->state != PURGE_STATE_EXIT);
+ switch(purge_sys->state) {
+ case PURGE_STATE_INIT:
+ case PURGE_STATE_EXIT:
+ case PURGE_STATE_DISABLED:
+ ut_error;
+
+ case PURGE_STATE_RUN:
+ case PURGE_STATE_STOP:
+ break;
+ }
if (purge_sys->n_stop > 0) {
@@ -1354,8 +1387,7 @@ trx_purge_run(void)
if (purge_sys->n_stop == 0) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Resuming purge.\n");
+ ib_logf(IB_LOG_LEVEL_INFO, "Resuming purge");
purge_sys->state = PURGE_STATE_RUN;
}
diff --git a/storage/innobase/trx/trx0rec.cc b/storage/innobase/trx/trx0rec.cc
index b87eac9362e..a698b37c2a6 100644
--- a/storage/innobase/trx/trx0rec.cc
+++ b/storage/innobase/trx/trx0rec.cc
@@ -287,7 +287,7 @@ trx_undo_rec_get_pars(
TRX_UNDO_INSERT_REC, ... */
ulint* cmpl_info, /*!< out: compiler info, relevant only
for update type records */
- ibool* updated_extern, /*!< out: TRUE if we updated an
+ bool* updated_extern, /*!< out: true if we updated an
externally stored fild */
undo_no_t* undo_no, /*!< out: undo log record number */
table_id_t* table_id) /*!< out: table id */
@@ -300,12 +300,8 @@ trx_undo_rec_get_pars(
type_cmpl = mach_read_from_1(ptr);
ptr++;
- if (type_cmpl & TRX_UNDO_UPD_EXTERN) {
- *updated_extern = TRUE;
- type_cmpl -= TRX_UNDO_UPD_EXTERN;
- } else {
- *updated_extern = FALSE;
- }
+ *updated_extern = !!(type_cmpl & TRX_UNDO_UPD_EXTERN);
+ type_cmpl &= ~TRX_UNDO_UPD_EXTERN;
*type = type_cmpl & (TRX_UNDO_CMPL_INFO_MULT - 1);
*cmpl_info = type_cmpl / TRX_UNDO_CMPL_INFO_MULT;
@@ -588,6 +584,7 @@ trx_undo_page_report_modify(
/* Store first some general parameters to the undo log */
if (!update) {
+ ut_ad(!rec_get_deleted_flag(rec, dict_table_is_comp(table)));
type_cmpl = TRX_UNDO_DEL_MARK_REC;
} else if (rec_get_deleted_flag(rec, dict_table_is_comp(table))) {
type_cmpl = TRX_UNDO_UPD_DEL_REC;
@@ -1040,8 +1037,9 @@ trx_undo_update_rec_get_update(
}
/*******************************************************************//**
-Builds a partial row from an update undo log record. It contains the
-columns which occur as ordering in any index of the table.
+Builds a partial row from an update undo log record, for purge.
+It contains the columns which occur as ordering in any index of the table.
+Any missing columns are indicated by col->mtype == DATA_MISSING.
@return pointer to remaining part of undo record */
UNIV_INTERN
byte*
@@ -1075,7 +1073,12 @@ trx_undo_rec_get_partial_row(
*row = dtuple_create(heap, row_len);
- dict_table_copy_types(*row, index->table);
+ /* Mark all columns in the row uninitialized, so that
+ we can distinguish missing fields from fields that are SQL NULL. */
+ for (ulint i = 0; i < row_len; i++) {
+ dfield_get_type(dtuple_get_nth_field(*row, i))
+ ->mtype = DATA_MISSING;
+ }
end_ptr = ptr + mach_read_from_2(ptr);
ptr += 2;
@@ -1097,7 +1100,9 @@ trx_undo_rec_get_partial_row(
ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
dfield = dtuple_get_nth_field(*row, col_no);
-
+ dict_col_copy_type(
+ dict_table_get_nth_col(index->table, col_no),
+ dfield_get_type(dfield));
dfield_set_data(dfield, field, len);
if (len != UNIV_SQL_NULL
@@ -1177,7 +1182,7 @@ transaction and in consistent reads that must look to the history of this
transaction.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
trx_undo_report_row_operation(
/*==========================*/
ulint flags, /*!< in: if BTR_NO_UNDO_LOG_FLAG bit is
@@ -1196,6 +1201,7 @@ trx_undo_report_row_operation(
const rec_t* rec, /*!< in: in case of an update or delete
marking, the record in the clustered
index, otherwise NULL */
+ const ulint* offsets, /*!< in: rec_get_offsets(rec) */
roll_ptr_t* roll_ptr) /*!< out: rollback pointer to the
inserted undo log record,
0 if BTR_NO_UNDO_LOG
@@ -1207,16 +1213,14 @@ trx_undo_report_row_operation(
buf_block_t* undo_block;
trx_rseg_t* rseg;
mtr_t mtr;
- ulint err = DB_SUCCESS;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
+ dberr_t err = DB_SUCCESS;
#ifdef UNIV_DEBUG
int loop_count = 0;
#endif /* UNIV_DEBUG */
- rec_offs_init(offsets_);
+ ut_ad(!srv_read_only_mode);
ut_a(dict_index_is_clust(index));
+ ut_ad(!rec || rec_offs_validate(rec, index, offsets));
if (flags & BTR_NO_UNDO_LOG_FLAG) {
@@ -1230,6 +1234,17 @@ trx_undo_report_row_operation(
|| (clust_entry && !update && !rec));
trx = thr_get_trx(thr);
+
+ /* This table is visible only to the session that created it. */
+ if (trx->read_only) {
+ ut_ad(!srv_read_only_mode);
+ /* MySQL should block writes to non-temporary tables. */
+ ut_a(DICT_TF2_FLAG_IS_SET(index->table, DICT_TF2_TEMPORARY));
+ if (trx->rseg == 0) {
+ trx_assign_rseg(trx);
+ }
+ }
+
rseg = trx->rseg;
mtr_start(&mtr);
@@ -1272,8 +1287,6 @@ trx_undo_report_row_operation(
}
ut_ad(err == DB_SUCCESS);
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
}
page_no = undo->last_page_no;
@@ -1352,8 +1365,7 @@ trx_undo_report_row_operation(
*roll_ptr = trx_undo_build_roll_ptr(
op_type == TRX_UNDO_INSERT_OP,
rseg->id, page_no, offset);
- err = DB_SUCCESS;
- goto func_exit;
+ return(DB_SUCCESS);
}
ut_ad(page_no == undo->last_page_no);
@@ -1380,10 +1392,6 @@ trx_undo_report_row_operation(
err_exit:
mutex_exit(&trx->undo_mutex);
mtr_commit(&mtr);
-func_exit:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
return(err);
}
@@ -1428,39 +1436,34 @@ trx_undo_get_undo_rec_low(
/******************************************************************//**
Copies an undo record to heap.
-NOTE: the caller must have latches on the clustered index page and
-purge_view.
+NOTE: the caller must have latches on the clustered index page.
-@return DB_SUCCESS, or DB_MISSING_HISTORY if the undo log has been
-truncated and we cannot fetch the old version */
-static
-ulint
+@retval true if the undo log has been
+truncated and we cannot fetch the old version
+@retval false if the undo log record is available */
+static __attribute__((nonnull, warn_unused_result))
+bool
trx_undo_get_undo_rec(
/*==================*/
roll_ptr_t roll_ptr, /*!< in: roll pointer to record */
trx_id_t trx_id, /*!< in: id of the trx that generated
the roll pointer: it points to an
undo log of this transaction */
- trx_undo_rec_t** undo_rec, /*!< out, own: copy of the record */
+ trx_undo_rec_t**undo_rec, /*!< out, own: copy of the record */
mem_heap_t* heap) /*!< in: memory heap where copied */
{
- ibool missing_history;
+ bool missing_history;
rw_lock_s_lock(&purge_sys->latch);
missing_history = read_view_sees_trx_id(purge_sys->view, trx_id);
- rw_lock_s_unlock(&purge_sys->latch);
-
- if (UNIV_UNLIKELY(missing_history)) {
- /* It may be that the necessary undo log has already been
- deleted */
-
- return(DB_MISSING_HISTORY);
+ if (!missing_history) {
+ *undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
}
- *undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
+ rw_lock_s_unlock(&purge_sys->latch);
- return(DB_SUCCESS);
+ return(missing_history);
}
#ifdef UNIV_DEBUG
@@ -1471,13 +1474,13 @@ trx_undo_get_undo_rec(
/*******************************************************************//**
Build a previous version of a clustered index record. The caller must
-hold a latch on the index page of the clustered index record, to
-guarantee that the stack of versions is locked all the way down to the
-purge_sys->view.
-@return DB_SUCCESS, or DB_MISSING_HISTORY if the previous version is
-earlier than purge_view, which means that it may have been removed */
+hold a latch on the index page of the clustered index record.
+@retval true if previous version was built, or if it was an insert
+or the table has been rebuilt
+@retval false if the previous version is earlier than purge_view,
+which means that it may have been removed */
UNIV_INTERN
-ulint
+bool
trx_undo_prev_version_build(
/*========================*/
const rec_t* index_rec ATTRIB_USED_ONLY_IN_DEBUG,
@@ -1488,7 +1491,7 @@ trx_undo_prev_version_build(
index_rec page and purge_view */
const rec_t* rec, /*!< in: version of a clustered index record */
dict_index_t* index, /*!< in: clustered index */
- ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
+ ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
mem_heap_t* heap, /*!< in: memory heap from which the memory
needed is allocated */
rec_t** old_vers)/*!< out, own: previous version, or NULL if
@@ -1509,9 +1512,8 @@ trx_undo_prev_version_build(
byte* ptr;
ulint info_bits;
ulint cmpl_info;
- ibool dummy_extern;
+ bool dummy_extern;
byte* buf;
- ulint err;
#ifdef UNIV_SYNC_DEBUG
ut_ad(!rw_lock_own(&purge_sys->latch, RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */
@@ -1526,28 +1528,28 @@ trx_undo_prev_version_build(
*old_vers = NULL;
if (trx_undo_roll_ptr_is_insert(roll_ptr)) {
-
/* The record rec is the first inserted version */
-
- return(DB_SUCCESS);
+ return(true);
}
rec_trx_id = row_get_rec_trx_id(rec, index, offsets);
- err = trx_undo_get_undo_rec(roll_ptr, rec_trx_id, &undo_rec, heap);
-
- if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
- /* The undo record may already have been purged.
- This should never happen for user transactions, but
- it can happen in purge. */
- ut_ad(err == DB_MISSING_HISTORY);
-
- return(err);
+ if (trx_undo_get_undo_rec(roll_ptr, rec_trx_id, &undo_rec, heap)) {
+ /* The undo record may already have been purged,
+ during purge or semi-consistent read. */
+ return(false);
}
ptr = trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info,
&dummy_extern, &undo_no, &table_id);
+ if (table_id != index->table->id) {
+ /* The table should have been rebuilt, but purge has
+ not yet removed the undo log records for the
+ now-dropped old table (table_id). */
+ return(true);
+ }
+
ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
&info_bits);
@@ -1578,7 +1580,6 @@ trx_undo_prev_version_build(
ptr = trx_undo_update_rec_get_update(ptr, index, type, trx_id,
roll_ptr, info_bits,
NULL, heap, &update);
- ut_a(table_id == index->table->id);
ut_a(ptr);
# if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
@@ -1588,14 +1589,46 @@ trx_undo_prev_version_build(
if (row_upd_changes_field_size_or_external(index, offsets, update)) {
ulint n_ext;
+ /* We should confirm the existence of disowned external data,
+ if the previous version record is delete marked. If the trx_id
+ of the previous record is seen by purge view, we should treat
+ it as missing history, because the disowned external data
+ might be purged already.
+
+ The inherited external data (BLOBs) can be freed (purged)
+ after trx_id was committed, provided that no view was started
+ before trx_id. If the purge view can see the committed
+ delete-marked record by trx_id, no transactions need to access
+ the BLOB. */
+
+ /* the row_upd_changes_disowned_external(update) call could be
+ omitted, but the synchronization on purge_sys->latch is likely
+ more expensive. */
+
+ if ((update->info_bits & REC_INFO_DELETED_FLAG)
+ && row_upd_changes_disowned_external(update)) {
+ bool missing_extern;
+
+ rw_lock_s_lock(&purge_sys->latch);
+ missing_extern = read_view_sees_trx_id(purge_sys->view,
+ trx_id);
+ rw_lock_s_unlock(&purge_sys->latch);
+
+ if (missing_extern) {
+ /* treat as a fresh insert, not to
+ cause assertion error at the caller. */
+ return(true);
+ }
+ }
+
/* We have to set the appropriate extern storage bits in the
old version of the record: the extern bits in rec for those
fields that update does NOT update, as well as the bits for
those fields that update updates to become externally stored
fields. Store the info: */
- entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index,
- offsets, &n_ext, heap);
+ entry = row_rec_to_index_entry(
+ rec, index, offsets, &n_ext, heap);
n_ext += btr_push_update_extern_fields(entry, update, heap);
/* The page containing the clustered index record
corresponding to entry is latched in mtr. Thus the
@@ -1618,6 +1651,6 @@ trx_undo_prev_version_build(
row_upd_rec_in_place(*old_vers, index, offsets, update, NULL);
}
- return(DB_SUCCESS);
+ return(true);
}
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/trx/trx0roll.cc b/storage/innobase/trx/trx0roll.cc
index 042b5b87da7..d07e40c506d 100644
--- a/storage/innobase/trx/trx0roll.cc
+++ b/storage/innobase/trx/trx0roll.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -133,7 +133,7 @@ trx_rollback_to_savepoint_low(
Rollback a transaction to a given savepoint or do a complete rollback.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
trx_rollback_to_savepoint(
/*======================*/
trx_t* trx, /*!< in: transaction handle */
@@ -157,14 +157,14 @@ trx_rollback_to_savepoint(
srv_active_wake_master_thread();
- return((int) trx->error_state);
+ return(trx->error_state);
}
/*******************************************************************//**
Rollback a transaction used in MySQL.
@return error code or DB_SUCCESS */
static
-enum db_err
+dberr_t
trx_rollback_for_mysql_low(
/*=======================*/
trx_t* trx) /*!< in/out: transaction */
@@ -193,7 +193,7 @@ trx_rollback_for_mysql_low(
Rollback a transaction used in MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
trx_rollback_for_mysql(
/*===================*/
trx_t* trx) /*!< in/out: transaction */
@@ -214,7 +214,7 @@ trx_rollback_for_mysql(
return(trx_rollback_for_mysql_low(trx));
case TRX_STATE_PREPARED:
- assert_trx_in_rw_list(trx);
+ ut_ad(!trx_is_autocommit_non_locking(trx));
return(trx_rollback_for_mysql_low(trx));
case TRX_STATE_COMMITTED_IN_MEMORY:
@@ -223,19 +223,19 @@ trx_rollback_for_mysql(
}
ut_error;
- return((int) DB_CORRUPTION);
+ return(DB_CORRUPTION);
}
/*******************************************************************//**
Rollback the latest SQL statement for MySQL.
@return error code or DB_SUCCESS */
UNIV_INTERN
-int
+dberr_t
trx_rollback_last_sql_stat_for_mysql(
/*=================================*/
trx_t* trx) /*!< in/out: transaction */
{
- int err;
+ dberr_t err;
/* We are reading trx->state without holding trx_sys->mutex
here, because the statement rollback should be invoked for a
@@ -344,8 +344,8 @@ the row, these locks are naturally released in the rollback. Savepoints which
were set after this savepoint are deleted.
@return if no savepoint of the name found then DB_NO_SAVEPOINT,
otherwise DB_SUCCESS */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
trx_rollback_to_savepoint_for_mysql_low(
/*====================================*/
trx_t* trx, /*!< in/out: transaction */
@@ -358,7 +358,7 @@ trx_rollback_to_savepoint_for_mysql_low(
binlog entries of the queries
executed after the savepoint */
{
- ulint err;
+ dberr_t err;
ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
ut_ad(trx->in_mysql_trx_list);
@@ -395,7 +395,7 @@ were set after this savepoint are deleted.
@return if no savepoint of the name found then DB_NO_SAVEPOINT,
otherwise DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
trx_rollback_to_savepoint_for_mysql(
/*================================*/
trx_t* trx, /*!< in: transaction handle */
@@ -449,7 +449,7 @@ savepoint and replaces it with a new. Savepoints are deleted in a transaction
commit or rollback.
@return always DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
trx_savepoint_for_mysql(
/*====================*/
trx_t* trx, /*!< in: transaction handle */
@@ -495,7 +495,7 @@ savepoint are left as is.
@return if no savepoint of the name found then DB_NO_SAVEPOINT,
otherwise DB_SUCCESS */
UNIV_INTERN
-ulint
+dberr_t
trx_release_savepoint_for_mysql(
/*============================*/
trx_t* trx, /*!< in: transaction handle */
@@ -623,18 +623,16 @@ trx_rollback_active(
if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE
&& trx->table_id != 0) {
- /* If the transaction was for a dictionary operation, we
- drop the relevant table, if it still exists */
+ /* If the transaction was for a dictionary operation,
+ we drop the relevant table only if it is not flagged
+ as DISCARDED. If it still exists. */
- fprintf(stderr,
- "InnoDB: Dropping table with id "UINT64PF
- " in recovery if it exists\n",
- (ib_uint64_t) trx->table_id);
+ table = dict_table_open_on_id(
+ trx->table_id, dictionary_locked, FALSE);
- table = dict_table_open_on_id(trx->table_id, dictionary_locked);
+ if (table && !dict_table_is_discarded(table)) {
- if (table) {
- ulint err;
+ dberr_t err;
/* Ensure that the table doesn't get evicted from the
cache, keeps things simple for drop. */
@@ -643,16 +641,17 @@ trx_rollback_active(
dict_table_move_from_lru_to_non_lru(table);
}
- dict_table_close(table, dictionary_locked);
+ dict_table_close(table, dictionary_locked, FALSE);
- fputs("InnoDB: Table found: dropping table ", stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs(" in recovery\n", stderr);
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Dropping table '%s', with id " UINT64PF " "
+ "in recovery",
+ table->name, trx->table_id);
err = row_drop_table_for_mysql(table->name, trx, TRUE);
trx_commit_for_mysql(trx);
- ut_a(err == (int) DB_SUCCESS);
+ ut_a(err == DB_SUCCESS);
}
}
@@ -660,9 +659,8 @@ trx_rollback_active(
row_mysql_unlock_data_dictionary(trx);
}
- fprintf(stderr, "\nInnoDB: Rolling back of trx id " TRX_ID_FMT
- " completed\n",
- trx->id);
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Rollback of trx with id " TRX_ID_FMT " completed", trx->id);
mem_heap_free(heap);
@@ -808,6 +806,8 @@ DECLARE_THREAD(trx_rollback_or_clean_all_recovered)(
/*!< in: a dummy parameter required by
os_thread_create */
{
+ ut_ad(!srv_read_only_mode);
+
#ifdef UNIV_PFS_THREAD
pfs_register_thread(trx_rollback_clean_thread_key);
#endif /* UNIV_PFS_THREAD */
diff --git a/storage/innobase/trx/trx0sys.cc b/storage/innobase/trx/trx0sys.cc
index 97fd1f36943..7c2bbc90ad9 100644
--- a/storage/innobase/trx/trx0sys.cc
+++ b/storage/innobase/trx/trx0sys.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -43,20 +43,16 @@ Created 3/26/1996 Heikki Tuuri
#include "log0recv.h"
#include "os0file.h"
#include "read0read.h"
-#include "buf0dblwr.h"
/** The file format tag structure with id and name. */
-struct file_format_struct {
+struct file_format_t {
ulint id; /*!< id of the file format */
const char* name; /*!< text representation of the
file format */
- mutex_t mutex; /*!< covers changes to the above
+ ib_mutex_t mutex; /*!< covers changes to the above
fields */
};
-/** The file format tag */
-typedef struct file_format_struct file_format_t;
-
/** The transaction system */
UNIV_INTERN trx_sys_t* trx_sys = NULL;
@@ -122,12 +118,12 @@ UNIV_INTERN mysql_pfs_key_t file_format_max_mutex_key;
UNIV_INTERN mysql_pfs_key_t trx_sys_mutex_key;
#endif /* UNIV_PFS_RWLOCK */
+#ifndef UNIV_HOTBACKUP
#ifdef UNIV_DEBUG
/* Flag to control TRX_RSEG_N_SLOTS behavior debugging. */
uint trx_rseg_n_slots_debug = 0;
#endif
-#ifndef UNIV_HOTBACKUP
/** This is used to track the maximum file format id known to InnoDB. It's
updated via SET GLOBAL innodb_file_format_max = 'x' or when we open
or create a table. */
@@ -180,13 +176,17 @@ trx_sys_flush_max_trx_id(void)
ut_ad(mutex_own(&trx_sys->mutex));
- mtr_start(&mtr);
+ if (!srv_read_only_mode) {
+ mtr_start(&mtr);
- sys_header = trx_sysf_get(&mtr);
+ sys_header = trx_sysf_get(&mtr);
- mlog_write_ull(sys_header + TRX_SYS_TRX_ID_STORE,
- trx_sys->max_trx_id, &mtr);
- mtr_commit(&mtr);
+ mlog_write_ull(
+ sys_header + TRX_SYS_TRX_ID_STORE,
+ trx_sys->max_trx_id, &mtr);
+
+ mtr_commit(&mtr);
+ }
}
/*****************************************************************//**
@@ -524,6 +524,8 @@ trx_sys_init_at_db_start(void)
+ TRX_SYS_TRX_ID_STORE),
TRX_SYS_TRX_ID_WRITE_MARGIN);
+ ut_d(trx_sys->rw_max_trx_id = trx_sys->max_trx_id);
+
UT_LIST_INIT(trx_sys->mysql_trx_list);
trx_dummy_sess = sess_open();
@@ -701,7 +703,7 @@ Check for the max file format tag stored on disk. Note: If max_format_id
is == UNIV_FORMAT_MAX + 1 then we only print a warning.
@return DB_SUCCESS or error code */
UNIV_INTERN
-ulint
+dberr_t
trx_sys_file_format_max_check(
/*==========================*/
ulint max_format_id) /*!< in: max format id to check */
@@ -718,21 +720,18 @@ trx_sys_file_format_max_check(
format_id = UNIV_FORMAT_MIN;
}
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: highest supported file format is %s.\n",
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Highest supported file format is %s.",
trx_sys_file_format_id_to_name(UNIV_FORMAT_MAX));
if (format_id > UNIV_FORMAT_MAX) {
ut_a(format_id < FILE_FORMAT_NAME_N);
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: %s: the system tablespace is in a file "
- "format that this version doesn't support - %s\n",
- ((max_format_id <= UNIV_FORMAT_MAX)
- ? "Error" : "Warning"),
+ ib_logf(max_format_id <= UNIV_FORMAT_MAX
+ ? IB_LOG_LEVEL_ERROR : IB_LOG_LEVEL_WARN,
+ "The system tablespace is in a file "
+ "format that this version doesn't support - %s.",
trx_sys_file_format_id_to_name(format_id));
if (max_format_id <= UNIV_FORMAT_MAX) {
@@ -883,7 +882,7 @@ trx_sys_create_rsegs(
ut_a(n_spaces < TRX_SYS_N_RSEGS);
ut_a(n_rsegs <= TRX_SYS_N_RSEGS);
- if (srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO) {
+ if (srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO || srv_read_only_mode) {
return(ULINT_UNDEFINED);
}
@@ -926,9 +925,8 @@ trx_sys_create_rsegs(
}
}
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: %lu rollback segment(s) are active.\n",
- n_used);
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "%lu rollback segment(s) are active.", n_used);
return(n_used);
}
@@ -1000,7 +998,7 @@ trx_sys_read_file_format_id(
);
if (!success) {
/* The following call prints an error message */
- os_file_get_last_error(TRUE);
+ os_file_get_last_error(true);
ut_print_timestamp(stderr);
@@ -1019,7 +1017,7 @@ trx_sys_read_file_format_id(
if (!success) {
/* The following call prints an error message */
- os_file_get_last_error(TRUE);
+ os_file_get_last_error(true);
ut_print_timestamp(stderr);
@@ -1080,7 +1078,7 @@ trx_sys_read_pertable_file_format_id(
);
if (!success) {
/* The following call prints an error message */
- os_file_get_last_error(TRUE);
+ os_file_get_last_error(true);
ut_print_timestamp(stderr);
@@ -1099,7 +1097,7 @@ trx_sys_read_pertable_file_format_id(
if (!success) {
/* The following call prints an error message */
- os_file_get_last_error(TRUE);
+ os_file_get_last_error(true);
ut_print_timestamp(stderr);
@@ -1120,11 +1118,11 @@ trx_sys_read_pertable_file_format_id(
if (flags == 0) {
/* file format is Antelope */
*format_id = 0;
- return (TRUE);
+ return(TRUE);
} else if (flags & 1) {
/* tablespace flags are ok */
*format_id = (flags / 32) % 128;
- return (TRUE);
+ return(TRUE);
} else {
/* bad tablespace flags */
return(FALSE);
@@ -1143,7 +1141,7 @@ trx_sys_file_format_id_to_name(
{
if (!(id < FILE_FORMAT_NAME_N)) {
/* unknown id */
- return ("Unknown");
+ return("Unknown");
}
return(file_format_name_map[id]);
@@ -1252,7 +1250,7 @@ trx_sys_any_active_transactions(void)
mutex_enter(&trx_sys->mutex);
total_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list)
- + trx_sys->n_mysql_trx;
+ + UT_LIST_GET_LEN(trx_sys->mysql_trx_list);
ut_a(total_trx >= trx_sys->n_prepared_trx);
total_trx -= trx_sys->n_prepared_trx;
diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc
index 80ebe0df2b3..449b970842a 100644
--- a/storage/innobase/trx/trx0trx.cc
+++ b/storage/innobase/trx/trx0trx.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -105,6 +105,7 @@ trx_create(void)
trx->state = TRX_STATE_NOT_STARTED;
+ trx->active_commit_ordered = 0;
trx->isolation_level = TRX_ISO_REPEATABLE_READ;
trx->no = IB_ULONGLONG_MAX;
@@ -146,10 +147,6 @@ trx_create(void)
trx->lock.table_locks = ib_vector_create(
heap_alloc, sizeof(void**), 32);
- /* For non-locking selects we avoid calling ut_time() too frequently.
- Set the time here for new transactions. */
- trx->start_time = ut_time();
-
return(trx);
}
@@ -184,8 +181,6 @@ trx_allocate_for_mysql(void)
mutex_enter(&trx_sys->mutex);
- trx_sys->n_mysql_trx++;
-
ut_d(trx->in_mysql_trx_list = TRUE);
UT_LIST_ADD_FIRST(mysql_trx_list, trx_sys->mysql_trx_list, trx);
@@ -205,6 +200,7 @@ trx_free(
ut_a(trx->magic_n == TRX_MAGIC_N);
ut_ad(!trx->in_ro_trx_list);
ut_ad(!trx->in_rw_trx_list);
+ ut_ad(!trx->in_mysql_trx_list);
mutex_free(&trx->undo_mutex);
@@ -233,8 +229,10 @@ trx_free(
/* We allocated a dedicated heap for the vector. */
ib_vector_free(trx->autoinc_locks);
- /* We allocated a dedicated heap for the vector. */
- ib_vector_free(trx->lock.table_locks);
+ if (trx->lock.table_locks != NULL) {
+ /* We allocated a dedicated heap for the vector. */
+ ib_vector_free(trx->lock.table_locks);
+ }
mutex_free(&trx->mutex);
@@ -249,11 +247,12 @@ trx_free_for_background(
/*====================*/
trx_t* trx) /*!< in, own: trx object */
{
- if (UNIV_UNLIKELY(trx->declared_to_be_inside_innodb)) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: Freeing a trx which is declared"
- " to be processing\n"
- "InnoDB: inside InnoDB.\n", stderr);
+ if (trx->declared_to_be_inside_innodb) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Freeing a trx (%p, " TRX_ID_FMT ") which is declared "
+ "to be processing inside InnoDB", trx, trx->id);
+
trx_print(stderr, trx, 600);
putc('\n', stderr);
@@ -262,16 +261,16 @@ trx_free_for_background(
srv_conc_force_exit_innodb(trx);
}
- if (UNIV_UNLIKELY(trx->n_mysql_tables_in_use != 0
- || trx->mysql_n_tables_locked != 0)) {
+ if (trx->n_mysql_tables_in_use != 0
+ || trx->mysql_n_tables_locked != 0) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: MySQL is freeing a thd\n"
- "InnoDB: though trx->n_mysql_tables_in_use is %lu\n"
- "InnoDB: and trx->mysql_n_tables_locked is %lu.\n",
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "MySQL is freeing a thd though "
+ "trx->n_mysql_tables_in_use is %lu and "
+ "trx->mysql_n_tables_locked is %lu.",
(ulong) trx->n_mysql_tables_in_use,
(ulong) trx->mysql_n_tables_locked);
+
trx_print(stderr, trx, 600);
ut_print_buf(stderr, trx, sizeof(trx_t));
putc('\n', stderr);
@@ -326,8 +325,6 @@ trx_free_for_mysql(
ut_ad(trx_sys_validate_trx_list());
- trx_sys->n_mysql_trx--;
-
mutex_exit(&trx_sys->mutex);
trx_free_for_background(trx);
@@ -348,6 +345,9 @@ trx_list_rw_insert_ordered(
ut_ad(!trx->read_only);
+ ut_d(trx->start_file = __FILE__);
+ ut_d(trx->start_line = __LINE__);
+
ut_a(srv_is_being_started);
ut_ad(!trx->in_ro_trx_list);
ut_ad(!trx->in_rw_trx_list);
@@ -372,6 +372,7 @@ trx_list_rw_insert_ordered(
if (trx2 == NULL) {
UT_LIST_ADD_FIRST(trx_list, trx_sys->rw_trx_list, trx);
+ ut_d(trx_sys->rw_max_trx_id = trx->id);
} else {
UT_LIST_INSERT_AFTER(
trx_list, trx_sys->rw_trx_list, trx2, trx);
@@ -423,6 +424,7 @@ trx_resurrect_insert(
trx->state = TRX_STATE_PREPARED;
trx_sys->n_prepared_trx++;
+ trx_sys->n_prepared_recovered_trx++;
} else {
fprintf(stderr,
"InnoDB: Since innodb_force_recovery"
@@ -483,6 +485,7 @@ trx_resurrect_update_in_prepared_state(
if (srv_force_recovery == 0) {
if (trx_state_eq(trx, TRX_STATE_NOT_STARTED)) {
trx_sys->n_prepared_trx++;
+ trx_sys->n_prepared_recovered_trx++;
} else {
ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED));
}
@@ -620,10 +623,10 @@ trx_lists_init_at_db_start(void)
/******************************************************************//**
Assigns a rollback segment to a transaction in a round-robin fashion.
@return assigned rollback segment instance */
-UNIV_INLINE
+static
trx_rseg_t*
-trx_assign_rseg(
-/*============*/
+trx_assign_rseg_low(
+/*================*/
ulong max_undo_logs, /*!< in: maximum number of UNDO logs to use */
ulint n_tablespaces) /*!< in: number of rollback tablespaces */
{
@@ -631,7 +634,7 @@ trx_assign_rseg(
trx_rseg_t* rseg;
static ulint latest_rseg = 0;
- if (srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO) {
+ if (srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO || srv_read_only_mode) {
ut_a(max_undo_logs == ULONG_UNDEFINED);
return(NULL);
}
@@ -668,6 +671,24 @@ trx_assign_rseg(
}
/****************************************************************//**
+Assign a read-only transaction a rollback-segment, if it is attempting
+to write to a TEMPORARY table. */
+UNIV_INTERN
+void
+trx_assign_rseg(
+/*============*/
+ trx_t* trx) /*!< A read-only transaction that
+ needs to be assigned a RBS. */
+{
+ ut_a(trx->rseg == 0);
+ ut_a(trx->read_only);
+ ut_a(!srv_read_only_mode);
+ ut_a(!trx_is_autocommit_non_locking(trx));
+
+ trx->rseg = trx_assign_rseg_low(srv_undo_logs, srv_undo_tablespaces);
+}
+
+/****************************************************************//**
Starts a transaction. */
static
void
@@ -675,10 +696,10 @@ trx_start_low(
/*==========*/
trx_t* trx) /*!< in: transaction */
{
- static ulint n_start_times;
-
ut_ad(trx->rseg == NULL);
+ ut_ad(trx->start_file != 0);
+ ut_ad(trx->start_line != 0);
ut_ad(!trx->is_recovered);
ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED));
ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
@@ -686,7 +707,9 @@ trx_start_low(
/* Check whether it is an AUTOCOMMIT SELECT */
trx->auto_commit = thd_trx_is_auto_commit(trx->mysql_thd);
- trx->read_only = thd_trx_is_read_only(trx->mysql_thd);
+ trx->read_only =
+ (!trx->ddl && thd_trx_is_read_only(trx->mysql_thd))
+ || srv_read_only_mode;
if (!trx->auto_commit) {
++trx->will_lock;
@@ -695,16 +718,10 @@ trx_start_low(
}
if (!trx->read_only) {
- trx->rseg = trx_assign_rseg(
+ trx->rseg = trx_assign_rseg_low(
srv_undo_logs, srv_undo_tablespaces);
}
- /* Avoid making an unnecessary system call, for non-locking
- auto-commit selects we reuse the start_time for every 32 starts. */
- if (!trx_is_autocommit_non_locking(trx) || !(n_start_times++ % 32)) {
- trx->start_time = ut_time();
- }
-
/* The initial value for trx->no: IB_ULONGLONG_MAX is used in
read_view_open_now: */
@@ -745,12 +762,15 @@ trx_start_low(
ut_ad(!trx_is_autocommit_non_locking(trx));
UT_LIST_ADD_FIRST(trx_list, trx_sys->rw_trx_list, trx);
ut_d(trx->in_rw_trx_list = TRUE);
+ ut_d(trx_sys->rw_max_trx_id = trx->id);
}
ut_ad(trx_sys_validate_trx_list());
mutex_exit(&trx_sys->mutex);
+ trx->start_time = ut_time();
+
MONITOR_INC(MONITOR_TRX_ACTIVE);
}
@@ -971,6 +991,52 @@ trx_finalize_for_fts(
trx->fts_trx = NULL;
}
+/**********************************************************************//**
+If required, flushes the log to disk based on the value of
+innodb_flush_log_at_trx_commit. */
+static
+void
+trx_flush_log_if_needed_low(
+/*========================*/
+ lsn_t lsn) /*!< in: lsn up to which logs are to be
+ flushed. */
+{
+ switch (srv_flush_log_at_trx_commit) {
+ case 0:
+ /* Do nothing */
+ break;
+ case 1:
+ case 3:
+ /* Write the log and optionally flush it to disk */
+ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
+ srv_unix_file_flush_method != SRV_UNIX_NOSYNC);
+ break;
+ case 2:
+ /* Write the log but do not flush it to disk */
+ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
+
+ break;
+ default:
+ ut_error;
+ }
+}
+
+/**********************************************************************//**
+If required, flushes the log to disk based on the value of
+innodb_flush_log_at_trx_commit. */
+static __attribute__((nonnull))
+void
+trx_flush_log_if_needed(
+/*====================*/
+ lsn_t lsn, /*!< in: lsn up to which logs are to be
+ flushed. */
+ trx_t* trx) /*!< in/out: transaction */
+{
+ trx->op_info = "flushing log";
+ trx_flush_log_if_needed_low(lsn);
+ trx->op_info = "";
+}
+
/****************************************************************//**
Commits a transaction. */
UNIV_INTERN
@@ -987,7 +1053,7 @@ trx_commit(
ut_ad(!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
/* undo_no is non-zero if we're doing the final commit. */
- if (trx->fts_trx && (trx->undo_no != 0)) {
+ if (trx->fts_trx && trx->undo_no != 0) {
ulint error;
ut_a(!trx_is_autocommit_non_locking(trx));
@@ -1043,6 +1109,8 @@ trx_commit(
trx->state = TRX_STATE_NOT_STARTED;
+ read_view_remove(trx->global_read_view, false);
+
MONITOR_INC(MONITOR_TRX_NL_RO_COMMIT);
} else {
lock_trx_release_locks(trx);
@@ -1057,7 +1125,6 @@ trx_commit(
assert_trx_in_list(trx);
if (trx->read_only) {
- ut_ad(trx->rseg == NULL);
UT_LIST_REMOVE(trx_list, trx_sys->ro_trx_list, trx);
ut_d(trx->in_ro_trx_list = FALSE);
MONITOR_INC(MONITOR_TRX_RO_COMMIT);
@@ -1075,13 +1142,16 @@ trx_commit(
trx->state = TRX_STATE_NOT_STARTED;
+ /* We already own the trx_sys_t::mutex, by doing it here we
+ avoid a potential context switch later. */
+ read_view_remove(trx->global_read_view, true);
+
ut_ad(trx_sys_validate_trx_list());
mutex_exit(&trx_sys->mutex);
}
if (trx->global_read_view != NULL) {
- read_view_remove(trx->global_read_view);
mem_heap_empty(trx->global_read_view_heap);
@@ -1129,26 +1199,8 @@ trx_commit(
trx->must_flush_log_later = TRUE;
} else if (srv_flush_log_at_trx_commit == 0) {
/* Do nothing */
- } else if (srv_flush_log_at_trx_commit == 1 ||
- srv_flush_log_at_trx_commit == 3) {
- if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
- /* Write the log but do not flush it to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
- FALSE);
- } else {
- /* Write the log to the log files AND flush
- them to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
- }
- } else if (srv_flush_log_at_trx_commit == 2) {
-
- /* Write the log but do not flush it to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
} else {
- ut_error;
+ trx_flush_log_if_needed(lsn, trx);
}
trx->commit_lsn = lsn;
@@ -1162,6 +1214,14 @@ trx_commit(
trx->undo_no = 0;
trx->last_sql_stat_start.least_undo_no = 0;
+ trx->ddl = false;
+#ifdef UNIV_DEBUG
+ ut_ad(trx->start_file != 0);
+ ut_ad(trx->start_line != 0);
+ trx->start_file = 0;
+ trx->start_line = 0;
+#endif /* UNIV_DEBUG */
+
trx->will_lock = 0;
trx->read_only = FALSE;
trx->auto_commit = FALSE;
@@ -1175,6 +1235,8 @@ trx_commit(
ut_ad(!trx->in_ro_trx_list);
ut_ad(!trx->in_rw_trx_list);
+ trx->dict_operation = TRX_DICT_OP_NONE;
+
trx->error_state = DB_SUCCESS;
/* trx->in_mysql_trx_list would hold between
@@ -1365,7 +1427,7 @@ trx_commit_step(
Does the transaction commit for MySQL.
@return DB_SUCCESS or error number */
UNIV_INTERN
-ulint
+dberr_t
trx_commit_for_mysql(
/*=================*/
trx_t* trx) /*!< in/out: transaction */
@@ -1389,6 +1451,9 @@ trx_commit_for_mysql(
records, generated by the same transaction do not. */
trx->support_xa = thd_supports_xa(trx->mysql_thd);
+ ut_d(trx->start_file = __FILE__);
+ ut_d(trx->start_line = __LINE__);
+
trx_start_low(trx);
/* fall through */
case TRX_STATE_ACTIVE:
@@ -1407,53 +1472,23 @@ trx_commit_for_mysql(
/**********************************************************************//**
If required, flushes the log to disk if we called trx_commit_for_mysql()
-with trx->flush_log_later == TRUE.
-@return 0 or error number */
+with trx->flush_log_later == TRUE. */
UNIV_INTERN
-ulint
+void
trx_commit_complete_for_mysql(
/*==========================*/
- trx_t* trx) /*!< in: trx handle */
+ trx_t* trx) /*!< in/out: transaction */
{
- lsn_t lsn = trx->commit_lsn;
-
ut_a(trx);
- trx->op_info = "flushing log";
-
- if (!trx->must_flush_log_later) {
- /* Do nothing */
- } else if (srv_flush_log_at_trx_commit == 0) {
- /* Do nothing */
- } else if (srv_flush_log_at_trx_commit == 1 && trx->active_commit_ordered) {
- /* Do nothing - we already flushed the prepare and binlog write
- to disk, so transaction is durable (will be recovered from
- binlog if necessary) */
- } else if (srv_flush_log_at_trx_commit == 1 || srv_flush_log_at_trx_commit == 3) {
- if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
- /* Write the log but do not flush it to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
- } else {
- /* Write the log to the log files AND flush them to
- disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
- }
- } else if (srv_flush_log_at_trx_commit == 2) {
-
- /* Write the log but do not flush it to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
- } else {
- ut_error;
+ if (!trx->must_flush_log_later
+ || (srv_flush_log_at_trx_commit == 1 && trx->active_commit_ordered)) {
+ return;
}
- trx->must_flush_log_later = FALSE;
-
- trx->op_info = "";
+ trx_flush_log_if_needed(trx->commit_lsn, trx);
- return(0);
+ trx->must_flush_log_later = FALSE;
}
/**********************************************************************//**
@@ -1500,9 +1535,9 @@ trx_print_low(
ulint max_query_len,
/*!< in: max query length to print,
or 0 to use the default max length */
- ulint n_lock_rec,
+ ulint n_rec_locks,
/*!< in: lock_number_of_rows_locked(&trx->lock) */
- ulint n_lock_struct,
+ ulint n_trx_locks,
/*!< in: length of trx->lock.trx_locks */
ulint heap_size)
/*!< in: mem_heap_get_size(trx->lock.lock_heap) */
@@ -1581,14 +1616,14 @@ state_ok:
fprintf(f, "que state %lu ", (ulong) trx->lock.que_state);
}
- if (n_lock_struct > 0 || heap_size > 400) {
+ if (n_trx_locks > 0 || heap_size > 400) {
newline = TRUE;
fprintf(f, "%lu lock struct(s), heap size %lu,"
" %lu row lock(s)",
- (ulong) n_lock_struct,
+ (ulong) n_trx_locks,
(ulong) heap_size,
- (ulong) n_lock_rec);
+ (ulong) n_rec_locks);
}
if (trx->has_search_latch) {
@@ -1644,19 +1679,19 @@ trx_print(
ulint max_query_len) /*!< in: max query length to print,
or 0 to use the default max length */
{
- ulint n_lock_rec;
- ulint n_lock_struct;
+ ulint n_rec_locks;
+ ulint n_trx_locks;
ulint heap_size;
lock_mutex_enter();
- n_lock_rec = lock_number_of_rows_locked(&trx->lock);
- n_lock_struct = UT_LIST_GET_LEN(trx->lock.trx_locks);
+ n_rec_locks = lock_number_of_rows_locked(&trx->lock);
+ n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks);
heap_size = mem_heap_get_size(trx->lock.lock_heap);
lock_mutex_exit();
mutex_enter(&trx_sys->mutex);
trx_print_low(f, trx, max_query_len,
- n_lock_rec, n_lock_struct, heap_size);
+ n_rec_locks, n_trx_locks, heap_size);
mutex_exit(&trx_sys->mutex);
}
@@ -1684,7 +1719,6 @@ trx_assert_started(
switch (trx->state) {
case TRX_STATE_PREPARED:
- assert_trx_in_rw_list(trx);
return(TRUE);
case TRX_STATE_ACTIVE:
@@ -1826,28 +1860,7 @@ trx_prepare(
TODO: find out if MySQL holds some mutex when calling this.
That would spoil our group prepare algorithm. */
- if (srv_flush_log_at_trx_commit == 0) {
- /* Do nothing */
- } else if (srv_flush_log_at_trx_commit == 1 || srv_flush_log_at_trx_commit == 3) {
- if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
- /* Write the log but do not flush it to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
- FALSE);
- } else {
- /* Write the log to the log files AND flush
- them to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
- }
- } else if (srv_flush_log_at_trx_commit == 2) {
-
- /* Write the log but do not flush it to disk */
-
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
- } else {
- ut_error;
- }
+ trx_flush_log_if_needed(lsn, trx);
}
}
@@ -1859,7 +1872,7 @@ trx_prepare_for_mysql(
/*==================*/
trx_t* trx) /*!< in/out: trx handle */
{
- trx_start_if_not_started_xa(trx);
+ trx_start_if_not_started_xa_low(trx);
trx->op_info = "preparing";
@@ -1935,12 +1948,12 @@ trx_recover_for_mysql(
if (count > 0){
ut_print_timestamp(stderr);
fprintf(stderr,
- " InnoDB: %lu transactions in prepared state"
+ " InnoDB: %d transactions in prepared state"
" after recovery\n",
- (ulong) count);
+ int (count));
}
- return ((int) count);
+ return(int (count));
}
/*******************************************************************//**
@@ -2023,8 +2036,8 @@ trx_get_trx_by_xid(
Starts the transaction if it is not yet started. */
UNIV_INTERN
void
-trx_start_if_not_started_xa(
-/*========================*/
+trx_start_if_not_started_xa_low(
+/*============================*/
trx_t* trx) /*!< in: transaction */
{
switch (trx->state) {
@@ -2057,8 +2070,8 @@ trx_start_if_not_started_xa(
Starts the transaction if it is not yet started. */
UNIV_INTERN
void
-trx_start_if_not_started(
-/*=====================*/
+trx_start_if_not_started_low(
+/*=========================*/
trx_t* trx) /*!< in: transaction */
{
switch (trx->state) {
@@ -2074,3 +2087,45 @@ trx_start_if_not_started(
ut_error;
}
+
+/*************************************************************//**
+Starts the transaction for a DDL operation. */
+UNIV_INTERN
+void
+trx_start_for_ddl_low(
+/*==================*/
+ trx_t* trx, /*!< in/out: transaction */
+ trx_dict_op_t op) /*!< in: dictionary operation type */
+{
+ switch (trx->state) {
+ case TRX_STATE_NOT_STARTED:
+ /* Flag this transaction as a dictionary operation, so that
+ the data dictionary will be locked in crash recovery. */
+
+ trx_set_dict_operation(trx, op);
+
+ /* Ensure it is not flagged as an auto-commit-non-locking
+ transation. */
+ trx->will_lock = 1;
+
+ trx->ddl = true;
+
+ trx_start_low(trx);
+ return;
+
+ case TRX_STATE_ACTIVE:
+ /* We have this start if not started idiom, therefore we
+ can't add stronger checks here. */
+ trx->ddl = true;
+
+ ut_ad(trx->dict_operation != TRX_DICT_OP_NONE);
+ ut_ad(trx->will_lock > 0);
+ return;
+ case TRX_STATE_PREPARED:
+ case TRX_STATE_COMMITTED_IN_MEMORY:
+ break;
+ }
+
+ ut_error;
+}
+
diff --git a/storage/innobase/trx/trx0undo.cc b/storage/innobase/trx/trx0undo.cc
index 13ad2bb3755..c4480b11366 100644
--- a/storage/innobase/trx/trx0undo.cc
+++ b/storage/innobase/trx/trx0undo.cc
@@ -413,8 +413,8 @@ trx_undo_page_init(
Creates a new undo log segment in file.
@return DB_SUCCESS if page creation OK possible error codes are:
DB_TOO_MANY_CONCURRENT_TRXS DB_OUT_OF_FILE_SPACE */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
trx_undo_seg_create(
/*================*/
trx_rseg_t* rseg __attribute__((unused)),/*!< in: rollback segment */
@@ -435,7 +435,7 @@ trx_undo_seg_create(
trx_usegf_t* seg_hdr;
ulint n_reserved;
ibool success;
- ulint err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
ut_ad(mtr && id && rseg_hdr);
ut_ad(mutex_own(&(rseg->mutex)));
@@ -1468,7 +1468,7 @@ trx_undo_mem_create(
if (undo == NULL) {
- return NULL;
+ return(NULL);
}
undo->id = id;
@@ -1551,8 +1551,8 @@ Creates a new undo log.
@return DB_SUCCESS if successful in creating the new undo lob object,
possible error codes are: DB_TOO_MANY_CONCURRENT_TRXS
DB_OUT_OF_FILE_SPACE DB_OUT_OF_MEMORY */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
trx_undo_create(
/*============*/
trx_t* trx, /*!< in: transaction */
@@ -1571,7 +1571,7 @@ trx_undo_create(
ulint offset;
ulint id;
page_t* undo_page;
- ulint err;
+ dberr_t err;
ut_ad(mutex_own(&(rseg->mutex)));
@@ -1746,7 +1746,7 @@ undo log reused.
are: DB_TOO_MANY_CONCURRENT_TRXS DB_OUT_OF_FILE_SPACE DB_READ_ONLY
DB_OUT_OF_MEMORY */
UNIV_INTERN
-ulint
+dberr_t
trx_undo_assign_undo(
/*=================*/
trx_t* trx, /*!< in: transaction */
@@ -1755,7 +1755,7 @@ trx_undo_assign_undo(
trx_rseg_t* rseg;
trx_undo_t* undo;
mtr_t mtr;
- ulint err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
ut_ad(trx);
@@ -1771,11 +1771,17 @@ trx_undo_assign_undo(
mutex_enter(&rseg->mutex);
+ DBUG_EXECUTE_IF(
+ "ib_create_table_fail_too_many_trx",
+ err = DB_TOO_MANY_CONCURRENT_TRXS;
+ goto func_exit;
+ );
+
undo = trx_undo_reuse_cached(trx, rseg, type, trx->id, &trx->xid,
&mtr);
if (undo == NULL) {
err = trx_undo_create(trx, rseg, type, trx->id, &trx->xid,
- &undo, &mtr);
+ &undo, &mtr);
if (err != DB_SUCCESS) {
goto func_exit;
@@ -1800,7 +1806,7 @@ func_exit:
mutex_exit(&(rseg->mutex));
mtr_commit(&mtr);
- return err;
+ return(err);
}
/******************************************************************//**
diff --git a/storage/innobase/ut/ut0crc32.cc b/storage/innobase/ut/ut0crc32.cc
index 538879dd9e2..695035d6ae8 100644
--- a/storage/innobase/ut/ut0crc32.cc
+++ b/storage/innobase/ut/ut0crc32.cc
@@ -79,11 +79,11 @@ mysys/my_perf.c, contributed by Facebook under the following license.
* factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3.
*/
-#include <string.h> /* memcmp() */
-
#include "univ.i"
#include "ut0crc32.h"
+#include <string.h>
+
ib_ut_crc32_t ut_crc32;
/* Precalculated table used to generate the CRC32 if the CPU does not
@@ -92,7 +92,7 @@ static ib_uint32_t ut_crc32_slice8_table[8][256];
static ibool ut_crc32_slice8_table_initialized = FALSE;
/* Flag that tells whether the CPU supports CRC32 or not */
-static ibool ut_crc32_sse2_enabled = FALSE;
+UNIV_INTERN bool ut_crc32_sse2_enabled = false;
/********************************************************************//**
Initializes the table that is used to generate the CRC32 if the CPU does
@@ -315,8 +315,4 @@ ut_crc32_init()
ut_crc32_slice8_table_init();
ut_crc32 = ut_crc32_slice8;
}
-
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: CPU %s crc32 instructions\n",
- ut_crc32_sse2_enabled ? "supports" : "does not support");
}
diff --git a/storage/innobase/ut/ut0mem.cc b/storage/innobase/ut/ut0mem.cc
index 42ad180d373..2bb5d9ce332 100644
--- a/storage/innobase/ut/ut0mem.cc
+++ b/storage/innobase/ut/ut0mem.cc
@@ -35,9 +35,6 @@ Created 5/11/1994 Heikki Tuuri
#include <stdlib.h>
-/** This struct is placed first in every allocated memory block */
-typedef struct ut_mem_block_struct ut_mem_block_t;
-
/** The total amount of memory currently allocated from the operating
system with os_mem_alloc_large() or malloc(). Does not count malloc()
if srv_use_sys_malloc is set. Protected by ut_list_mutex. */
@@ -52,14 +49,14 @@ UNIV_INTERN mysql_pfs_key_t ut_list_mutex_key;
#endif
/** Dynamically allocated memory block */
-struct ut_mem_block_struct{
+struct ut_mem_block_t{
UT_LIST_NODE_T(ut_mem_block_t) mem_block_list;
/*!< mem block list node */
ulint size; /*!< size of allocated memory */
ulint magic_n;/*!< magic number (UT_MEM_MAGIC_N) */
};
-/** The value of ut_mem_block_struct::magic_n. Used in detecting
+/** The value of ut_mem_block_t::magic_n. Used in detecting
memory corruption. */
#define UT_MEM_MAGIC_N 1601650166
diff --git a/storage/innobase/ut/ut0rbt.cc b/storage/innobase/ut/ut0rbt.cc
index b21543a679d..e93844af600 100644
--- a/storage/innobase/ut/ut0rbt.cc
+++ b/storage/innobase/ut/ut0rbt.cc
@@ -773,7 +773,7 @@ rbt_create_arg_cmp(
size_t sizeof_value, /*!< in: sizeof data item */
ib_rbt_arg_compare
compare, /*!< in: fn to compare items */
- const void* cmp_arg) /*!< in: compare fn arg */
+ void* cmp_arg) /*!< in: compare fn arg */
{
ib_rbt_t* tree;
diff --git a/storage/innobase/ut/ut0ut.cc b/storage/innobase/ut/ut0ut.cc
index 2268cfd2493..3c94d96c3ac 100644
--- a/storage/innobase/ut/ut0ut.cc
+++ b/storage/innobase/ut/ut0ut.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -28,6 +28,7 @@ Created 5/11/1994 Heikki Tuuri
#ifndef UNIV_INNOCHECKSUM
#include "ut0sort.h"
+#include "os0thread.h" /* thread-ID */
#ifdef UNIV_NONINL
#include "ut0ut.ic"
@@ -218,18 +219,25 @@ ut_print_timestamp(
/*===============*/
FILE* file) /*!< in: file where to print */
{
+ ulint thread_id = 0;
+
+#ifndef UNIV_INNOCHECKSUM
+ thread_id = os_thread_pf(os_thread_get_curr_id());
+#endif
+
#ifdef __WIN__
SYSTEMTIME cal_tm;
GetLocalTime(&cal_tm);
- fprintf(file,"%02d%02d%02d %2d:%02d:%02d",
- (int) cal_tm.wYear % 100,
+ fprintf(file, "%d-%02d-%02d %02d:%02d:%02d %lx",
+ (int) cal_tm.wYear,
(int) cal_tm.wMonth,
(int) cal_tm.wDay,
(int) cal_tm.wHour,
(int) cal_tm.wMinute,
- (int) cal_tm.wSecond);
+ (int) cal_tm.wSecond,
+ thread_id);
#else
struct tm* cal_tm_ptr;
time_t tm;
@@ -243,13 +251,14 @@ ut_print_timestamp(
time(&tm);
cal_tm_ptr = localtime(&tm);
#endif
- fprintf(file,"%02d%02d%02d %2d:%02d:%02d",
- cal_tm_ptr->tm_year % 100,
+ fprintf(file, "%d-%02d-%02d %02d:%02d:%02d %lx",
+ cal_tm_ptr->tm_year + 1900,
cal_tm_ptr->tm_mon + 1,
cal_tm_ptr->tm_mday,
cal_tm_ptr->tm_hour,
cal_tm_ptr->tm_min,
- cal_tm_ptr->tm_sec);
+ cal_tm_ptr->tm_sec,
+ thread_id);
#endif
}
@@ -515,7 +524,7 @@ void
ut_print_name(
/*==========*/
FILE* f, /*!< in: output stream */
- trx_t* trx, /*!< in: transaction */
+ const trx_t* trx, /*!< in: transaction */
ibool table_id,/*!< in: TRUE=print a table name,
FALSE=print other identifier */
const char* name) /*!< in: name to print */
@@ -533,7 +542,7 @@ void
ut_print_namel(
/*===========*/
FILE* f, /*!< in: output stream */
- trx_t* trx, /*!< in: transaction (NULL=no quotes) */
+ const trx_t* trx, /*!< in: transaction (NULL=no quotes) */
ibool table_id,/*!< in: TRUE=print a table name,
FALSE=print other identifier */
const char* name, /*!< in: name to print */
@@ -553,6 +562,50 @@ ut_print_namel(
}
/**********************************************************************//**
+Formats a table or index name, quoted as an SQL identifier. If the name
+contains a slash '/', the result will contain two identifiers separated by
+a period (.), as in SQL database_name.identifier.
+@return pointer to 'formatted' */
+UNIV_INTERN
+char*
+ut_format_name(
+/*===========*/
+ const char* name, /*!< in: table or index name, must be
+ '\0'-terminated */
+ ibool is_table, /*!< in: if TRUE then 'name' is a table
+ name */
+ char* formatted, /*!< out: formatted result, will be
+ '\0'-terminated */
+ ulint formatted_size) /*!< out: no more than this number of
+ bytes will be written to 'formatted' */
+{
+ switch (formatted_size) {
+ case 1:
+ formatted[0] = '\0';
+ /* FALL-THROUGH */
+ case 0:
+ return(formatted);
+ }
+
+ char* end;
+
+ end = innobase_convert_name(formatted, formatted_size,
+ name, strlen(name), NULL, is_table);
+
+ /* If the space in 'formatted' was completely used, then sacrifice
+ the last character in order to write '\0' at the end. */
+ if ((ulint) (end - formatted) == formatted_size) {
+ end--;
+ }
+
+ ut_a((ulint) (end - formatted) < formatted_size);
+
+ *end = '\0';
+
+ return(formatted);
+}
+
+/**********************************************************************//**
Catenate files. */
UNIV_INTERN
void
@@ -648,7 +701,7 @@ UNIV_INTERN
const char*
ut_strerr(
/*======*/
- enum db_err num) /*!< in: error number */
+ dberr_t num) /*!< in: error number */
{
switch (num) {
case DB_SUCCESS:
@@ -703,10 +756,12 @@ ut_strerr(
return("Cannot drop constraint");
case DB_NO_SAVEPOINT:
return("No such savepoint");
- case DB_TABLESPACE_ALREADY_EXISTS:
+ case DB_TABLESPACE_EXISTS:
return("Tablespace already exists");
case DB_TABLESPACE_DELETED:
- return("No such tablespace");
+ return("Tablespace deleted or being deleted");
+ case DB_TABLESPACE_NOT_FOUND:
+ return("Tablespace not found");
case DB_LOCK_TABLE_FULL:
return("Lock structs have exhausted the buffer pool");
case DB_FOREIGN_DUPLICATE_KEY:
@@ -717,8 +772,8 @@ ut_strerr(
return("Too many concurrent transactions");
case DB_UNSUPPORTED:
return("Unsupported");
- case DB_PRIMARY_KEY_IS_NULL:
- return("Primary key is NULL");
+ case DB_INVALID_NULL:
+ return("NULL value encountered in NOT NULL column");
case DB_STATS_DO_NOT_EXIST:
return("Persistent statistics do not exist");
case DB_FAIL:
@@ -745,6 +800,21 @@ ut_strerr(
return("Undo record too big");
case DB_END_OF_INDEX:
return("End of index");
+ case DB_IO_ERROR:
+ return("I/O error");
+ case DB_TABLE_IN_FK_CHECK:
+ return("Table is being used in foreign key check");
+ case DB_DATA_MISMATCH:
+ return("data mismatch");
+ case DB_SCHEMA_NOT_LOCKED:
+ return("schema not locked");
+ case DB_NOT_FOUND:
+ return("not found");
+ case DB_ONLINE_LOG_TOO_BIG:
+ return("Log size exceeded during online index creation");
+ case DB_DICT_CHANGED:
+ return("Table dictionary has changed");
+
/* do not add default: in order to produce a warning if new code
is added to the enum but not added here */
}
diff --git a/storage/innobase/ut/ut0vec.cc b/storage/innobase/ut/ut0vec.cc
index 8ac5d9dc5d3..5842d9f1c0e 100644
--- a/storage/innobase/ut/ut0vec.cc
+++ b/storage/innobase/ut/ut0vec.cc
@@ -44,12 +44,14 @@ ib_vector_create(
ut_a(size > 0);
- vec = static_cast<ib_vector_t*>(allocator->mem_malloc(allocator, sizeof(*vec)));
+ vec = static_cast<ib_vector_t*>(
+ allocator->mem_malloc(allocator, sizeof(*vec)));
vec->used = 0;
vec->total = size;
vec->allocator = allocator;
vec->sizeof_value = sizeof_value;
+
vec->data = static_cast<void*>(
allocator->mem_malloc(allocator, vec->sizeof_value * size));
diff --git a/storage/innobase/ut/ut0wqueue.cc b/storage/innobase/ut/ut0wqueue.cc
index 6d410524fe7..d1ba36b3b00 100644
--- a/storage/innobase/ut/ut0wqueue.cc
+++ b/storage/innobase/ut/ut0wqueue.cc
@@ -40,7 +40,7 @@ ib_wqueue_create(void)
mutex_create(PFS_NOT_INSTRUMENTED, &wq->mutex, SYNC_WORK_QUEUE);
wq->items = ib_list_create();
- wq->event = os_event_create(NULL);
+ wq->event = os_event_create();
return(wq);
}
diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc
index 95f37ddb12f..092e1a8a79e 100644
--- a/storage/maria/ha_maria.cc
+++ b/storage/maria/ha_maria.cc
@@ -512,7 +512,7 @@ static int table2maria(TABLE *table_arg, data_file_type row_type,
keydef[i].block_length= pos->block_size;
keydef[i].seg= keyseg;
keydef[i].keysegs= pos->key_parts;
- for (j= 0; j < pos->key_parts; j++)
+ for (j= 0; j < pos->user_defined_key_parts; j++)
{
Field *field= pos->key_part[j].field;
type= field->key_type();
@@ -574,7 +574,7 @@ static int table2maria(TABLE *table_arg, data_file_type row_type,
(uchar*) table_arg->record[0]);
}
}
- keyseg+= pos->key_parts;
+ keyseg+= pos->user_defined_key_parts;
}
if (table_arg->found_next_number_field)
keydef[share->next_number_index].flag|= HA_AUTO_KEY;
@@ -1042,7 +1042,7 @@ ulong ha_maria::index_flags(uint inx, uint part, bool all_parts) const
double ha_maria::scan_time()
{
if (file->s->data_file_type == BLOCK_RECORD)
- return ulonglong2double(stats.data_file_length - file->s->block_size) / max(file->s->block_size / 2, IO_SIZE) + 2;
+ return ulonglong2double(stats.data_file_length - file->s->block_size) / MY_MAX(file->s->block_size / 2, IO_SIZE) + 2;
return handler::scan_time();
}
@@ -2464,18 +2464,18 @@ int ha_maria::info(uint flag)
ref_length= maria_info.reflength;
share->db_options_in_use= maria_info.options;
stats.block_size= maria_block_size;
- stats.mrr_length_per_rec= maria_info.reflength + 8; // 8 = max(sizeof(void *))
+ stats.mrr_length_per_rec= maria_info.reflength + 8; // 8 = MY_MAX(sizeof(void *))
/* Update share */
share->keys_in_use.set_prefix(share->keys);
share->keys_in_use.intersect_extended(maria_info.key_map);
share->keys_for_keyread.intersect(share->keys_in_use);
share->db_record_offset= maria_info.record_offset;
- if (share->key_parts)
+ if (share->user_defined_key_parts)
{
ulong *to= table->key_info[0].rec_per_key, *end;
double *from= maria_info.rec_per_key;
- for (end= to+ share->key_parts ; to < end ; to++, from++)
+ for (end= to+ share->user_defined_key_parts ; to < end ; to++, from++)
*to= (ulong) (*from + 0.5);
}
diff --git a/storage/maria/ma_bitmap.c b/storage/maria/ma_bitmap.c
index e3668d3c8d3..a351447cce3 100644
--- a/storage/maria/ma_bitmap.c
+++ b/storage/maria/ma_bitmap.c
@@ -319,7 +319,11 @@ my_bool _ma_bitmap_init(MARIA_SHARE *share, File file,
my_bool _ma_bitmap_end(MARIA_SHARE *share)
{
my_bool res;
- mysql_mutex_assert_owner(&share->close_lock);
+
+#ifndef DBUG_OFF
+ if (! share->internal_table)
+ mysql_mutex_assert_owner(&share->close_lock);
+#endif
DBUG_ASSERT(share->bitmap.non_flushable == 0);
DBUG_ASSERT(share->bitmap.flush_all_requested == 0);
DBUG_ASSERT(share->bitmap.waiting_for_non_flushable == 0 &&
@@ -1393,7 +1397,7 @@ found:
IMPLEMENTATION
We will return the smallest area >= size. If there is no such
block, we will return the biggest area that satisfies
- area_size >= min(BLOB_SEGMENT_MIN_SIZE*full_page_size, size)
+ area_size >= MY_MIN(BLOB_SEGMENT_MIN_SIZE*full_page_size, size)
To speed up searches, we will only consider areas that has at least 16 free
pages starting on an even boundary. When finding such an area, we will
@@ -1501,7 +1505,7 @@ static ulong allocate_full_pages(MARIA_FILE_BITMAP *bitmap,
DBUG_RETURN(0); /* No room on page */
/*
- Now allocate min(pages_needed, area_size), starting from
+ Now allocate MY_MIN(pages_needed, area_size), starting from
best_start + best_prefix_area_size
*/
if (best_area_size > pages_needed)
diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c
index 55b9a137050..2fc30b880b4 100644
--- a/storage/maria/ma_blockrec.c
+++ b/storage/maria/ma_blockrec.c
@@ -1230,7 +1230,7 @@ static my_bool extend_directory(MARIA_HA *info, uchar *buff, uint block_size,
}
check_directory(buff, block_size,
- info ? min(info->s->base.min_block_length, length) : 0,
+ info ? MY_MIN(info->s->base.min_block_length, length) : 0,
*empty_space);
DBUG_RETURN(0);
}
@@ -2126,7 +2126,7 @@ static my_bool write_full_pages(MARIA_HA *info,
}
lsn_store(buff, lsn);
buff[PAGE_TYPE_OFFSET]= (uchar) BLOB_PAGE;
- copy_length= min(data_size, length);
+ copy_length= MY_MIN(data_size, length);
memcpy(buff + LSN_SIZE + PAGE_TYPE_SIZE, data, copy_length);
length-= copy_length;
@@ -3504,7 +3504,7 @@ static my_bool allocate_and_write_block_record(MARIA_HA *info,
/* page will be pinned & locked by get_head_or_tail_page */
if (get_head_or_tail_page(info, blocks->block, info->buff,
- max(row->space_on_head_page,
+ MY_MAX(row->space_on_head_page,
info->s->base.min_block_length),
HEAD_PAGE,
PAGECACHE_LOCK_WRITE, &row_pos))
@@ -3952,7 +3952,7 @@ static my_bool _ma_update_at_original_place(MARIA_HA *info,
*/
DBUG_ASSERT(blocks->count > 1 ||
- max(new_row->total_length, share->base.min_block_length) <=
+ MY_MAX(new_row->total_length, share->base.min_block_length) <=
length_on_head_page);
/* Store same amount of data on head page as on original page */
diff --git a/storage/maria/ma_cache.c b/storage/maria/ma_cache.c
index 829189baeed..35926d37e03 100644
--- a/storage/maria/ma_cache.c
+++ b/storage/maria/ma_cache.c
@@ -61,7 +61,7 @@ my_bool _ma_read_cache(MARIA_HA *handler, IO_CACHE *info, uchar *buff,
(my_off_t) (info->read_end - info->request_pos))
{
in_buff_pos=info->request_pos+(uint) offset;
- in_buff_length= min(length,(size_t) (info->read_end-in_buff_pos));
+ in_buff_length= MY_MIN(length,(size_t) (info->read_end-in_buff_pos));
memcpy(buff,info->request_pos+(uint) offset,(size_t) in_buff_length);
if (!(length-=in_buff_length))
DBUG_RETURN(0);
diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c
index ab9080c40fb..e6907aabe27 100644
--- a/storage/maria/ma_check.c
+++ b/storage/maria/ma_check.c
@@ -2396,7 +2396,7 @@ static int initialize_variables_for_repair(HA_CHECK *param,
else
{
ulong rec_length;
- rec_length= max(share->base.min_pack_length,
+ rec_length= MY_MAX(share->base.min_pack_length,
share->base.min_block_length);
sort_info->max_records= (ha_rows) (sort_info->filelength / rec_length);
}
@@ -3600,7 +3600,7 @@ int maria_filecopy(HA_CHECK *param, File to,File from,my_off_t start,
ulong buff_length;
DBUG_ENTER("maria_filecopy");
- buff_length=(ulong) min(param->write_buffer_length,length);
+ buff_length=(ulong) MY_MIN(param->write_buffer_length,length);
if (!(buff=my_malloc(buff_length,MYF(0))))
{
buff=tmp_buff; buff_length=IO_SIZE;
@@ -5658,7 +5658,7 @@ word_init_ft_buf:
ft_buf->buf=ft_buf->lastkey+a_len;
/*
32 is just a safety margin here
- (at least max(val_len, sizeof(nod_flag)) should be there).
+ (at least MY_MAX(val_len, sizeof(nod_flag)) should be there).
May be better performance could be achieved if we'd put
(sort_info->keyinfo->block_length-32)/XXX
instead.
@@ -6071,7 +6071,7 @@ int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename)
maria_close(*org_info);
bzero((char*) &create_info,sizeof(create_info));
- create_info.max_rows=max(max_records,share.base.records);
+ create_info.max_rows=MY_MAX(max_records,share.base.records);
create_info.reloc_rows=share.base.reloc;
create_info.old_options=(share.options |
(unpack ? HA_OPTION_TEMP_COMPRESS_RECORD : 0));
@@ -6494,7 +6494,8 @@ static my_bool create_new_data_handle(MARIA_SORT_PARAM *param, File new_file)
DBUG_ENTER("create_new_data_handle");
if (!(sort_info->new_info= maria_open(info->s->open_file_name.str, O_RDWR,
- HA_OPEN_COPY | HA_OPEN_FOR_REPAIR)))
+ HA_OPEN_COPY | HA_OPEN_FOR_REPAIR |
+ HA_OPEN_INTERNAL_TABLE)))
DBUG_RETURN(1);
new_info= sort_info->new_info;
@@ -6915,7 +6916,7 @@ static TrID max_trid_in_system(void)
{
TrID id= trnman_get_max_trid(); /* 0 if transac manager not initialized */
/* 'id' may be far bigger, if last shutdown is old */
- return max(id, max_trid_in_control_file);
+ return MY_MAX(id, max_trid_in_control_file);
}
diff --git a/storage/maria/ma_checkpoint.c b/storage/maria/ma_checkpoint.c
index 304216a76d9..51494300172 100644
--- a/storage/maria/ma_checkpoint.c
+++ b/storage/maria/ma_checkpoint.c
@@ -563,7 +563,7 @@ pthread_handler_t ma_checkpoint_background(void *arg)
DBUG_ASSERT(interval > 0);
#ifdef HAVE_PSI_THREAD_INTERFACE
- PSI_CALL(set_thread_user_host)(0,0,0,0);
+ PSI_THREAD_CALL(set_thread_user_host)(0,0,0,0);
#endif
/*
@@ -861,11 +861,11 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon)
my_malloc(STATE_COPIES * sizeof(struct st_state_copy), MYF(MY_WME));
dfiles= (PAGECACHE_FILE *)my_realloc((uchar *)dfiles,
/* avoid size of 0 for my_realloc */
- max(1, nb) * sizeof(PAGECACHE_FILE),
+ MY_MAX(1, nb) * sizeof(PAGECACHE_FILE),
MYF(MY_WME | MY_ALLOW_ZERO_PTR));
kfiles= (PAGECACHE_FILE *)my_realloc((uchar *)kfiles,
/* avoid size of 0 for my_realloc */
- max(1, nb) * sizeof(PAGECACHE_FILE),
+ MY_MAX(1, nb) * sizeof(PAGECACHE_FILE),
MYF(MY_WME | MY_ALLOW_ZERO_PTR));
if (unlikely((state_copies == NULL) ||
(dfiles == NULL) || (kfiles == NULL)))
@@ -898,7 +898,7 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon)
Collect and cache a bunch of states. We do this for many states at a
time, to not lock/unlock the log's lock too often.
*/
- uint j, bound= min(nb, i + STATE_COPIES);
+ uint j, bound= MY_MIN(nb, i + STATE_COPIES);
state_copy= state_copies;
/* part of the state is protected by log's lock */
translog_lock();
diff --git a/storage/maria/ma_close.c b/storage/maria/ma_close.c
index c355f1f1def..dd3a034425a 100644
--- a/storage/maria/ma_close.c
+++ b/storage/maria/ma_close.c
@@ -27,6 +27,7 @@ int maria_close(register MARIA_HA *info)
int error=0,flag;
my_bool share_can_be_freed= FALSE;
MARIA_SHARE *share= info->s;
+ my_bool internal_table= share->internal_table;
DBUG_ENTER("maria_close");
DBUG_PRINT("enter",("name: '%s' base: 0x%lx reopen: %u locks: %u",
share->open_file_name.str,
@@ -49,9 +50,9 @@ int maria_close(register MARIA_HA *info)
error= my_errno;
}
-
/* Ensure no one can open this file while we are closing it */
- mysql_mutex_lock(&THR_LOCK_maria);
+ if (!internal_table)
+ mysql_mutex_lock(&THR_LOCK_maria);
if (info->lock_type == F_EXTRA_LCK)
info->lock_type=F_UNLCK; /* HA_EXTRA_NO_USER_CHANGE */
@@ -60,8 +61,11 @@ int maria_close(register MARIA_HA *info)
if (maria_lock_database(info,F_UNLCK))
error=my_errno;
}
- mysql_mutex_lock(&share->close_lock);
- mysql_mutex_lock(&share->intern_lock);
+ if (!internal_table)
+ {
+ mysql_mutex_lock(&share->close_lock);
+ mysql_mutex_lock(&share->intern_lock);
+ }
if (share->options & HA_OPTION_READ_ONLY_DATA)
{
@@ -75,7 +79,8 @@ int maria_close(register MARIA_HA *info)
info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
}
flag= !--share->reopen;
- maria_open_list=list_delete(maria_open_list,&info->open_list);
+ if (!internal_table)
+ maria_open_list=list_delete(maria_open_list,&info->open_list);
my_free(info->rec_buff);
(*share->end)(info);
@@ -159,7 +164,8 @@ int maria_close(register MARIA_HA *info)
error= my_errno;
}
thr_lock_delete(&share->lock);
- (void) mysql_mutex_destroy(&share->key_del_lock);
+ mysql_mutex_destroy(&share->key_del_lock);
+
{
int i,keys;
keys = share->state.header.keys;
@@ -181,9 +187,11 @@ int maria_close(register MARIA_HA *info)
We have to unlock share->intern_lock then lock it after
LOCK_trn_list (trnman_lock()) to avoid dead locks.
*/
- mysql_mutex_unlock(&share->intern_lock);
+ if (!internal_table)
+ mysql_mutex_unlock(&share->intern_lock);
_ma_remove_not_visible_states_with_lock(share, TRUE);
- mysql_mutex_lock(&share->intern_lock);
+ if (!internal_table)
+ mysql_mutex_lock(&share->intern_lock);
if (share->in_checkpoint & MARIA_CHECKPOINT_LOOKS_AT_ME)
{
@@ -220,9 +228,12 @@ int maria_close(register MARIA_HA *info)
share->state_history= 0;
}
}
- mysql_mutex_unlock(&THR_LOCK_maria);
- mysql_mutex_unlock(&share->intern_lock);
- mysql_mutex_unlock(&share->close_lock);
+ if (!internal_table)
+ {
+ mysql_mutex_unlock(&THR_LOCK_maria);
+ mysql_mutex_unlock(&share->intern_lock);
+ mysql_mutex_unlock(&share->close_lock);
+ }
if (share_can_be_freed)
{
(void) mysql_mutex_destroy(&share->intern_lock);
diff --git a/storage/maria/ma_commit.c b/storage/maria/ma_commit.c
index 70bc668a220..46db3ca4ae5 100644
--- a/storage/maria/ma_commit.c
+++ b/storage/maria/ma_commit.c
@@ -39,11 +39,11 @@ int ma_commit(TRN *trn)
/*
- if COMMIT record is written before trnman_commit_trn():
if Checkpoint comes in the middle it will see trn is not committed,
- then if crash, Recovery might roll back trn (if min(rec_lsn) is after
+ then if crash, Recovery might roll back trn (if MY_MIN(rec_lsn) is after
COMMIT record) and this is not an issue as
* transaction's updates were not made visible to other transactions
* "commit ok" was not sent to client
- Alternatively, Recovery might commit trn (if min(rec_lsn) is before COMMIT
+ Alternatively, Recovery might commit trn (if MY_MIN(rec_lsn) is before COMMIT
record), which is ok too. All in all it means that "trn committed" is not
100% equal to "COMMIT record written".
- if COMMIT record is written after trnman_commit_trn():
diff --git a/storage/maria/ma_create.c b/storage/maria/ma_create.c
index 28c3491730f..152302a5426 100644
--- a/storage/maria/ma_create.c
+++ b/storage/maria/ma_create.c
@@ -51,6 +51,7 @@ int maria_create(const char *name, enum data_file_type datafile_type,
base_pos,long_varchar_count,varchar_length,
unique_key_parts,fulltext_keys,offset, not_block_record_extra_length;
uint max_field_lengths, extra_header_size, column_nr;
+ uint internal_table= flags & HA_CREATE_INTERNAL_TABLE;
ulong reclength, real_reclength,min_pack_length;
char filename[FN_REFLEN], linkname[FN_REFLEN], *linkname_ptr;
ulong pack_reclength;
@@ -713,7 +714,7 @@ int maria_create(const char *name, enum data_file_type datafile_type,
got from MAI file header (see also mariapack.c:save_state)
*/
share.base.key_reflength=
- maria_get_pointer_length(max(ci->key_file_length,tmp),3);
+ maria_get_pointer_length(MY_MAX(ci->key_file_length,tmp),3);
share.base.keys= share.state.header.keys= keys;
share.state.header.uniques= uniques;
share.state.header.fulltext_keys= fulltext_keys;
@@ -780,7 +781,7 @@ int maria_create(const char *name, enum data_file_type datafile_type,
share.base.min_block_length=
(share.base.pack_reclength+3 < MARIA_EXTEND_BLOCK_LENGTH &&
! share.base.blobs) ?
- max(share.base.pack_reclength,MARIA_MIN_BLOCK_LENGTH) :
+ MY_MAX(share.base.pack_reclength,MARIA_MIN_BLOCK_LENGTH) :
MARIA_EXTEND_BLOCK_LENGTH;
}
else if (datafile_type == STATIC_RECORD)
@@ -789,7 +790,8 @@ int maria_create(const char *name, enum data_file_type datafile_type,
if (! (flags & HA_DONT_TOUCH_DATA))
share.state.create_time= time((time_t*) 0);
- mysql_mutex_lock(&THR_LOCK_maria);
+ if (!internal_table)
+ mysql_mutex_lock(&THR_LOCK_maria);
/*
NOTE: For test_if_reopen() we need a real path name. Hence we need
@@ -854,7 +856,7 @@ int maria_create(const char *name, enum data_file_type datafile_type,
NOTE: The filename is compared against unique_file_name of every
open table. Hence we need a real path here.
*/
- if (_ma_test_if_reopen(filename))
+ if (!internal_table && _ma_test_if_reopen(filename))
{
my_printf_error(HA_ERR_TABLE_EXIST, "Aria table '%s' is in use "
"(most likely by a MERGE table). Try FLUSH TABLES.",
@@ -1171,7 +1173,8 @@ int maria_create(const char *name, enum data_file_type datafile_type,
if (mysql_file_close(dfile,MYF(0)))
goto err;
}
- mysql_mutex_unlock(&THR_LOCK_maria);
+ if (!internal_table)
+ mysql_mutex_unlock(&THR_LOCK_maria);
res= 0;
my_free((char*) rec_per_key_part);
errpos=0;
@@ -1180,7 +1183,8 @@ int maria_create(const char *name, enum data_file_type datafile_type,
DBUG_RETURN(res);
err:
- mysql_mutex_unlock(&THR_LOCK_maria);
+ if (!internal_table)
+ mysql_mutex_unlock(&THR_LOCK_maria);
err_no_lock:
save_errno=my_errno;
diff --git a/storage/maria/ma_delete.c b/storage/maria/ma_delete.c
index 5b8d0e01677..50edb216a1c 100644
--- a/storage/maria/ma_delete.c
+++ b/storage/maria/ma_delete.c
@@ -987,7 +987,7 @@ static int underflow(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
*/
if (_ma_log_add(anc_page, anc_length, keypos,
anc_key_inserted.move_length +
- max(anc_key_inserted.changed_length -
+ MY_MAX(anc_key_inserted.changed_length -
anc_key_inserted.move_length,
key_deleted.changed_length),
anc_key_inserted.move_length -
@@ -1229,7 +1229,7 @@ static int underflow(MARIA_HA *info, MARIA_KEYDEF *keyinfo,
*/
if (_ma_log_add(anc_page, anc_length, keypos,
anc_key_inserted.move_length +
- max(anc_key_inserted.changed_length -
+ MY_MAX(anc_key_inserted.changed_length -
anc_key_inserted.move_length,
key_deleted.changed_length),
anc_key_inserted.move_length -
@@ -1570,7 +1570,7 @@ my_bool _ma_log_delete(MARIA_PAGE *ma_page, const uchar *key_pos,
current_size != share->max_index_block_size)
{
/* Append data that didn't fit on the page before */
- uint length= (min(ma_page->size, share->max_index_block_size) -
+ uint length= (MY_MIN(ma_page->size, share->max_index_block_size) -
current_size);
uchar *data= ma_page->buff + current_size;
diff --git a/storage/maria/ma_dynrec.c b/storage/maria/ma_dynrec.c
index c1c0a8e9729..4bb51d0dcf3 100644
--- a/storage/maria/ma_dynrec.c
+++ b/storage/maria/ma_dynrec.c
@@ -851,7 +851,7 @@ static my_bool update_dynamic_record(MARIA_HA *info, MARIA_RECORD_POS filepos,
uint tmp=MY_ALIGN(reclength - length + 3 +
test(reclength >= 65520L),MARIA_DYN_ALIGN_SIZE);
/* Don't create a block bigger than MARIA_MAX_BLOCK_LENGTH */
- tmp= min(length+tmp, MARIA_MAX_BLOCK_LENGTH)-length;
+ tmp= MY_MIN(length+tmp, MARIA_MAX_BLOCK_LENGTH)-length;
/* Check if we can extend this block */
if (block_info.filepos + block_info.block_len ==
info->state->data_file_length &&
diff --git a/storage/maria/ma_extra.c b/storage/maria/ma_extra.c
index 0847f3c729c..66e7b4033c7 100644
--- a/storage/maria/ma_extra.c
+++ b/storage/maria/ma_extra.c
@@ -105,7 +105,7 @@ int maria_extra(MARIA_HA *info, enum ha_extra_function function,
cache_size= (extra_arg ? *(ulong*) extra_arg :
my_default_record_cache_size);
if (!(init_io_cache(&info->rec_cache, info->dfile.file,
- (uint) min(share->state.state.data_file_length+1,
+ (uint) MY_MIN(share->state.state.data_file_length+1,
cache_size),
READ_CACHE,0L,(pbool) (info->lock_type != F_UNLCK),
MYF(share->write_flag & MY_WAIT_IF_FULL))))
diff --git a/storage/maria/ma_ft_boolean_search.c b/storage/maria/ma_ft_boolean_search.c
index c98c4b599fc..eb5813f84f1 100644
--- a/storage/maria/ma_ft_boolean_search.c
+++ b/storage/maria/ma_ft_boolean_search.c
@@ -46,9 +46,9 @@
three subexpressions (including the top-level one),
every one has its own max_docid, updated by its plus word.
but for the search word6 uses
- max(word1.max_docid, word3.max_docid, word5.max_docid),
+ MY_MAX(word1.max_docid, word3.max_docid, word5.max_docid),
while word4 uses, accordingly,
- max(word1.max_docid, word3.max_docid).
+ MY_MAX(word1.max_docid, word3.max_docid).
*/
#define FT_CORE
diff --git a/storage/maria/ma_info.c b/storage/maria/ma_info.c
index 341ea147785..912ed0984a3 100644
--- a/storage/maria/ma_info.c
+++ b/storage/maria/ma_info.c
@@ -31,7 +31,7 @@ MARIA_RECORD_POS maria_position(MARIA_HA *info)
uint maria_max_key_length()
{
uint tmp= (_ma_max_key_length() - 8 - HA_MAX_KEY_SEG*3);
- return min(HA_MAX_KEY_LENGTH, tmp);
+ return MY_MIN(HA_MAX_KEY_LENGTH, tmp);
}
/* Get information about the table */
diff --git a/storage/maria/ma_key_recover.c b/storage/maria/ma_key_recover.c
index 502ac2b8809..ae9427981ea 100644
--- a/storage/maria/ma_key_recover.c
+++ b/storage/maria/ma_key_recover.c
@@ -506,7 +506,7 @@ my_bool _ma_log_add(MARIA_PAGE *ma_page,
move_length));
DBUG_ASSERT(info->s->now_transactional);
DBUG_ASSERT(move_length <= (int) changed_length);
- DBUG_ASSERT(ma_page->org_size == min(org_page_length, max_page_size));
+ DBUG_ASSERT(ma_page->org_size == MY_MIN(org_page_length, max_page_size));
DBUG_ASSERT(ma_page->size == org_page_length + move_length);
DBUG_ASSERT(offset <= ma_page->org_size);
@@ -618,7 +618,7 @@ my_bool _ma_log_add(MARIA_PAGE *ma_page,
DBUG_ASSERT(current_size <= max_page_size && current_size <= ma_page->size);
if (current_size != ma_page->size && current_size != max_page_size)
{
- uint length= min(ma_page->size, max_page_size) - current_size;
+ uint length= MY_MIN(ma_page->size, max_page_size) - current_size;
uchar *data= ma_page->buff + current_size;
log_pos[0]= KEY_OP_ADD_SUFFIX;
@@ -641,7 +641,7 @@ my_bool _ma_log_add(MARIA_PAGE *ma_page,
overflow!
*/
ma_page->org_size= current_size;
- DBUG_ASSERT(ma_page->org_size == min(ma_page->size, max_page_size));
+ DBUG_ASSERT(ma_page->org_size == MY_MIN(ma_page->size, max_page_size));
if (translog_write_record(&lsn, LOGREC_REDO_INDEX,
info->trn, info,
@@ -663,7 +663,7 @@ void _ma_log_key_changes(MARIA_PAGE *ma_page, LEX_CUSTRING *log_array,
uint *translog_parts)
{
MARIA_SHARE *share= ma_page->info->s;
- int page_length= min(ma_page->size, share->max_index_block_size);
+ int page_length= MY_MIN(ma_page->size, share->max_index_block_size);
uint org_length;
ha_checksum crc;
@@ -1111,7 +1111,7 @@ uint _ma_apply_redo_index(MARIA_HA *info,
uint2korr(header), uint2korr(header+2)));
DBUG_ASSERT(uint2korr(header) == page_length);
#ifndef DBUG_OFF
- new_page_length= min(uint2korr(header+2), max_page_size);
+ new_page_length= MY_MIN(uint2korr(header+2), max_page_size);
#endif
header+= 4;
break;
@@ -1148,7 +1148,7 @@ uint _ma_apply_redo_index(MARIA_HA *info,
from= uint2korr(header);
header+= 2;
/* "from" is a place in the existing page */
- DBUG_ASSERT(max(from, to) < max_page_size);
+ DBUG_ASSERT(MY_MAX(from, to) < max_page_size);
memcpy(buff + to, buff + from, full_length);
}
break;
diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c
index 56926c048d8..2a2681c0844 100644
--- a/storage/maria/ma_loghandler.c
+++ b/storage/maria/ma_loghandler.c
@@ -4808,7 +4808,7 @@ static my_bool translog_advance_pointer(int pages, uint16 last_page_data)
}
#endif
- min_offset= min(buffer_end_offset, file_end_offset);
+ min_offset= MY_MIN(buffer_end_offset, file_end_offset);
/* TODO: check is it ptr or size enough */
log_descriptor.bc.buffer->size+= min_offset;
log_descriptor.bc.ptr+= min_offset;
@@ -6833,7 +6833,7 @@ translog_variable_length_header(uchar *page, translog_size_t page_offset,
page_rest= (uint16) (TRANSLOG_PAGE_SIZE - (src - page));
base_lsn= buff->lsn;
- body_len= min(page_rest, buff->record_length);
+ body_len= MY_MIN(page_rest, buff->record_length);
}
else
{
@@ -7396,7 +7396,7 @@ translog_size_t translog_read_record(LSN lsn,
data->scanner.fixed_horizon));
if (offset < data->read_header)
{
- uint16 len= min(data->read_header, end) - offset;
+ uint16 len= MY_MIN(data->read_header, end) - offset;
DBUG_PRINT("info",
("enter header offset: %lu length: %lu",
(ulong) offset, (ulong) length));
diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c
index 88422e3dc5f..0543f426af3 100644
--- a/storage/maria/ma_open.c
+++ b/storage/maria/ma_open.c
@@ -78,6 +78,7 @@ MARIA_HA *_ma_test_if_reopen(const char *filename)
mode Mode of table (O_RDONLY | O_RDWR)
data_file Filedescriptor of data file to use < 0 if one should open
open it.
+ internal_table <> 0 if this is an internal temporary table
RETURN
# Maria handler
@@ -86,7 +87,8 @@ MARIA_HA *_ma_test_if_reopen(const char *filename)
static MARIA_HA *maria_clone_internal(MARIA_SHARE *share, const char *name,
- int mode, File data_file)
+ int mode, File data_file,
+ uint internal_table)
{
int save_errno;
uint errpos;
@@ -159,7 +161,7 @@ static MARIA_HA *maria_clone_internal(MARIA_SHARE *share, const char *name,
/* The following should be big enough for all pinning purposes */
if (my_init_dynamic_array(&info.pinned_pages,
sizeof(MARIA_PINNED_PAGE),
- max(share->base.blobs*2 + 4,
+ MY_MAX(share->base.blobs*2 + 4,
MARIA_MAX_TREE_LEVELS*3), 16, MYF(0)))
goto err;
@@ -207,9 +209,17 @@ static MARIA_HA *maria_clone_internal(MARIA_SHARE *share, const char *name,
if (share->options & HA_OPTION_TMP_TABLE)
m_info->lock.type= TL_WRITE;
- m_info->open_list.data=(void*) m_info;
- maria_open_list=list_add(maria_open_list,&m_info->open_list);
-
+ if (!internal_table)
+ {
+ m_info->open_list.data=(void*) m_info;
+ maria_open_list=list_add(maria_open_list,&m_info->open_list);
+ }
+ else
+ {
+ /* We don't need to mark internal temporary tables as changed on disk */
+ share->internal_table= 1;
+ share->global_changed= 1;
+ }
DBUG_RETURN(m_info);
err:
@@ -243,7 +253,7 @@ MARIA_HA *maria_clone(MARIA_SHARE *share, int mode)
mysql_mutex_lock(&THR_LOCK_maria);
new_info= maria_clone_internal(share, NullS, mode,
share->data_file_type == BLOCK_RECORD ?
- share->bitmap.file.file : -1);
+ share->bitmap.file.file : -1, 0);
mysql_mutex_unlock(&THR_LOCK_maria);
return new_info;
}
@@ -263,6 +273,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
int kfile,open_mode,save_errno;
uint i,j,len,errpos,head_length,base_pos,keys, realpath_err,
key_parts,unique_key_parts,fulltext_keys,uniques;
+ uint internal_table= test(open_flags & HA_OPEN_INTERNAL_TABLE);
size_t info_length;
char name_buff[FN_REFLEN], org_name[FN_REFLEN], index_name[FN_REFLEN],
data_name[FN_REFLEN];
@@ -293,10 +304,11 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
DBUG_RETURN(0);
}
- mysql_mutex_lock(&THR_LOCK_maria);
old_info= 0;
+ if (!internal_table)
+ mysql_mutex_lock(&THR_LOCK_maria);
if ((open_flags & HA_OPEN_COPY) ||
- !(old_info=_ma_test_if_reopen(name_buff)))
+ (internal_table || !(old_info=_ma_test_if_reopen(name_buff))))
{
share= &share_buff;
bzero((uchar*) &share_buff,sizeof(share_buff));
@@ -592,7 +604,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
{
/* Packed key, ensure we don't get overflow in underflow() */
keyinfo->underflow_block_length=
- max((int) (share->max_index_block_size - keyinfo->maxlength * 3),
+ MY_MAX((int) (share->max_index_block_size - keyinfo->maxlength * 3),
(int) (share->keypage_header + share->base.key_reflength));
set_if_smaller(keyinfo->underflow_block_length,
keyinfo->block_length/3);
@@ -780,7 +792,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
/* Need some extra bytes for decode_bytes */
share->base.extra_rec_buff_size+= 7;
}
- share->base.default_rec_buff_size= max(share->base.pack_reclength +
+ share->base.default_rec_buff_size= MY_MAX(share->base.pack_reclength +
share->base.extra_rec_buff_size,
share->base.max_key_length);
@@ -981,14 +993,16 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
data_file= share->bitmap.file.file; /* Only opened once */
}
- if (!(m_info= maria_clone_internal(share, name, mode, data_file)))
+ if (!(m_info= maria_clone_internal(share, name, mode, data_file,
+ internal_table)))
goto err;
if (maria_is_crashed(m_info))
DBUG_PRINT("warning", ("table is crashed: changed: %u",
share->state.changed));
- mysql_mutex_unlock(&THR_LOCK_maria);
+ if (!internal_table)
+ mysql_mutex_unlock(&THR_LOCK_maria);
m_info->open_flags= open_flags;
DBUG_PRINT("exit", ("table: %p name: %s",m_info, name));
@@ -1027,7 +1041,8 @@ err:
default:
break;
}
- mysql_mutex_unlock(&THR_LOCK_maria);
+ if (!internal_table)
+ mysql_mutex_unlock(&THR_LOCK_maria);
my_errno= save_errno;
DBUG_RETURN (NULL);
} /* maria_open */
diff --git a/storage/maria/ma_packrec.c b/storage/maria/ma_packrec.c
index c14e69414b5..9b06c0d4f78 100644
--- a/storage/maria/ma_packrec.c
+++ b/storage/maria/ma_packrec.c
@@ -718,7 +718,7 @@ static uint find_longest_bitstream(uint16 *table, uint16 *end)
return OFFSET_TABLE_SIZE;
}
length2= find_longest_bitstream(next, end) + 1;
- length=max(length,length2);
+ length=MY_MAX(length,length2);
}
return length;
}
@@ -1447,7 +1447,7 @@ uint _ma_pack_get_block_info(MARIA_HA *maria, MARIA_BIT_BUFF *bit_buff,
info->filepos=filepos+head_length;
if (file > 0)
{
- info->offset=min(info->rec_len, ref_length - head_length);
+ info->offset=MY_MIN(info->rec_len, ref_length - head_length);
memcpy(*rec_buff_p, header + head_length, info->offset);
}
return 0;
diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c
index 9ac42f885b5..c896f730d3f 100644
--- a/storage/maria/ma_recovery.c
+++ b/storage/maria/ma_recovery.c
@@ -3679,7 +3679,7 @@ static void print_redo_phase_progress(TRANSLOG_ADDRESS addr)
cur_offset= LSN_OFFSET(addr);
local_remainder= (cur_logno == end_logno) ? (end_offset - cur_offset) :
(((longlong)log_file_size) - cur_offset +
- max(end_logno - cur_logno - 1, 0) * ((longlong)log_file_size) +
+ MY_MAX(end_logno - cur_logno - 1, 0) * ((longlong)log_file_size) +
end_offset);
if (initial_remainder == (ulonglong)(-1))
initial_remainder= local_remainder;
diff --git a/storage/maria/ma_rt_mbr.c b/storage/maria/ma_rt_mbr.c
index b3e2b0ceab8..496ace2a84f 100644
--- a/storage/maria/ma_rt_mbr.c
+++ b/storage/maria/ma_rt_mbr.c
@@ -329,8 +329,8 @@ int maria_rtree_d_mbr(const HA_KEYSEG *keyseg, const uchar *a,
bmin= korr_func(b); \
amax= korr_func(a+len); \
bmax= korr_func(b+len); \
- amin= min(amin, bmin); \
- amax= max(amax, bmax); \
+ amin= MY_MIN(amin, bmin); \
+ amax= MY_MAX(amax, bmax); \
store_func(c, amin); \
store_func(c+len, amax); \
}
@@ -342,8 +342,8 @@ int maria_rtree_d_mbr(const HA_KEYSEG *keyseg, const uchar *a,
get_func(bmin, b); \
get_func(amax, a+len); \
get_func(bmax, b+len); \
- amin= min(amin, bmin); \
- amax= max(amax, bmax); \
+ amin= MY_MIN(amin, bmin); \
+ amax= MY_MAX(amax, bmax); \
store_func(c, amin); \
store_func(c+len, amax); \
}
@@ -422,8 +422,8 @@ int maria_rtree_combine_rect(const HA_KEYSEG *keyseg, const uchar* a,
bmin= korr_func(b); \
amax= korr_func(a+len); \
bmax= korr_func(b+len); \
- amin= max(amin, bmin); \
- amax= min(amax, bmax); \
+ amin= MY_MAX(amin, bmin); \
+ amax= MY_MIN(amax, bmax); \
if (amin >= amax) \
return 0; \
res *= amax - amin; \
@@ -436,8 +436,8 @@ int maria_rtree_combine_rect(const HA_KEYSEG *keyseg, const uchar* a,
get_func(bmin, b); \
get_func(amax, a+len); \
get_func(bmax, b+len); \
- amin= max(amin, bmin); \
- amax= min(amax, bmax); \
+ amin= MY_MAX(amin, bmin); \
+ amax= MY_MIN(amax, bmax); \
if (amin >= amax) \
return 0; \
res *= amax - amin; \
@@ -513,7 +513,7 @@ double maria_rtree_overlapping_area(HA_KEYSEG *keyseg, uchar* a, uchar* b,
amax= korr_func(a+len); \
bmax= korr_func(b+len); \
a_area *= (((double)amax) - ((double)amin)); \
- loc_ab_area *= ((double)max(amax, bmax) - (double)min(amin, bmin)); \
+ loc_ab_area *= ((double)MY_MAX(amax, bmax) - (double)MY_MIN(amin, bmin)); \
}
#define RT_AREA_INC_GET(type, get_func, len)\
@@ -524,7 +524,7 @@ double maria_rtree_overlapping_area(HA_KEYSEG *keyseg, uchar* a, uchar* b,
get_func(amax, a+len); \
get_func(bmax, b+len); \
a_area *= (((double)amax) - ((double)amin)); \
- loc_ab_area *= ((double)max(amax, bmax) - (double)min(amin, bmin)); \
+ loc_ab_area *= ((double)MY_MAX(amax, bmax) - (double)MY_MIN(amin, bmin)); \
}
/*
@@ -612,7 +612,7 @@ safe_end:
amax= korr_func(a+len); \
bmax= korr_func(b+len); \
a_perim+= (((double)amax) - ((double)amin)); \
- *ab_perim+= ((double)max(amax, bmax) - (double)min(amin, bmin)); \
+ *ab_perim+= ((double)MY_MAX(amax, bmax) - (double)MY_MIN(amin, bmin)); \
}
#define RT_PERIM_INC_GET(type, get_func, len)\
@@ -623,7 +623,7 @@ safe_end:
get_func(amax, a+len); \
get_func(bmax, b+len); \
a_perim+= (((double)amax) - ((double)amin)); \
- *ab_perim+= ((double)max(amax, bmax) - (double)min(amin, bmin)); \
+ *ab_perim+= ((double)MY_MAX(amax, bmax) - (double)MY_MIN(amin, bmin)); \
}
/*
diff --git a/storage/maria/ma_sort.c b/storage/maria/ma_sort.c
index 4bc179c3008..72f9c7ceca3 100644
--- a/storage/maria/ma_sort.c
+++ b/storage/maria/ma_sort.c
@@ -133,7 +133,7 @@ int _ma_create_index_by_sort(MARIA_SORT_PARAM *info, my_bool no_messages,
sort_keys= (uchar **) NULL; error= 1;
maxbuffer=1;
- memavl=max(sortbuff_size,MIN_SORT_MEMORY);
+ memavl=MY_MAX(sortbuff_size,MIN_SORT_MEMORY);
records= info->sort_info->max_records;
sort_length= info->key_length;
LINT_INIT(keys);
@@ -364,7 +364,7 @@ pthread_handler_t _ma_thr_find_all_keys(void *arg)
bzero((char*) &sort_param->buffpek,sizeof(sort_param->buffpek));
bzero((char*) &sort_param->unique, sizeof(sort_param->unique));
- memavl= max(sort_param->sortbuff_size, MIN_SORT_MEMORY);
+ memavl= MY_MAX(sort_param->sortbuff_size, MIN_SORT_MEMORY);
idx= (uint)sort_param->sort_info->max_records;
sort_length= sort_param->key_length;
maxbuffer= 1;
@@ -857,7 +857,7 @@ static uint read_to_buffer(IO_CACHE *fromfile, BUFFPEK *buffpek,
register uint count;
uint length;
- if ((count=(uint) min((ha_rows) buffpek->max_keys,buffpek->count)))
+ if ((count=(uint) MY_MIN((ha_rows) buffpek->max_keys,buffpek->count)))
{
if (mysql_file_pread(fromfile->file, buffpek->base,
(length= sort_length*count),buffpek->file_pos,MYF_RW))
@@ -877,7 +877,7 @@ static uint read_to_buffer_varlen(IO_CACHE *fromfile, BUFFPEK *buffpek,
uint idx;
uchar *buffp;
- if ((count=(uint) min((ha_rows) buffpek->max_keys,buffpek->count)))
+ if ((count=(uint) MY_MIN((ha_rows) buffpek->max_keys,buffpek->count)))
{
buffp= buffpek->base;
diff --git a/storage/maria/ma_test1.c b/storage/maria/ma_test1.c
index 945654a0bbe..0147d00d6e0 100644
--- a/storage/maria/ma_test1.c
+++ b/storage/maria/ma_test1.c
@@ -631,7 +631,7 @@ static void create_record(uchar *record,uint rownr)
uint tmp;
uchar *ptr;;
sprintf((char*) blob_record,"... row: %d", rownr);
- strappend((char*) blob_record,max(MAX_REC_LENGTH-rownr,10),' ');
+ strappend((char*) blob_record,MY_MAX(MAX_REC_LENGTH-rownr,10),' ');
tmp=strlen((char*) blob_record);
int4store(pos,tmp);
ptr=blob_record;
diff --git a/storage/maria/ma_test2.c b/storage/maria/ma_test2.c
index ea1978b4ee5..242e5e16333 100644
--- a/storage/maria/ma_test2.c
+++ b/storage/maria/ma_test2.c
@@ -698,7 +698,7 @@ int main(int argc, char *argv[])
goto err2;
}
- for (i=min(2,keys) ; i-- > 0 ;)
+ for (i=MY_MIN(2,keys) ; i-- > 0 ;)
{
if (maria_rsame(file,read_record2,(int) i)) goto err;
if (bcmp(read_record,read_record2,reclength) != 0)
diff --git a/storage/maria/ma_write.c b/storage/maria/ma_write.c
index 5d440a40dc0..24a3f96f42e 100644
--- a/storage/maria/ma_write.c
+++ b/storage/maria/ma_write.c
@@ -933,7 +933,7 @@ ChangeSet@1.2562, 2008-04-09 07:41:40+02:00, serg@janus.mylan +9 -0
&s_temp));
}
DBUG_RETURN(_ma_split_page(info, key, anc_page,
- min(org_anc_length,
+ MY_MIN(org_anc_length,
info->s->max_index_block_size),
key_pos, s_temp.changed_length, t_length,
key_buff, insert_last));
@@ -2075,7 +2075,7 @@ static my_bool _ma_log_split(MARIA_PAGE *ma_page,
Handle case when split happened directly after the newly inserted key.
*/
max_key_length= new_length - offset;
- extra_length= min(key_length, max_key_length);
+ extra_length= MY_MIN(key_length, max_key_length);
if (offset + move_length > new_length)
{
/* This is true when move_length includes changes for next packed key */
diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h
index e983f561bbb..c1ac49a6b35 100644
--- a/storage/maria/maria_def.h
+++ b/storage/maria/maria_def.h
@@ -465,6 +465,7 @@ typedef struct st_maria_share
my_bool changed, /* If changed since lock */
global_changed, /* If changed since open */
not_flushed;
+ my_bool internal_table; /* Internal tmp table */
my_bool lock_key_trees; /* If we have to lock trees on read */
my_bool non_transactional_concurrent_insert;
my_bool delay_key_write;
diff --git a/storage/maria/maria_pack.c b/storage/maria/maria_pack.c
index 40686995378..2fe5e818db9 100644
--- a/storage/maria/maria_pack.c
+++ b/storage/maria/maria_pack.c
@@ -1243,7 +1243,7 @@ static void check_counts(HUFF_COUNTS *huff_counts, uint trees,
{
if (huff_counts->field_length > 2 &&
huff_counts->empty_fields + (records - huff_counts->empty_fields)*
- (1+max_bit(max(huff_counts->max_pre_space,
+ (1+max_bit(MY_MAX(huff_counts->max_pre_space,
huff_counts->max_end_space))) <
records * max_bit(huff_counts->field_length))
{
@@ -3021,7 +3021,7 @@ static int save_state_mrg(File file,PACK_MRG_INFO *mrg,my_off_t new_length,
if (mrg->src_file_has_indexes_disabled)
{
isam_file->s->state.state.key_file_length=
- max(isam_file->s->state.state.key_file_length, new_length);
+ MY_MAX(isam_file->s->state.state.key_file_length, new_length);
}
state.dellink= HA_OFFSET_ERROR;
state.version=(ulong) time((time_t*) 0);
diff --git a/storage/maria/trnman.c b/storage/maria/trnman.c
index f8959c977f8..38fdb358e53 100644
--- a/storage/maria/trnman.c
+++ b/storage/maria/trnman.c
@@ -877,7 +877,7 @@ TrID trnman_get_min_safe_trid()
{
TrID trid;
mysql_mutex_lock(&LOCK_trn_list);
- trid= min(active_list_min.next->min_read_from,
+ trid= MY_MIN(active_list_min.next->min_read_from,
global_trid_generator);
mysql_mutex_unlock(&LOCK_trn_list);
return trid;
diff --git a/storage/maria/unittest/ma_test_all-t b/storage/maria/unittest/ma_test_all-t
index e66d269ab93..18b26a7bd45 100755
--- a/storage/maria/unittest/ma_test_all-t
+++ b/storage/maria/unittest/ma_test_all-t
@@ -650,6 +650,8 @@ sub ok
{
exit 1;
}
+ # Unlink all files so that we can continue on error
+ unlink_all_possible_tmp_files();
return 0;
}
@@ -702,7 +704,7 @@ sub unlink_all_possible_tmp_files()
unlink_log_files();
# Unlink tmp files that may have been created when testing the test programs
- unlink <$full_tmpdir/*.TMD $full_tmpdir/aria_read_log_test1.txt $full_tmpdir/test1*.MA? $full_tmpdir/ma_test_recovery.output aria_log_control aria_log.00000001 aria_log.00000002 aria_logtest1.MA? test1.MA? test2.MA? test3.MA?>;
+ unlink <$full_tmpdir/*.TMD $full_tmpdir/aria_read_log_test1.txt $full_tmpdir/test1*.MA? $full_tmpdir/ma_test_recovery.output aria_log_control aria_log.00000001 aria_log.00000002 aria_logtest1.MA? test1.MA? test2.MA? test3.MA? *.TMD>;
}
####
diff --git a/storage/myisam/ha_myisam.cc b/storage/myisam/ha_myisam.cc
index f649de8bd5c..3e73bb7c801 100644
--- a/storage/myisam/ha_myisam.cc
+++ b/storage/myisam/ha_myisam.cc
@@ -247,8 +247,8 @@ int table2myisam(TABLE *table_arg, MI_KEYDEF **keydef_out,
pos->algorithm;
keydef[i].block_length= pos->block_size;
keydef[i].seg= keyseg;
- keydef[i].keysegs= pos->key_parts;
- for (j= 0; j < pos->key_parts; j++)
+ keydef[i].keysegs= pos->user_defined_key_parts;
+ for (j= 0; j < pos->user_defined_key_parts; j++)
{
Field *field= pos->key_part[j].field;
type= field->key_type();
@@ -310,7 +310,7 @@ int table2myisam(TABLE *table_arg, MI_KEYDEF **keydef_out,
(uchar*) table_arg->record[0]);
}
}
- keyseg+= pos->key_parts;
+ keyseg+= pos->user_defined_key_parts;
}
if (table_arg->found_next_number_field)
keydef[share->next_number_index].flag|= HA_AUTO_KEY;
@@ -1136,8 +1136,8 @@ int ha_myisam::repair(THD *thd, HA_CHECK &param, bool do_optimize)
}
if (error && file->create_unique_index_by_sort &&
share->state.dupp_key != MAX_KEY)
- print_keydup_error(share->state.dupp_key,
- ER(ER_DUP_ENTRY_WITH_KEY_NAME), MYF(0));
+ print_keydup_error(table, &table->key_info[share->state.dupp_key],
+ MYF(0));
}
else
{
@@ -1527,8 +1527,8 @@ void ha_myisam::start_bulk_insert(ha_rows rows, uint flags)
{
DBUG_ENTER("ha_myisam::start_bulk_insert");
THD *thd= current_thd;
- ulong size= min(thd->variables.read_buff_size,
- (ulong) (table->s->avg_row_length*rows));
+ ulong size= MY_MIN(thd->variables.read_buff_size,
+ (ulong) (table->s->avg_row_length*rows));
DBUG_PRINT("info",("start_bulk_insert: rows %lu size %lu",
(ulong) rows, size));
@@ -1539,36 +1539,33 @@ void ha_myisam::start_bulk_insert(ha_rows rows, uint flags)
can_enable_indexes= mi_is_all_keys_active(file->s->state.key_map,
file->s->base.keys);
- if (!(specialflag & SPECIAL_SAFE_MODE))
+ /*
+ Only disable old index if the table was empty and we are inserting
+ a lot of rows.
+ Note that in end_bulk_insert() we may truncate the table if
+ enable_indexes() failed, thus it's essential that indexes are
+ disabled ONLY for an empty table.
+ */
+ if (file->state->records == 0 && can_enable_indexes &&
+ (!rows || rows >= MI_MIN_ROWS_TO_DISABLE_INDEXES))
{
- /*
- Only disable old index if the table was empty and we are inserting
- a lot of rows.
- Note that in end_bulk_insert() we may truncate the table if
- enable_indexes() failed, thus it's essential that indexes are
- disabled ONLY for an empty table.
- */
- if (file->state->records == 0 && can_enable_indexes &&
- (!rows || rows >= MI_MIN_ROWS_TO_DISABLE_INDEXES))
+ if (file->open_flag & HA_OPEN_INTERNAL_TABLE)
{
- if (file->open_flag & HA_OPEN_INTERNAL_TABLE)
- {
- file->update|= HA_STATE_CHANGED;
- mi_clear_all_keys_active(file->s->state.key_map);
- }
- else
- {
- my_bool all_keys= test(flags & HA_CREATE_UNIQUE_INDEX_BY_SORT);
- mi_disable_indexes_for_rebuild(file, rows, all_keys);
- }
+ file->update|= HA_STATE_CHANGED;
+ mi_clear_all_keys_active(file->s->state.key_map);
}
else
+ {
+ my_bool all_keys= test(flags & HA_CREATE_UNIQUE_INDEX_BY_SORT);
+ mi_disable_indexes_for_rebuild(file, rows, all_keys);
+ }
+ }
+ else
if (!file->bulk_insert &&
(!rows || rows >= MI_MIN_ROWS_TO_USE_BULK_INSERT))
{
mi_init_bulk_insert(file, thd->variables.bulk_insert_buff_size, rows);
}
- }
DBUG_VOID_RETURN;
}
@@ -1846,7 +1843,7 @@ int ha_myisam::info(uint flag)
number of records in the buffer results in a different number of buffer
refills and in a different order of records in the result set.
*/
- stats.mrr_length_per_rec= misam_info.reflength + 8; // 8=max(sizeof(void *))
+ stats.mrr_length_per_rec= misam_info.reflength + 8; // 8=MY_MAX(sizeof(void *))
ref_length= misam_info.reflength;
share->db_options_in_use= misam_info.options;
@@ -1896,8 +1893,6 @@ int ha_myisam::info(uint flag)
int ha_myisam::extra(enum ha_extra_function operation)
{
- if ((specialflag & SPECIAL_SAFE_MODE) && operation == HA_EXTRA_KEYREAD)
- return 0;
if (operation == HA_EXTRA_MMAP && !opt_myisam_use_mmap)
return 0;
return mi_extra(file, operation, 0);
@@ -1915,8 +1910,6 @@ int ha_myisam::reset(void)
int ha_myisam::extra_opt(enum ha_extra_function operation, ulong cache_size)
{
- if ((specialflag & SPECIAL_SAFE_MODE) && operation == HA_EXTRA_WRITE_CACHE)
- return 0;
return mi_extra(file, operation, (void*) &cache_size);
}
diff --git a/storage/myisam/mi_cache.c b/storage/myisam/mi_cache.c
index 6e9feaefb2d..3477e67eae5 100644
--- a/storage/myisam/mi_cache.c
+++ b/storage/myisam/mi_cache.c
@@ -62,7 +62,7 @@ int _mi_read_cache(IO_CACHE *info, uchar *buff, my_off_t pos, uint length,
(my_off_t) (info->read_end - info->request_pos))
{
in_buff_pos=info->request_pos+(uint) offset;
- in_buff_length= min(length, (size_t) (info->read_end-in_buff_pos));
+ in_buff_length= MY_MIN(length, (size_t) (info->read_end-in_buff_pos));
memcpy(buff,info->request_pos+(uint) offset,(size_t) in_buff_length);
if (!(length-=in_buff_length))
DBUG_RETURN(0);
diff --git a/storage/myisam/mi_check.c b/storage/myisam/mi_check.c
index 056aff5a72b..61dbbb7a18d 100644
--- a/storage/myisam/mi_check.c
+++ b/storage/myisam/mi_check.c
@@ -1946,7 +1946,13 @@ int mi_sort_index(HA_CHECK *param, register MI_INFO *info, char * name)
key++,keyinfo++)
{
if (! mi_is_key_active(info->s->state.key_map, key))
+ {
+ /* Since the key is not active, this should not be read, but we
initialize it anyway to silence a Valgrind warning when passing that
+ chunk of memory to pwrite(). */
+ index_pos[key]= HA_OFFSET_ERROR;
continue;
+ }
if (share->state.key_root[key] != HA_OFFSET_ERROR)
{
@@ -2145,7 +2151,7 @@ int filecopy(HA_CHECK *param, File to,File from,my_off_t start,
ulong buff_length;
DBUG_ENTER("filecopy");
- buff_length=(ulong) min(param->write_buffer_length,length);
+ buff_length=(ulong) MY_MIN(param->write_buffer_length,length);
if (!(buff=my_malloc(buff_length,MYF(0))))
{
buff=tmp_buff; buff_length=IO_SIZE;
@@ -2303,7 +2309,7 @@ int mi_repair_by_sort(HA_CHECK *param, register MI_INFO *info,
MYF(param->malloc_flags));
if (share->data_file_type == DYNAMIC_RECORD)
- length=max(share->base.min_pack_length+1,share->base.min_block_length);
+ length=MY_MAX(share->base.min_pack_length+1,share->base.min_block_length);
else if (share->data_file_type == COMPRESSED_RECORD)
length=share->base.min_block_length;
else
@@ -2392,7 +2398,7 @@ int mi_repair_by_sort(HA_CHECK *param, register MI_INFO *info,
(see _create_index_by_sort)
*/
sort_info.max_records= 10 *
- max(param->sort_buffer_length, MIN_SORT_BUFFER) /
+ MY_MAX(param->sort_buffer_length, MIN_SORT_BUFFER) /
sort_param.key_length;
}
@@ -2759,7 +2765,7 @@ int mi_repair_parallel(HA_CHECK *param, register MI_INFO *info,
mysql_file_seek(param->read_cache.file, 0L, MY_SEEK_END, MYF(0));
if (share->data_file_type == DYNAMIC_RECORD)
- rec_length=max(share->base.min_pack_length+1,share->base.min_block_length);
+ rec_length=MY_MAX(share->base.min_pack_length+1,share->base.min_block_length);
else if (share->data_file_type == COMPRESSED_RECORD)
rec_length=share->base.min_block_length;
else
@@ -3984,7 +3990,7 @@ word_init_ft_buf:
ft_buf->buf=ft_buf->lastkey+a_len;
/*
32 is just a safety margin here
- (at least max(val_len, sizeof(nod_flag)) should be there).
+ (at least MY_MAX(val_len, sizeof(nod_flag)) should be there).
May be better performance could be achieved if we'd put
(sort_info->keyinfo->block_length-32)/XXX
instead.
diff --git a/storage/myisam/mi_close.c b/storage/myisam/mi_close.c
index e58c2e0f189..f0a82bcef04 100644
--- a/storage/myisam/mi_close.c
+++ b/storage/myisam/mi_close.c
@@ -31,7 +31,8 @@ int mi_close(register MI_INFO *info)
(long) info, (uint) share->reopen,
(uint) share->tot_locks));
- mysql_mutex_lock(&THR_LOCK_myisam);
+ if (info->open_list.data)
+ mysql_mutex_lock(&THR_LOCK_myisam);
if (info->lock_type == F_EXTRA_LCK)
info->lock_type=F_UNLCK; /* HA_EXTRA_NO_USER_CHANGE */
@@ -54,7 +55,8 @@ int mi_close(register MI_INFO *info)
info->opt_flag&= ~(READ_CACHE_USED | WRITE_CACHE_USED);
}
flag= !--share->reopen;
- myisam_open_list=list_delete(myisam_open_list,&info->open_list);
+ if (info->open_list.data)
+ myisam_open_list= list_delete(myisam_open_list, &info->open_list);
mysql_mutex_unlock(&share->intern_lock);
my_free(mi_get_rec_buff_ptr(info, info->rec_buff));
@@ -111,7 +113,8 @@ int mi_close(register MI_INFO *info)
}
my_free(info->s);
}
- mysql_mutex_unlock(&THR_LOCK_myisam);
+ if (info->open_list.data)
+ mysql_mutex_unlock(&THR_LOCK_myisam);
if (info->ftparser_param)
{
my_free(info->ftparser_param);
diff --git a/storage/myisam/mi_create.c b/storage/myisam/mi_create.c
index ad97fba2cbb..cc0cfd0ae3a 100644
--- a/storage/myisam/mi_create.c
+++ b/storage/myisam/mi_create.c
@@ -43,6 +43,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
base_pos,long_varchar_count,varchar_length,
max_key_block_length,unique_key_parts,fulltext_keys,offset;
uint aligned_key_start, block_length, res;
+ uint internal_table= flags & HA_CREATE_INTERNAL_TABLE;
ulong reclength, real_reclength,min_pack_length;
char filename[FN_REFLEN],linkname[FN_REFLEN], *linkname_ptr;
ulong pack_reclength;
@@ -446,8 +447,8 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
block_length= (keydef->block_length ?
my_round_up_to_next_power(keydef->block_length) :
myisam_block_size);
- block_length= max(block_length, MI_MIN_KEY_BLOCK_LENGTH);
- block_length= min(block_length, MI_MAX_KEY_BLOCK_LENGTH);
+ block_length= MY_MAX(block_length, MI_MIN_KEY_BLOCK_LENGTH);
+ block_length= MY_MIN(block_length, MI_MAX_KEY_BLOCK_LENGTH);
keydef->block_length= (uint16) MI_BLOCK_SIZE(length-real_length_diff,
pointer,MI_MAX_KEYPTR_SIZE,
@@ -536,7 +537,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
got from MYI file header (see also myisampack.c:save_state)
*/
share.base.key_reflength=
- mi_get_pointer_length(max(ci->key_file_length,tmp),3);
+ mi_get_pointer_length(MY_MAX(ci->key_file_length,tmp),3);
share.base.keys= share.state.header.keys= keys;
share.state.header.uniques= uniques;
share.state.header.fulltext_keys= fulltext_keys;
@@ -569,12 +570,13 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
share.base.min_block_length=
(share.base.pack_reclength+3 < MI_EXTEND_BLOCK_LENGTH &&
! share.base.blobs) ?
- max(share.base.pack_reclength,MI_MIN_BLOCK_LENGTH) :
+ MY_MAX(share.base.pack_reclength,MI_MIN_BLOCK_LENGTH) :
MI_EXTEND_BLOCK_LENGTH;
if (! (flags & HA_DONT_TOUCH_DATA))
share.state.create_time= time((time_t*) 0);
- mysql_mutex_lock(&THR_LOCK_myisam);
+ if (!internal_table)
+ mysql_mutex_lock(&THR_LOCK_myisam);
/*
NOTE: For test_if_reopen() we need a real path name. Hence we need
@@ -631,7 +633,7 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
NOTE: The filename is compared against unique_file_name of every
open table. Hence we need a real path here.
*/
- if (test_if_reopen(filename))
+ if (!internal_table && test_if_reopen(filename))
{
my_printf_error(HA_ERR_TABLE_EXIST, "MyISAM table '%s' is in use "
"(most likely by a MERGE table). Try FLUSH TABLES.",
@@ -820,7 +822,8 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
goto err;
}
errpos=0;
- mysql_mutex_unlock(&THR_LOCK_myisam);
+ if (!internal_table)
+ mysql_mutex_unlock(&THR_LOCK_myisam);
res= 0;
if (mysql_file_close(file, MYF(0)))
res= my_errno;
@@ -828,7 +831,8 @@ int mi_create(const char *name,uint keys,MI_KEYDEF *keydefs,
DBUG_RETURN(res);
err:
- mysql_mutex_unlock(&THR_LOCK_myisam);
+ if (!internal_table)
+ mysql_mutex_unlock(&THR_LOCK_myisam);
err_no_lock:
save_errno=my_errno;
diff --git a/storage/myisam/mi_dynrec.c b/storage/myisam/mi_dynrec.c
index 009a2affe0c..021809ed892 100644
--- a/storage/myisam/mi_dynrec.c
+++ b/storage/myisam/mi_dynrec.c
@@ -118,7 +118,8 @@ int mi_munmap_file(MI_INFO *info)
{
int ret;
DBUG_ENTER("mi_unmap_file");
- if ((ret= my_munmap(info->s->file_map, (size_t) info->s->mmaped_length)))
+ if ((ret= my_munmap((void*) info->s->file_map,
+ (size_t) info->s->mmaped_length)))
DBUG_RETURN(ret);
info->s->file_read= mi_nommap_pread;
info->s->file_write= mi_nommap_pwrite;
@@ -865,7 +866,7 @@ static int update_dynamic_record(MI_INFO *info, my_off_t filepos, uchar *record,
uint tmp=MY_ALIGN(reclength - length + 3 +
test(reclength >= 65520L),MI_DYN_ALIGN_SIZE);
/* Don't create a block bigger than MI_MAX_BLOCK_LENGTH */
- tmp= min(length+tmp, MI_MAX_BLOCK_LENGTH)-length;
+ tmp= MY_MIN(length+tmp, MI_MAX_BLOCK_LENGTH)-length;
/* Check if we can extend this block */
if (block_info.filepos + block_info.block_len ==
info->state->data_file_length &&
@@ -1780,15 +1781,21 @@ int _mi_read_rnd_dynamic_record(MI_INFO *info, uchar *buf,
if (b_type & (BLOCK_DELETED | BLOCK_ERROR | BLOCK_SYNC_ERROR |
BLOCK_FATAL_ERROR))
{
- if ((b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR))
- && skip_deleted_blocks)
- {
- filepos=block_info.filepos+block_info.block_len;
- block_info.second_read=0;
- continue; /* Search after next_record */
- }
- if (b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR))
+ if ((b_type & (BLOCK_DELETED | BLOCK_SYNC_ERROR)))
{
+ if (skip_deleted_blocks)
+ {
+ filepos=block_info.filepos+block_info.block_len;
+ block_info.second_read=0;
+ continue; /* Search after next_record */
+ }
+ /*
+ If we're not on the first block of a record and
+ the block is marked as deleted or out of sync,
+ something's gone wrong: the record is damaged.
+ */
+ if (block_of_record != 0)
+ goto panic;
my_errno=HA_ERR_RECORD_DELETED;
info->lastpos=block_info.filepos;
info->nextpos=block_info.filepos+block_info.block_len;
diff --git a/storage/myisam/mi_extra.c b/storage/myisam/mi_extra.c
index dab1f66ed6d..f57fba5c2c5 100644
--- a/storage/myisam/mi_extra.c
+++ b/storage/myisam/mi_extra.c
@@ -100,7 +100,7 @@ int mi_extra(MI_INFO *info, enum ha_extra_function function, void *extra_arg)
cache_size= (extra_arg ? *(ulong*) extra_arg :
my_default_record_cache_size);
if (!(init_io_cache(&info->rec_cache,info->dfile,
- (uint) min(info->state->data_file_length+1,
+ (uint) MY_MIN(info->state->data_file_length+1,
cache_size),
READ_CACHE,0L,(pbool) (info->lock_type != F_UNLCK),
MYF(share->write_flag & MY_WAIT_IF_FULL))))
diff --git a/storage/myisam/mi_open.c b/storage/myisam/mi_open.c
index 438057e22df..f8213b1a3a5 100644
--- a/storage/myisam/mi_open.c
+++ b/storage/myisam/mi_open.c
@@ -14,7 +14,18 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
-/* open a isam-database */
+/*
+ open an ISAM database
+
+ Internal temporary tables
+ -------------------------
+ Since only single instance of internal temporary table is required by
+ optimizer, such tables are not registered on myisam_open_list. In effect
+ it means (a) THR_LOCK_myisam is not held while such table is being created,
+ opened or closed; (b) no iteration through myisam_open_list while opening a
+ table. This optimization gives nice scalability benefit in concurrent
+ environment. MEMORY internal temporary tables are optimized similarly.
+*/
#include "fulltext.h"
#include "sp_defs.h"
@@ -74,10 +85,11 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
int lock_error,kfile,open_mode,save_errno,have_rtree=0, realpath_err;
uint i,j,len,errpos,head_length,base_pos,offset,info_length,keys,
key_parts,unique_key_parts,base_key_parts,fulltext_keys,uniques;
+ uint internal_table= open_flags & HA_OPEN_INTERNAL_TABLE;
char name_buff[FN_REFLEN], org_name[FN_REFLEN], index_name[FN_REFLEN],
data_name[FN_REFLEN];
uchar *UNINIT_VAR(disk_cache), *disk_pos, *end_pos;
- MI_INFO info,*UNINIT_VAR(m_info),*old_info;
+ MI_INFO info,*UNINIT_VAR(m_info),*old_info= NULL;
MYISAM_SHARE share_buff,*share;
ulong *rec_per_key_part= 0;
my_off_t *key_root, *key_del;
@@ -99,8 +111,13 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
DBUG_RETURN (NULL);
}
- mysql_mutex_lock(&THR_LOCK_myisam);
- if (!(old_info=test_if_reopen(name_buff)))
+ if (!internal_table)
+ {
+ mysql_mutex_lock(&THR_LOCK_myisam);
+ old_info= test_if_reopen(name_buff);
+ }
+
+ if (!old_info)
{
share= &share_buff;
bzero((uchar*) &share_buff,sizeof(share_buff));
@@ -311,7 +328,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
strmov(share->index_file_name, index_name);
strmov(share->data_file_name, data_name);
- share->blocksize=min(IO_SIZE,myisam_block_size);
+ share->blocksize=MY_MIN(IO_SIZE,myisam_block_size);
{
HA_KEYSEG *pos=share->keyparts;
uint32 ftkey_nr= 1;
@@ -349,6 +366,12 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
}
else if (pos->type == HA_KEYTYPE_BINARY)
pos->charset= &my_charset_bin;
+ if (!(share->keyinfo[i].flag & HA_SPATIAL) &&
+ pos->start > share->base.reclength)
+ {
+ my_errno= HA_ERR_CRASHED;
+ goto err;
+ }
}
if (share->keyinfo[i].flag & HA_SPATIAL)
{
@@ -491,7 +514,7 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
share->base.margin_key_file_length=(share->base.max_key_file_length -
(keys ? MI_INDEX_BLOCK_MARGIN *
share->blocksize * keys : 0));
- share->blocksize=min(IO_SIZE,myisam_block_size);
+ share->blocksize=MY_MIN(IO_SIZE,myisam_block_size);
share->data_file_type=STATIC_RECORD;
if (share->options & HA_OPTION_COMPRESS_RECORD)
{
@@ -638,10 +661,13 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
*m_info=info;
thr_lock_data_init(&share->lock,&m_info->lock,(void*) m_info);
- m_info->open_list.data=(void*) m_info;
- myisam_open_list=list_add(myisam_open_list,&m_info->open_list);
- mysql_mutex_unlock(&THR_LOCK_myisam);
+ if (!internal_table)
+ {
+ m_info->open_list.data= (void*) m_info;
+ myisam_open_list= list_add(myisam_open_list, &m_info->open_list);
+ mysql_mutex_unlock(&THR_LOCK_myisam);
+ }
bzero(info.buff, share->base.max_key_block_length * 2);
my_free(rec_per_key_part);
@@ -686,7 +712,8 @@ err:
default:
break;
}
- mysql_mutex_unlock(&THR_LOCK_myisam);
+ if (!internal_table)
+ mysql_mutex_unlock(&THR_LOCK_myisam);
my_errno=save_errno;
DBUG_RETURN (NULL);
} /* mi_open */
@@ -706,10 +733,10 @@ uchar *mi_alloc_rec_buff(MI_INFO *info, ulong length, uchar **buf)
if (length == (ulong) -1)
{
if (info->s->options & HA_OPTION_COMPRESS_RECORD)
- length= max(info->s->base.pack_reclength, info->s->max_pack_length);
+ length= MY_MAX(info->s->base.pack_reclength, info->s->max_pack_length);
else
length= info->s->base.pack_reclength;
- length= max(length, info->s->base.max_key_length);
+ length= MY_MAX(length, info->s->base.max_key_length);
/* Avoid unnecessary realloc */
if (newptr && length == old_length)
return newptr;
diff --git a/storage/myisam/mi_packrec.c b/storage/myisam/mi_packrec.c
index 7e2403b64c9..c95afe57725 100644
--- a/storage/myisam/mi_packrec.c
+++ b/storage/myisam/mi_packrec.c
@@ -685,7 +685,7 @@ static uint find_longest_bitstream(uint16 *table, uint16 *end)
return OFFSET_TABLE_SIZE;
}
length2= find_longest_bitstream(next, end) + 1;
- length=max(length,length2);
+ length=MY_MAX(length,length2);
}
return length;
}
@@ -1399,7 +1399,7 @@ uint _mi_pack_get_block_info(MI_INFO *myisam, MI_BIT_BUFF *bit_buff,
info->filepos=filepos+head_length;
if (file > 0)
{
- info->offset=min(info->rec_len, ref_length - head_length);
+ info->offset=MY_MIN(info->rec_len, ref_length - head_length);
memcpy(*rec_buff_p, header + head_length, info->offset);
}
return 0;
diff --git a/storage/myisam/mi_search.c b/storage/myisam/mi_search.c
index 968cb9624a6..01fa10de7a3 100644
--- a/storage/myisam/mi_search.c
+++ b/storage/myisam/mi_search.c
@@ -949,9 +949,7 @@ uint _mi_get_binary_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag,
("Found too long binary packed key: %u of %u at 0x%lx",
length, keyinfo->maxlength, (long) *page_pos));
DBUG_DUMP("key", *page_pos, 16);
- mi_print_error(keyinfo->share, HA_ERR_CRASHED);
- my_errno=HA_ERR_CRASHED;
- DBUG_RETURN(0); /* Wrong key */
+ goto crashed; /* Wrong key */
}
/* Key is packed against prev key, take prefix from prev key. */
from= key;
@@ -994,6 +992,8 @@ uint _mi_get_binary_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag,
if (from == from_end) { from=page; from_end=page_end; }
length+= (uint) ((*key++ = *from++));
}
+ if (length > keyseg->length)
+ goto crashed;
}
else
length=keyseg->length;
@@ -1033,15 +1033,18 @@ uint _mi_get_binary_pack_key(register MI_KEYDEF *keyinfo, uint nod_flag,
if (from_end != page_end)
{
DBUG_PRINT("error",("Error when unpacking key"));
- mi_print_error(keyinfo->share, HA_ERR_CRASHED);
- my_errno=HA_ERR_CRASHED;
- DBUG_RETURN(0); /* Error */
+ goto crashed; /* Error */
}
/* Copy data pointer and, if appropriate, key block pointer. */
memcpy((uchar*) key,(uchar*) from,(size_t) length);
*page_pos= from+length;
}
DBUG_RETURN((uint) (key-start_key)+keyseg->length);
+
+ crashed:
+ mi_print_error(keyinfo->share, HA_ERR_CRASHED);
+ my_errno= HA_ERR_CRASHED;
+ DBUG_RETURN(0);
}
diff --git a/storage/myisam/mi_test1.c b/storage/myisam/mi_test1.c
index 3b2597eb01e..9e4e1c46891 100644
--- a/storage/myisam/mi_test1.c
+++ b/storage/myisam/mi_test1.c
@@ -439,7 +439,7 @@ static void create_record(uchar *record,uint rownr)
uint tmp;
uchar *ptr;;
sprintf((char*) blob_record,"... row: %d", rownr);
- strappend((char*) blob_record,max(MAX_REC_LENGTH-rownr,10),' ');
+ strappend((char*) blob_record,MY_MAX(MAX_REC_LENGTH-rownr,10),' ');
tmp=strlen((char*) blob_record);
int4store(pos,tmp);
ptr=blob_record;
diff --git a/storage/myisam/mi_test2.c b/storage/myisam/mi_test2.c
index 3ec12ef5cca..e53c68874b2 100644
--- a/storage/myisam/mi_test2.c
+++ b/storage/myisam/mi_test2.c
@@ -597,7 +597,7 @@ int main(int argc, char *argv[])
goto err;
bmove(read_record2,read_record,reclength);
- for (i=min(2,keys) ; i-- > 0 ;)
+ for (i=MY_MIN(2,keys) ; i-- > 0 ;)
{
if (mi_rsame(file,read_record2,(int) i)) goto err;
if (memcmp(read_record,read_record2,reclength) != 0)
diff --git a/storage/myisam/myisamchk.c b/storage/myisam/myisamchk.c
index c8546ee56f5..64ffffc3a1e 100644
--- a/storage/myisam/myisamchk.c
+++ b/storage/myisam/myisamchk.c
@@ -16,6 +16,7 @@
/* Describe, check and repair of MyISAM tables */
#include "fulltext.h"
+#include "my_default.h"
#include <m_ctype.h>
#include <stdarg.h>
#include <my_getopt.h>
diff --git a/storage/myisam/myisamlog.c b/storage/myisam/myisamlog.c
index 1624213851b..86e1978edaa 100644
--- a/storage/myisam/myisamlog.c
+++ b/storage/myisam/myisamlog.c
@@ -91,7 +91,7 @@ int main(int argc, char **argv)
log_filename=myisam_log_filename;
get_options(&argc,&argv);
/* Number of MyISAM files we can have open at one time */
- max_files= (my_set_max_open_files(min(max_files,8))-6)/2;
+ max_files= (my_set_max_open_files(MY_MIN(max_files,8))-6)/2;
if (update)
printf("Trying to %s MyISAM files according to log '%s'\n",
(recover ? "recover" : "update"),log_filename);
diff --git a/storage/myisam/myisampack.c b/storage/myisam/myisampack.c
index 6ce88db87f5..c52bef1e40e 100644
--- a/storage/myisam/myisampack.c
+++ b/storage/myisam/myisampack.c
@@ -20,6 +20,7 @@
#endif
#include "myisamdef.h"
+#include "my_default.h"
#include <queues.h>
#include <my_tree.h>
#include "mysys_err.h"
@@ -783,7 +784,7 @@ static int create_dest_frm(char *source_table, char *dest_table)
*/
(void) my_copy(source_name, dest_name, MYF(MY_DONT_OVERWRITE_FILE));
- return 0;
+ DBUG_RETURN(0);
}
@@ -1269,7 +1270,7 @@ static void check_counts(HUFF_COUNTS *huff_counts, uint trees,
{
if (huff_counts->field_length > 2 &&
huff_counts->empty_fields + (records - huff_counts->empty_fields)*
- (1+max_bit(max(huff_counts->max_pre_space,
+ (1+max_bit(MY_MAX(huff_counts->max_pre_space,
huff_counts->max_end_space))) <
records * max_bit(huff_counts->field_length))
{
@@ -3022,7 +3023,7 @@ static int save_state_mrg(File file,PACK_MRG_INFO *mrg,my_off_t new_length,
if (mrg->src_file_has_indexes_disabled)
{
isam_file->s->state.state.key_file_length=
- max(isam_file->s->state.state.key_file_length, new_length);
+ MY_MAX(isam_file->s->state.state.key_file_length, new_length);
}
state.dellink= HA_OFFSET_ERROR;
state.version=(ulong) time((time_t*) 0);
diff --git a/storage/myisam/rt_mbr.c b/storage/myisam/rt_mbr.c
index deca23bbec7..90569f4a5fc 100644
--- a/storage/myisam/rt_mbr.c
+++ b/storage/myisam/rt_mbr.c
@@ -325,8 +325,8 @@ int rtree_d_mbr(HA_KEYSEG *keyseg, uchar *a, uint key_length, double *res)
bmin = korr_func(b); \
amax = korr_func(a+len); \
bmax = korr_func(b+len); \
- amin = min(amin, bmin); \
- amax = max(amax, bmax); \
+ amin = MY_MIN(amin, bmin); \
+ amax = MY_MAX(amax, bmax); \
store_func(c, amin); \
store_func(c+len, amax); \
}
@@ -338,8 +338,8 @@ int rtree_d_mbr(HA_KEYSEG *keyseg, uchar *a, uint key_length, double *res)
get_func(bmin, b); \
get_func(amax, a+len); \
get_func(bmax, b+len); \
- amin = min(amin, bmin); \
- amax = max(amax, bmax); \
+ amin = MY_MIN(amin, bmin); \
+ amax = MY_MAX(amax, bmax); \
store_func(c, amin); \
store_func(c+len, amax); \
}
@@ -417,8 +417,8 @@ int rtree_combine_rect(HA_KEYSEG *keyseg, uchar* a, uchar* b, uchar* c,
bmin = korr_func(b); \
amax = korr_func(a+len); \
bmax = korr_func(b+len); \
- amin = max(amin, bmin); \
- amax = min(amax, bmax); \
+ amin = MY_MAX(amin, bmin); \
+ amax = MY_MIN(amax, bmax); \
if (amin >= amax) \
return 0; \
res *= amax - amin; \
@@ -431,8 +431,8 @@ int rtree_combine_rect(HA_KEYSEG *keyseg, uchar* a, uchar* b, uchar* c,
get_func(bmin, b); \
get_func(amax, a+len); \
get_func(bmax, b+len); \
- amin = max(amin, bmin); \
- amax = min(amax, bmax); \
+ amin = MY_MAX(amin, bmin); \
+ amax = MY_MIN(amax, bmax); \
if (amin >= amax) \
return 0; \
res *= amax - amin; \
@@ -508,7 +508,7 @@ double rtree_overlapping_area(HA_KEYSEG *keyseg, uchar* a, uchar* b,
amax = korr_func(a+len); \
bmax = korr_func(b+len); \
a_area *= (((double)amax) - ((double)amin)); \
- loc_ab_area *= ((double)max(amax, bmax) - (double)min(amin, bmin)); \
+ loc_ab_area *= ((double)MY_MAX(amax, bmax) - (double)MY_MIN(amin, bmin)); \
}
#define RT_AREA_INC_GET(type, get_func, len)\
@@ -519,7 +519,7 @@ double rtree_overlapping_area(HA_KEYSEG *keyseg, uchar* a, uchar* b,
get_func(amax, a+len); \
get_func(bmax, b+len); \
a_area *= (((double)amax) - ((double)amin)); \
- loc_ab_area *= ((double)max(amax, bmax) - (double)min(amin, bmin)); \
+ loc_ab_area *= ((double)MY_MAX(amax, bmax) - (double)MY_MIN(amin, bmin)); \
}
/*
@@ -604,7 +604,7 @@ safe_end:
amax = korr_func(a+len); \
bmax = korr_func(b+len); \
a_perim+= (((double)amax) - ((double)amin)); \
- *ab_perim+= ((double)max(amax, bmax) - (double)min(amin, bmin)); \
+ *ab_perim+= ((double)MY_MAX(amax, bmax) - (double)MY_MIN(amin, bmin)); \
}
#define RT_PERIM_INC_GET(type, get_func, len)\
@@ -615,7 +615,7 @@ safe_end:
get_func(amax, a+len); \
get_func(bmax, b+len); \
a_perim+= (((double)amax) - ((double)amin)); \
- *ab_perim+= ((double)max(amax, bmax) - (double)min(amin, bmin)); \
+ *ab_perim+= ((double)MY_MAX(amax, bmax) - (double)MY_MIN(amin, bmin)); \
}
/*
diff --git a/storage/myisam/sort.c b/storage/myisam/sort.c
index 4af45ea02e9..6a328f9ef4e 100644
--- a/storage/myisam/sort.c
+++ b/storage/myisam/sort.c
@@ -130,7 +130,7 @@ int _create_index_by_sort(MI_SORT_PARAM *info,my_bool no_messages,
sort_keys= (uchar **) NULL; error= 1;
maxbuffer=1;
- memavl= max(sortbuff_size, MIN_SORT_BUFFER);
+ memavl= MY_MAX(sortbuff_size, MIN_SORT_BUFFER);
records= info->sort_info->max_records;
sort_length= info->key_length;
LINT_INIT(keys);
@@ -351,7 +351,7 @@ pthread_handler_t thr_find_all_keys(void *arg)
bzero((char*) &sort_param->unique, sizeof(sort_param->unique));
sort_keys= (uchar **) NULL;
- memavl= max(sort_param->sortbuff_size, MIN_SORT_BUFFER);
+ memavl= MY_MAX(sort_param->sortbuff_size, MIN_SORT_BUFFER);
idx= (uint)sort_param->sort_info->max_records;
sort_length= sort_param->key_length;
maxbuffer= 1;
@@ -824,7 +824,7 @@ static uint read_to_buffer(IO_CACHE *fromfile, BUFFPEK *buffpek,
register uint count;
uint length;
- if ((count=(uint) min((ha_rows) buffpek->max_keys,buffpek->count)))
+ if ((count=(uint) MY_MIN((ha_rows) buffpek->max_keys,buffpek->count)))
{
if (mysql_file_pread(fromfile->file, (uchar*) buffpek->base,
(length= sort_length*count),
@@ -846,7 +846,7 @@ static uint read_to_buffer_varlen(IO_CACHE *fromfile, BUFFPEK *buffpek,
uint idx;
uchar *buffp;
- if ((count=(uint) min((ha_rows) buffpek->max_keys,buffpek->count)))
+ if ((count=(uint) MY_MIN((ha_rows) buffpek->max_keys,buffpek->count)))
{
buffp = buffpek->base;
diff --git a/storage/myisammrg/ha_myisammrg.cc b/storage/myisammrg/ha_myisammrg.cc
index 7bb7990d9a3..0971e9297d5 100644
--- a/storage/myisammrg/ha_myisammrg.cc
+++ b/storage/myisammrg/ha_myisammrg.cc
@@ -1304,7 +1304,7 @@ int ha_myisammrg::info(uint flag)
memcpy((char*) table->key_info[0].rec_per_key,
(char*) mrg_info.rec_per_key,
sizeof(table->key_info[0].rec_per_key[0]) *
- min(file->keys, table->s->key_parts));
+ MY_MIN(file->keys, table->s->key_parts));
}
}
if (flag & HA_STATUS_ERRKEY)
diff --git a/storage/perfschema/CMakeLists.txt b/storage/perfschema/CMakeLists.txt
index 0c9713d45d4..ef644030317 100644
--- a/storage/perfschema/CMakeLists.txt
+++ b/storage/perfschema/CMakeLists.txt
@@ -118,6 +118,10 @@ table_tiws_by_index_usage.h
table_tiws_by_table.h
table_tlws_by_table.h
table_users.h
+cursor_by_thread_connect_attr.h
+table_session_connect.h
+table_session_connect_attrs.h
+table_session_account_connect_attrs.h
cursor_by_account.cc
cursor_by_host.cc
cursor_by_thread.cc
@@ -126,6 +130,7 @@ ha_perfschema.cc
pfs.cc
pfs_account.cc
pfs_atomic.cc
+pfs_autosize.cc
pfs_check.cc
pfs_column_values.cc
pfs_con_slice.cc
@@ -189,6 +194,10 @@ table_tiws_by_index_usage.cc
table_tiws_by_table.cc
table_tlws_by_table.cc
table_users.cc
+cursor_by_thread_connect_attr.cc
+table_session_connect.cc
+table_session_connect_attrs.cc
+table_session_account_connect_attrs.cc
)
MYSQL_ADD_PLUGIN(perfschema ${PERFSCHEMA_SOURCES} STORAGE_ENGINE DEFAULT STATIC_ONLY)
diff --git a/storage/perfschema/cursor_by_thread_connect_attr.cc b/storage/perfschema/cursor_by_thread_connect_attr.cc
new file mode 100644
index 00000000000..7a0dd04119d
--- /dev/null
+++ b/storage/perfschema/cursor_by_thread_connect_attr.cc
@@ -0,0 +1,71 @@
+/* Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
+
+#include "my_global.h"
+#include "cursor_by_thread_connect_attr.h"
+
+cursor_by_thread_connect_attr::cursor_by_thread_connect_attr(
+ const PFS_engine_table_share *share) :
+ PFS_engine_table(share, &m_pos), m_row_exists(false)
+{}
+
+int cursor_by_thread_connect_attr::rnd_next(void)
+{
+ PFS_thread *thread;
+
+ for (m_pos.set_at(&m_next_pos);
+ m_pos.has_more_thread();
+ m_pos.next_thread())
+ {
+ thread= &thread_array[m_pos.m_index_1];
+
+ if (thread->m_lock.is_populated())
+ {
+ make_row(thread, m_pos.m_index_2);
+ if (m_row_exists)
+ {
+ m_next_pos.set_after(&m_pos);
+ return 0;
+ }
+ }
+ }
+ return HA_ERR_END_OF_FILE;
+}
+
+
+int cursor_by_thread_connect_attr::rnd_pos(const void *pos)
+{
+ PFS_thread *thread;
+
+ set_position(pos);
+ DBUG_ASSERT(m_pos.m_index_1 < thread_max);
+
+ thread= &thread_array[m_pos.m_index_1];
+ if (!thread->m_lock.is_populated())
+ return HA_ERR_RECORD_DELETED;
+
+ make_row(thread, m_pos.m_index_2);
+ if (m_row_exists)
+ return 0;
+
+ return HA_ERR_RECORD_DELETED;
+}
+
+
+void cursor_by_thread_connect_attr::reset_position(void)
+{
+ m_pos.reset();
+ m_next_pos.reset();
+}
diff --git a/storage/perfschema/cursor_by_thread_connect_attr.h b/storage/perfschema/cursor_by_thread_connect_attr.h
new file mode 100644
index 00000000000..fbce56f208d
--- /dev/null
+++ b/storage/perfschema/cursor_by_thread_connect_attr.h
@@ -0,0 +1,81 @@
+/* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
+
+#ifndef CURSOR_BY_THREAD_CONNECT_ATTR_H
+#define CURSOR_BY_THREAD_CONNECT_ATTR_H
+
+#include "pfs_column_types.h"
+#include "pfs_engine_table.h"
+#include "pfs_instr.h"
+
+/**
+ \addtogroup Performance_schema_tables
+ @{
+*/
+
+struct pos_connect_attr_by_thread_by_attr
+: public PFS_double_index
+{
+ pos_connect_attr_by_thread_by_attr()
+ : PFS_double_index(0, 0)
+ {}
+
+ inline bool has_more_thread(void)
+ {
+ return (m_index_1 < thread_max);
+ }
+
+ inline void next_thread(void)
+ {
+ m_index_1++;
+ m_index_2= 0;
+ }
+
+ inline void reset(void)
+ {
+ m_index_1= 0;
+ m_index_2= 0;
+ }
+};
+
+/** Cursor CURSOR_BY_THREAD_CONNECT_ATTR. */
+class cursor_by_thread_connect_attr : public PFS_engine_table
+{
+public:
+ virtual int rnd_next();
+ virtual int rnd_pos(const void *pos);
+ virtual void reset_position(void);
+
+protected:
+ cursor_by_thread_connect_attr(const PFS_engine_table_share *share);
+
+public:
+ ~cursor_by_thread_connect_attr()
+ {}
+
+protected:
+ virtual void make_row(PFS_thread *thread, uint ordinal)= 0;
+ /** True if row exists */
+ bool m_row_exists;
+
+private:
+ /** Current position. */
+ pos_connect_attr_by_thread_by_attr m_pos;
+ /** Next position. */
+ pos_connect_attr_by_thread_by_attr m_next_pos;
+};
+
+/** @} */
+#endif
diff --git a/storage/perfschema/gen_pfs_lex_token.cc b/storage/perfschema/gen_pfs_lex_token.cc
index b7470061de1..7581255b284 100644
--- a/storage/perfschema/gen_pfs_lex_token.cc
+++ b/storage/perfschema/gen_pfs_lex_token.cc
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -243,7 +243,7 @@ void print_tokens()
int main(int argc,char **argv)
{
puts("/*");
- puts(ORACLE_WELCOME_COPYRIGHT_NOTICE("2011, 2012"));
+ puts(ORACLE_WELCOME_COPYRIGHT_NOTICE("2011"));
puts("*/");
printf("/*\n");
diff --git a/storage/perfschema/ha_perfschema.cc b/storage/perfschema/ha_perfschema.cc
index 773d822af2b..50bdb043566 100644
--- a/storage/perfschema/ha_perfschema.cc
+++ b/storage/perfschema/ha_perfschema.cc
@@ -166,6 +166,8 @@ static struct st_mysql_show_var pfs_status_vars[]=
(char*) &statement_class_lost, SHOW_LONG},
{"Performance_schema_digest_lost",
(char*) &digest_lost, SHOW_LONG},
+ {"Performance_schema_session_connect_attrs_lost",
+ (char*) &session_connect_attrs_lost, SHOW_LONG},
{NullS, NullS, SHOW_LONG}
};
@@ -256,12 +258,12 @@ int ha_perfschema::write_row(uchar *buf)
int result;
DBUG_ENTER("ha_perfschema::write_row");
+ if (!pfs_initialized)
+ DBUG_RETURN(HA_ERR_WRONG_COMMAND);
- ha_statistic_increment(&SSV::ha_write_count);
DBUG_ASSERT(m_table_share);
-
+ ha_statistic_increment(&SSV::ha_write_count);
result= m_table_share->write_row(table, buf, table->field);
-
DBUG_RETURN(result);
}
@@ -279,7 +281,9 @@ void ha_perfschema::use_hidden_primary_key(void)
int ha_perfschema::update_row(const uchar *old_data, uchar *new_data)
{
DBUG_ENTER("ha_perfschema::update_row");
-
+ if (!pfs_initialized)
+ DBUG_RETURN(HA_ERR_WRONG_COMMAND);
+
DBUG_ASSERT(m_table);
ha_statistic_increment(&SSV::ha_update_count);
int result= m_table->update_row(table, old_data, new_data, table->field);
@@ -289,6 +293,8 @@ int ha_perfschema::update_row(const uchar *old_data, uchar *new_data)
int ha_perfschema::delete_row(const uchar *buf)
{
DBUG_ENTER("ha_perfschema::delete_row");
+ if (!pfs_initialized)
+ DBUG_RETURN(HA_ERR_WRONG_COMMAND);
DBUG_ASSERT(m_table);
ha_statistic_increment(&SSV::ha_delete_count);
@@ -329,6 +335,8 @@ int ha_perfschema::rnd_end(void)
int ha_perfschema::rnd_next(uchar *buf)
{
DBUG_ENTER("ha_perfschema::rnd_next");
+ if (!pfs_initialized)
+ DBUG_RETURN(HA_ERR_END_OF_FILE);
DBUG_ASSERT(m_table);
ha_statistic_increment(&SSV::ha_read_rnd_next_count);
@@ -355,6 +363,8 @@ void ha_perfschema::position(const uchar *record)
int ha_perfschema::rnd_pos(uchar *buf, uchar *pos)
{
DBUG_ENTER("ha_perfschema::rnd_pos");
+ if (!pfs_initialized)
+ DBUG_RETURN(HA_ERR_END_OF_FILE);
DBUG_ASSERT(m_table);
ha_statistic_increment(&SSV::ha_read_rnd_count);
@@ -380,6 +390,8 @@ int ha_perfschema::delete_all_rows(void)
int result;
DBUG_ENTER("ha_perfschema::delete_all_rows");
+ if (!pfs_initialized)
+ DBUG_RETURN(0);
DBUG_ASSERT(m_table_share);
if (m_table_share->m_delete_all_rows)
diff --git a/storage/perfschema/ha_perfschema.h b/storage/perfschema/ha_perfschema.h
index dc465da3758..c2929046f3d 100644
--- a/storage/perfschema/ha_perfschema.h
+++ b/storage/perfschema/ha_perfschema.h
@@ -72,8 +72,7 @@ public:
records.
*/
return (HA_NO_TRANSACTIONS | HA_REC_NOT_IN_SEQ | HA_NO_AUTO_INCREMENT |
- HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE |
- HA_PRIMARY_KEY_REQUIRED_FOR_DELETE | HA_HAS_OWN_BINLOGGING);
+ HA_PRIMARY_KEY_REQUIRED_FOR_DELETE);
}
/**
diff --git a/storage/perfschema/pfs.cc b/storage/perfschema/pfs.cc
index d3de38d025c..33b21ee2817 100644
--- a/storage/perfschema/pfs.cc
+++ b/storage/perfschema/pfs.cc
@@ -1,4 +1,4 @@
-/* Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -688,6 +688,7 @@ static inline int mysql_mutex_lock(...)
- socket io (MYSQL_SOCKET)
- table io
- table lock
+ - idle
The flow of data between aggregates tables varies for each instrumentation.
@@ -857,24 +858,35 @@ static inline int mysql_mutex_lock(...)
@subsection IMPL_WAIT_SOCKET Socket waits
@verbatim
- socket_locker(T, F)
+ socket_locker(T, S)
|
| [1]
|
- |-> pfs_socket(F) =====>> [A], [B], [C], [D], [E]
+ |-> pfs_socket(S) =====>> [A], [B], [C], [D], [E]
|
| [2]
|
- |-> pfs_socket_class(F.class) =====>> [C], [D]
+ |-> pfs_socket_class(S.class) =====>> [C], [D]
|
- |-> pfs_thread(T).event_name(F) =====>> [A]
+ |-> pfs_thread(T).event_name(S) =====>> [A]
|
- ...
+ | [3]
+ |
+ 3a |-> pfs_account(U, H).event_name(S) =====>> [F], [G], [H]
+ . |
+ . | [4-RESET]
+ . |
+ 3b .....+-> pfs_user(U).event_name(S) =====>> [G]
+ . |
+ 3c .....+-> pfs_host(H).event_name(S) =====>> [H]
@endverbatim
Implemented as:
- [1] @c start_socket_wait_v1(), @c end_socket_wait_v1().
- [2] @c close_socket_v1()
+ - [3] @c aggregate_thread_waits()
+ - [4] @c PFS_account::aggregate_waits()
+ - [5] @c PFS_host::aggregate_waits()
- [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
@c table_ews_by_thread_by_event_name::make_row()
- [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
@@ -885,37 +897,78 @@ static inline int mysql_mutex_lock(...)
@c table_socket_summary_by_event_name::make_row()
- [E] SOCKET_SUMMARY_BY_INSTANCE,
@c table_socket_summary_by_instance::make_row()
+ - [F] EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
+ @c table_ews_by_account_by_event_name::make_row()
+ - [G] EVENTS_WAITS_SUMMARY_BY_USER_BY_EVENT_NAME,
+ @c table_ews_by_user_by_event_name::make_row()
+ - [H] EVENTS_WAITS_SUMMARY_BY_HOST_BY_EVENT_NAME,
+ @c table_ews_by_host_by_event_name::make_row()
@subsection IMPL_WAIT_TABLE Table waits
@verbatim
- table_locker(T, Tb)
+ table_locker(Thread Th, Table Tb, Event = io or lock)
|
| [1]
|
- |-> pfs_table(Tb) =====>> [B], [C], [D]
- |
- | [2]
- |
- |-> pfs_table_share(Tb.share) =====>> [C], [D]
- |
- |-> pfs_thread(T).event_name(Tb) =====>> [A]
- |
- ...
+1a |-> pfs_table(Tb) =====>> [A], [B], [C]
+ | |
+ | | [2]
+ | |
+ | |-> pfs_table_share(Tb.share) =====>> [B], [C]
+ | |
+ | | [3]
+ | |
+ | |-> global_table_io_stat =====>> [C]
+ | |
+ | |-> global_table_lock_stat =====>> [C]
+ |
+1b |-> pfs_thread(Th).event_name(E) =====>> [D], [E], [F], [G]
+ | |
+ | | [ 4-RESET]
+ | |
+ | |-> pfs_account(U, H).event_name(E) =====>> [E], [F], [G]
+ | . |
+ | . | [5-RESET]
+ | . |
+ | .....+-> pfs_user(U).event_name(E) =====>> [F]
+ | . |
+ | .....+-> pfs_host(H).event_name(E) =====>> [G]
+ |
+1c |-> pfs_thread(Th).waits_current(W) =====>> [H]
+ |
+1d |-> pfs_thread(Th).waits_history(W) =====>> [I]
+ |
+1e |-> waits_history_long(W) =====>> [J]
@endverbatim
Implemented as:
- [1] @c start_table_io_wait_v1(), @c end_table_io_wait_v1()
- [2] @c close_table_v1()
- - [A] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
- @c table_ews_by_thread_by_event_name::make_row()
- - [B] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
+ - [3] @c drop_table_share_v1()
+ - [4] @c TRUNCATE TABLE EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
+ - [5] @c TRUNCATE TABLE EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME
+ - [A] EVENTS_WAITS_SUMMARY_BY_INSTANCE,
@c table_events_waits_summary_by_instance::make_table_row()
+ - [B] OBJECTS_SUMMARY_GLOBAL_BY_TYPE,
+ @c table_os_global_by_type::make_row()
- [C] EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME,
@c table_ews_global_by_event_name::make_table_io_row(),
@c table_ews_global_by_event_name::make_table_lock_row()
- - [D] OBJECTS_SUMMARY_GLOBAL_BY_TYPE,
- @c table_os_global_by_type::make_row()
+ - [D] EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME,
+ @c table_ews_by_thread_by_event_name::make_row()
+ - [E] EVENTS_WAITS_SUMMARY_BY_ACCOUNT_BY_EVENT_NAME,
+ @c table_ews_by_user_by_account_name::make_row()
+ - [F] EVENTS_WAITS_SUMMARY_BY_USER_BY_EVENT_NAME,
+ @c table_ews_by_user_by_event_name::make_row()
+ - [G] EVENTS_WAITS_SUMMARY_BY_HOST_BY_EVENT_NAME,
+ @c table_ews_by_host_by_event_name::make_row()
+ - [H] EVENTS_WAITS_CURRENT,
+ @c table_events_waits_common::make_row()
+ - [I] EVENTS_WAITS_HISTORY,
+ @c table_events_waits_common::make_row()
+ - [J] EVENTS_WAITS_HISTORY_LONG,
+ @c table_events_waits_common::make_row()
@section IMPL_STAGE Implementation for stages aggregates
@@ -1594,7 +1647,6 @@ static void unbind_table_v1(PSI_table *table)
PFS_table *pfs= reinterpret_cast<PFS_table*> (table);
if (likely(pfs != NULL))
{
- pfs->aggregate();
pfs->m_thread_owner= NULL;
}
}
@@ -1615,12 +1667,6 @@ rebind_table_v1(PSI_table_share *share, const void *identity, PSI_table *table)
/* The table handle was already instrumented, reuse it for this thread. */
thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
- if (unlikely(thread == NULL))
- {
- destroy_table(pfs);
- return NULL;
- }
-
if (unlikely(! pfs->m_share->m_enabled))
{
destroy_table(pfs);
@@ -1660,8 +1706,6 @@ rebind_table_v1(PSI_table_share *share, const void *identity, PSI_table *table)
return NULL;
PFS_thread *thread= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
- if (unlikely(thread == NULL))
- return NULL;
PFS_table *pfs_table= create_table(pfs_table_share, thread, identity);
return reinterpret_cast<PSI_table *> (pfs_table);
@@ -1681,9 +1725,18 @@ static void close_table_v1(PSI_table *table)
}
static PSI_socket*
-init_socket_v1(PSI_socket_key key, const my_socket *fd)
+init_socket_v1(PSI_socket_key key, const my_socket *fd,
+ const struct sockaddr *addr, socklen_t addr_len)
{
- INIT_BODY_V1(socket, key, fd);
+ PFS_socket_class *klass;
+ PFS_socket *pfs;
+ klass= find_socket_class(key);
+ if (unlikely(klass == NULL))
+ return NULL;
+ if (! klass->m_enabled)
+ return NULL;
+ pfs= create_socket(klass, fd, addr, addr_len);
+ return reinterpret_cast<PSI_socket *> (pfs);
}
static void destroy_socket_v1(PSI_socket *socket)
@@ -1731,7 +1784,7 @@ static void create_file_v1(PSI_file_key key, const char *name, File file)
}
uint len= strlen(name);
- PFS_file *pfs_file= find_or_create_file(pfs_thread, klass, name, len);
+ PFS_file *pfs_file= find_or_create_file(pfs_thread, klass, name, len, true);
file_handle_array[index]= pfs_file;
}
@@ -1835,13 +1888,13 @@ static int spawn_thread_v1(PSI_thread_key key,
@sa PSI_v1::new_thread.
*/
static PSI_thread*
-new_thread_v1(PSI_thread_key key, const void *identity, ulong thread_id)
+new_thread_v1(PSI_thread_key key, const void *identity, ulonglong processlist_id)
{
PFS_thread *pfs;
PFS_thread_class *klass= find_thread_class(key);
if (likely(klass != NULL))
- pfs= create_thread(klass, identity, thread_id);
+ pfs= create_thread(klass, identity, processlist_id);
else
pfs= NULL;
@@ -1852,12 +1905,12 @@ new_thread_v1(PSI_thread_key key, const void *identity, ulong thread_id)
Implementation of the thread instrumentation interface.
@sa PSI_v1::set_thread_id.
*/
-static void set_thread_id_v1(PSI_thread *thread, unsigned long id)
+static void set_thread_id_v1(PSI_thread *thread, ulonglong processlist_id)
{
PFS_thread *pfs= reinterpret_cast<PFS_thread*> (thread);
if (unlikely(pfs == NULL))
return;
- pfs->m_thread_id= id;
+ pfs->m_processlist_id= processlist_id;
}
/**
@@ -2045,10 +2098,10 @@ static void set_thread_state_v1(const char* state)
{
int state_len= state ? strlen(state) : 0;
- pfs->m_lock.allocated_to_dirty();
+ pfs->m_processlist_lock.allocated_to_dirty();
pfs->m_processlist_state_ptr= state;
pfs->m_processlist_state_length= state_len;
- pfs->m_lock.dirty_to_allocated();
+ pfs->m_processlist_lock.dirty_to_allocated();
}
}
@@ -2060,12 +2113,14 @@ static void set_thread_info_v1(const char* info, int info_len)
{
PFS_thread *pfs= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
+ DBUG_ASSERT((info != NULL) || (info_len == 0));
+
if (likely(pfs != NULL))
{
- pfs->m_lock.allocated_to_dirty();
+ pfs->m_processlist_lock.allocated_to_dirty();
pfs->m_processlist_info_ptr= info;
pfs->m_processlist_info_length= info_len;
- pfs->m_lock.dirty_to_allocated();
+ pfs->m_processlist_lock.dirty_to_allocated();
}
}
@@ -2196,7 +2251,7 @@ start_mutex_wait_v1(PSI_mutex_locker_state *state,
Complete shortcut.
*/
/* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
- pfs_mutex->m_wait_stat.aggregate_counted();
+ pfs_mutex->m_mutex_stat.m_wait_stat.aggregate_counted();
return NULL;
}
}
@@ -2294,7 +2349,7 @@ start_rwlock_wait_v1(PSI_rwlock_locker_state *state,
Complete shortcut.
*/
/* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
- pfs_rwlock->m_wait_stat.aggregate_counted();
+ pfs_rwlock->m_rwlock_stat.m_wait_stat.aggregate_counted();
return NULL;
}
}
@@ -2401,7 +2456,7 @@ start_cond_wait_v1(PSI_cond_locker_state *state,
Complete shortcut.
*/
/* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
- pfs_cond->m_wait_stat.aggregate_counted();
+ pfs_cond->m_cond_stat.m_wait_stat.aggregate_counted();
return NULL;
}
}
@@ -2478,8 +2533,6 @@ start_table_io_wait_v1(PSI_table_locker_state *state,
return NULL;
PFS_thread *pfs_thread= pfs_table->m_thread_owner;
- if (unlikely(pfs_thread == NULL))
- return NULL;
DBUG_ASSERT(pfs_thread ==
my_pthread_getspecific_ptr(PFS_thread*, THR_PFS));
@@ -2489,6 +2542,8 @@ start_table_io_wait_v1(PSI_table_locker_state *state,
if (flag_thread_instrumentation)
{
+ if (pfs_thread == NULL)
+ return NULL;
if (! pfs_thread->m_enabled)
return NULL;
state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
@@ -2538,7 +2593,6 @@ start_table_io_wait_v1(PSI_table_locker_state *state,
pfs_thread->m_events_waits_current++;
}
- /* TODO: consider a shortcut here */
}
else
{
@@ -2585,11 +2639,6 @@ start_table_lock_wait_v1(PSI_table_locker_state *state,
return NULL;
PFS_thread *pfs_thread= pfs_table->m_thread_owner;
- if (unlikely(pfs_thread == NULL))
- return NULL;
-
- DBUG_ASSERT(pfs_thread ==
- my_pthread_getspecific_ptr(PFS_thread*, THR_PFS));
PFS_TL_LOCK_TYPE lock_type;
@@ -2619,6 +2668,8 @@ start_table_lock_wait_v1(PSI_table_locker_state *state,
if (flag_thread_instrumentation)
{
+ if (pfs_thread == NULL)
+ return NULL;
if (! pfs_thread->m_enabled)
return NULL;
state->m_thread= reinterpret_cast<PSI_thread *> (pfs_thread);
@@ -2668,7 +2719,6 @@ start_table_lock_wait_v1(PSI_table_locker_state *state,
pfs_thread->m_events_waits_current++;
}
- /* TODO: consider a shortcut here */
}
else
{
@@ -2729,11 +2779,6 @@ get_thread_file_name_locker_v1(PSI_file_locker_state *state,
if (klass->m_timed)
flags|= STATE_FLAG_TIMED;
- uint len= strlen(name);
- PFS_file *pfs_file= find_or_create_file(pfs_thread, klass, name, len);
- if (unlikely(pfs_file == NULL))
- return NULL;
-
if (flag_events_waits_current)
{
if (unlikely(pfs_thread->m_events_waits_current >=
@@ -2755,9 +2800,9 @@ get_thread_file_name_locker_v1(PSI_file_locker_state *state,
wait->m_class= klass;
wait->m_timer_start= 0;
wait->m_timer_end= 0;
- wait->m_object_instance_addr= pfs_file;
- wait->m_weak_file= pfs_file;
- wait->m_weak_version= pfs_file->get_version();
+ wait->m_object_instance_addr= NULL;
+ wait->m_weak_file= NULL;
+ wait->m_weak_version= 0;
wait->m_event_id= pfs_thread->m_event_id++;
wait->m_end_event_id= 0;
wait->m_operation= file_operation_map[static_cast<int> (op)];
@@ -2767,7 +2812,9 @@ get_thread_file_name_locker_v1(PSI_file_locker_state *state,
}
state->m_flags= flags;
- state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
+ state->m_file= NULL;
+ state->m_name= name;
+ state->m_class= klass;
state->m_operation= op;
return reinterpret_cast<PSI_file_locker*> (state);
}
@@ -2788,6 +2835,7 @@ get_thread_file_stream_locker_v1(PSI_file_locker_state *state,
if (unlikely(pfs_file == NULL))
return NULL;
DBUG_ASSERT(pfs_file->m_class != NULL);
+ PFS_file_class *klass= pfs_file->m_class;
if (! pfs_file->m_enabled)
return NULL;
@@ -2825,7 +2873,7 @@ get_thread_file_stream_locker_v1(PSI_file_locker_state *state,
wait->m_nesting_event_type= parent_event->m_event_type;
wait->m_thread= pfs_thread;
- wait->m_class= pfs_file->m_class;
+ wait->m_class= klass;
wait->m_timer_start= 0;
wait->m_timer_end= 0;
wait->m_object_instance_addr= pfs_file;
@@ -2856,6 +2904,8 @@ get_thread_file_stream_locker_v1(PSI_file_locker_state *state,
state->m_flags= flags;
state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
state->m_operation= op;
+ state->m_name= NULL;
+ state->m_class= klass;
return reinterpret_cast<PSI_file_locker*> (state);
}
@@ -2890,10 +2940,12 @@ get_thread_file_descriptor_locker_v1(PSI_file_locker_state *state,
if (op == PSI_FILE_CLOSE)
file_handle_array[index]= NULL;
- DBUG_ASSERT(pfs_file->m_class != NULL);
if (! pfs_file->m_enabled)
return NULL;
+ DBUG_ASSERT(pfs_file->m_class != NULL);
+ PFS_file_class *klass= pfs_file->m_class;
+
register uint flags;
if (flag_thread_instrumentation)
@@ -2927,7 +2979,7 @@ get_thread_file_descriptor_locker_v1(PSI_file_locker_state *state,
wait->m_nesting_event_type= parent_event->m_event_type;
wait->m_thread= pfs_thread;
- wait->m_class= pfs_file->m_class;
+ wait->m_class= klass;
wait->m_timer_start= 0;
wait->m_timer_end= 0;
wait->m_object_instance_addr= pfs_file;
@@ -2958,6 +3010,8 @@ get_thread_file_descriptor_locker_v1(PSI_file_locker_state *state,
state->m_flags= flags;
state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
state->m_operation= op;
+ state->m_name= NULL;
+ state->m_class= klass;
return reinterpret_cast<PSI_file_locker*> (state);
}
@@ -2991,14 +3045,6 @@ start_socket_wait_v1(PSI_socket_locker_state *state,
if (unlikely(pfs_thread == NULL))
return NULL;
-#ifdef LATER
- /*
- Needs refinement, because of KILL.
- */
- DBUG_ASSERT(pfs_thread ==
- my_pthread_getspecific_ptr(PFS_thread*, THR_PFS));
-#endif
-
if (!pfs_thread->m_enabled)
return NULL;
@@ -3112,22 +3158,15 @@ static void unlock_mutex_v1(PSI_mutex *mutex)
PFS_mutex::m_lock_stat is not exposed in user visible tables
currently, so there is no point spending time computing it.
*/
- PFS_thread *pfs_thread= reinterpret_cast<PFS_thread*> (thread);
- DBUG_ASSERT(pfs_thread != NULL);
-
- if (unlikely(! flag_events_waits_current))
- return;
- if (! pfs_mutex->m_class->m_enabled)
+ if (! pfs_mutex->m_enabled)
return;
- if (! pfs_thread->m_enabled)
+
+ if (! pfs_mutex->m_timed)
return;
- if (pfs_mutex->m_class->m_timed)
- {
- ulonglong locked_time;
- locked_time= get_timer_pico_value(wait_timer) - pfs_mutex->m_last_locked;
- aggregate_single_stat_chain(&pfs_mutex->m_lock_stat, locked_time);
- }
+ ulonglong locked_time;
+ locked_time= get_timer_pico_value(wait_timer) - pfs_mutex->m_last_locked;
+ pfs_mutex->m_mutex_stat.m_lock_stat.aggregate_value(locked_time);
#endif
}
@@ -3185,32 +3224,23 @@ static void unlock_rwlock_v1(PSI_rwlock *rwlock)
#ifdef LATER_WL2333
/* See WL#2333: SHOW ENGINE ... LOCK STATUS. */
- PFS_thread *pfs_thread= reinterpret_cast<PFS_thread*> (thread);
- DBUG_ASSERT(pfs_thread != NULL);
- if (unlikely(! flag_events_waits_current))
- return;
- if (! pfs_rwlock->m_class->m_enabled)
+ if (! pfs_rwlock->m_enabled)
return;
- if (! pfs_thread->m_enabled)
+
+ if (! pfs_rwlock->m_timed)
return;
ulonglong locked_time;
if (last_writer)
{
- if (pfs_rwlock->m_class->m_timed)
- {
- locked_time= get_timer_pico_value(wait_timer) - pfs_rwlock->m_last_written;
- aggregate_single_stat_chain(&pfs_rwlock->m_write_lock_stat, locked_time);
- }
+ locked_time= get_timer_pico_value(wait_timer) - pfs_rwlock->m_last_written;
+ pfs_rwlock->m_rwlock_stat.m_write_lock_stat.aggregate_value(locked_time);
}
else if (last_reader)
{
- if (pfs_rwlock->m_class->m_timed)
- {
- locked_time= get_timer_pico_value(wait_timer) - pfs_rwlock->m_last_read;
- aggregate_single_stat_chain(&pfs_rwlock->m_read_lock_stat, locked_time);
- }
+ locked_time= get_timer_pico_value(wait_timer) - pfs_rwlock->m_last_read;
+ pfs_rwlock->m_rwlock_stat.m_read_lock_stat.aggregate_value(locked_time);
}
#else
(void) last_reader;
@@ -3352,17 +3382,16 @@ static void end_idle_wait_v1(PSI_idle_locker* locker)
PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
PFS_single_stat *event_name_array;
event_name_array= thread->m_instr_class_waits_stats;
- uint index= global_idle_class.m_event_name_index;
if (flags & STATE_FLAG_TIMED)
{
/* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (timed) */
- event_name_array[index].aggregate_value(wait_time);
+ event_name_array[GLOBAL_IDLE_EVENT_INDEX].aggregate_value(wait_time);
}
else
{
/* Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME (counted) */
- event_name_array[index].aggregate_counted();
+ event_name_array[GLOBAL_IDLE_EVENT_INDEX].aggregate_counted();
}
if (flags & STATE_FLAG_EVENT)
@@ -3379,6 +3408,17 @@ static void end_idle_wait_v1(PSI_idle_locker* locker)
thread->m_events_waits_current--;
}
}
+
+ if (flags & STATE_FLAG_TIMED)
+ {
+ /* Aggregate to EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME (timed) */
+ global_idle_stat.aggregate_value(wait_time);
+ }
+ else
+ {
+ /* Aggregate to EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME (counted) */
+ global_idle_stat.aggregate_counted();
+ }
}
/**
@@ -3404,12 +3444,12 @@ static void end_mutex_wait_v1(PSI_mutex_locker* locker, int rc)
timer_end= state->m_timer();
wait_time= timer_end - state->m_timer_start;
/* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
- mutex->m_wait_stat.aggregate_value(wait_time);
+ mutex->m_mutex_stat.m_wait_stat.aggregate_value(wait_time);
}
else
{
/* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
- mutex->m_wait_stat.aggregate_counted();
+ mutex->m_mutex_stat.m_wait_stat.aggregate_counted();
}
if (likely(rc == 0))
@@ -3471,12 +3511,12 @@ static void end_rwlock_rdwait_v1(PSI_rwlock_locker* locker, int rc)
timer_end= state->m_timer();
wait_time= timer_end - state->m_timer_start;
/* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
- rwlock->m_wait_stat.aggregate_value(wait_time);
+ rwlock->m_rwlock_stat.m_wait_stat.aggregate_value(wait_time);
}
else
{
/* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
- rwlock->m_wait_stat.aggregate_counted();
+ rwlock->m_rwlock_stat.m_wait_stat.aggregate_counted();
}
if (rc == 0)
@@ -3551,12 +3591,12 @@ static void end_rwlock_wrwait_v1(PSI_rwlock_locker* locker, int rc)
timer_end= state->m_timer();
wait_time= timer_end - state->m_timer_start;
/* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
- rwlock->m_wait_stat.aggregate_value(wait_time);
+ rwlock->m_rwlock_stat.m_wait_stat.aggregate_value(wait_time);
}
else
{
/* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
- rwlock->m_wait_stat.aggregate_counted();
+ rwlock->m_rwlock_stat.m_wait_stat.aggregate_counted();
}
if (likely(rc == 0))
@@ -3622,12 +3662,12 @@ static void end_cond_wait_v1(PSI_cond_locker* locker, int rc)
timer_end= state->m_timer();
wait_time= timer_end - state->m_timer_start;
/* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (timed) */
- cond->m_wait_stat.aggregate_value(wait_time);
+ cond->m_cond_stat.m_wait_stat.aggregate_value(wait_time);
}
else
{
/* Aggregate to EVENTS_WAITS_SUMMARY_BY_INSTANCE (counted) */
- cond->m_wait_stat.aggregate_counted();
+ cond->m_cond_stat.m_wait_stat.aggregate_counted();
}
if (state->m_flags & STATE_FLAG_THREAD)
@@ -3682,23 +3722,27 @@ static void end_table_io_wait_v1(PSI_table_locker* locker)
DBUG_ASSERT(table != NULL);
PFS_single_stat *stat;
+ PFS_table_io_stat *table_io_stat;
DBUG_ASSERT((state->m_index < table->m_share->m_key_count) ||
- (state->m_index == MAX_KEY));
+ (state->m_index == MAX_INDEXES));
+
+ table_io_stat= & table->m_table_stat.m_index_stat[state->m_index];
+ table_io_stat->m_has_data= true;
switch (state->m_io_operation)
{
case PSI_TABLE_FETCH_ROW:
- stat= & table->m_table_stat.m_index_stat[state->m_index].m_fetch;
+ stat= & table_io_stat->m_fetch;
break;
case PSI_TABLE_WRITE_ROW:
- stat= & table->m_table_stat.m_index_stat[state->m_index].m_insert;
+ stat= & table_io_stat->m_insert;
break;
case PSI_TABLE_UPDATE_ROW:
- stat= & table->m_table_stat.m_index_stat[state->m_index].m_update;
+ stat= & table_io_stat->m_update;
break;
case PSI_TABLE_DELETE_ROW:
- stat= & table->m_table_stat.m_index_stat[state->m_index].m_delete;
+ stat= & table_io_stat->m_delete;
break;
default:
DBUG_ASSERT(false);
@@ -3719,22 +3763,40 @@ static void end_table_io_wait_v1(PSI_table_locker* locker)
stat->aggregate_counted();
}
- if (flags & STATE_FLAG_EVENT)
+ if (flags & STATE_FLAG_THREAD)
{
- DBUG_ASSERT(flags & STATE_FLAG_THREAD);
PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
DBUG_ASSERT(thread != NULL);
- PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
- DBUG_ASSERT(wait != NULL);
+ PFS_single_stat *event_name_array;
+ event_name_array= thread->m_instr_class_waits_stats;
- wait->m_timer_end= timer_end;
- wait->m_end_event_id= thread->m_event_id;
- if (flag_events_waits_history)
- insert_events_waits_history(thread, wait);
- if (flag_events_waits_history_long)
- insert_events_waits_history_long(wait);
- thread->m_events_waits_current--;
+ /*
+ Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
+ (for wait/io/table/sql/handler)
+ */
+ if (flags & STATE_FLAG_TIMED)
+ {
+ event_name_array[GLOBAL_TABLE_IO_EVENT_INDEX].aggregate_value(wait_time);
+ }
+ else
+ {
+ event_name_array[GLOBAL_TABLE_IO_EVENT_INDEX].aggregate_counted();
+ }
+
+ if (flags & STATE_FLAG_EVENT)
+ {
+ PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
+ DBUG_ASSERT(wait != NULL);
+
+ wait->m_timer_end= timer_end;
+ wait->m_end_event_id= thread->m_event_id;
+ if (flag_events_waits_history)
+ insert_events_waits_history(thread, wait);
+ if (flag_events_waits_history_long)
+ insert_events_waits_history_long(wait);
+ thread->m_events_waits_current--;
+ }
}
table->m_has_io_stats= true;
@@ -3770,22 +3832,40 @@ static void end_table_lock_wait_v1(PSI_table_locker* locker)
stat->aggregate_counted();
}
- if (flags & STATE_FLAG_EVENT)
+ if (flags & STATE_FLAG_THREAD)
{
- DBUG_ASSERT(flags & STATE_FLAG_THREAD);
PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
DBUG_ASSERT(thread != NULL);
- PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
- DBUG_ASSERT(wait != NULL);
+ PFS_single_stat *event_name_array;
+ event_name_array= thread->m_instr_class_waits_stats;
- wait->m_timer_end= timer_end;
- wait->m_end_event_id= thread->m_event_id;
- if (flag_events_waits_history)
- insert_events_waits_history(thread, wait);
- if (flag_events_waits_history_long)
- insert_events_waits_history_long(wait);
- thread->m_events_waits_current--;
+ /*
+ Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
+ (for wait/lock/table/sql/handler)
+ */
+ if (flags & STATE_FLAG_TIMED)
+ {
+ event_name_array[GLOBAL_TABLE_LOCK_EVENT_INDEX].aggregate_value(wait_time);
+ }
+ else
+ {
+ event_name_array[GLOBAL_TABLE_LOCK_EVENT_INDEX].aggregate_counted();
+ }
+
+ if (flags & STATE_FLAG_EVENT)
+ {
+ PFS_events_waits *wait= reinterpret_cast<PFS_events_waits*> (state->m_wait);
+ DBUG_ASSERT(wait != NULL);
+
+ wait->m_timer_end= timer_end;
+ wait->m_end_event_id= thread->m_event_id;
+ if (flag_events_waits_history)
+ insert_events_waits_history(thread, wait);
+ if (flag_events_waits_history_long)
+ insert_events_waits_history_long(wait);
+ thread->m_events_waits_current--;
+ }
}
table->m_has_lock_stats= true;
@@ -3803,25 +3883,50 @@ static void end_file_wait_v1(PSI_file_locker *locker,
Implementation of the file instrumentation interface.
@sa PSI_v1::start_file_open_wait.
*/
-static PSI_file* start_file_open_wait_v1(PSI_file_locker *locker,
- const char *src_file,
- uint src_line)
+static void start_file_open_wait_v1(PSI_file_locker *locker,
+ const char *src_file,
+ uint src_line)
{
- PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
- DBUG_ASSERT(state != NULL);
-
start_file_wait_v1(locker, 0, src_file, src_line);
- return state->m_file;
+ return;
}
/**
Implementation of the file instrumentation interface.
@sa PSI_v1::end_file_open_wait.
*/
-static void end_file_open_wait_v1(PSI_file_locker *locker)
+static PSI_file* end_file_open_wait_v1(PSI_file_locker *locker,
+ void *result)
{
+ PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
+ DBUG_ASSERT(state != NULL);
+
+ switch (state->m_operation)
+ {
+ case PSI_FILE_STAT:
+ break;
+ case PSI_FILE_STREAM_OPEN:
+ case PSI_FILE_CREATE:
+ if (result != NULL)
+ {
+ PFS_file_class *klass= reinterpret_cast<PFS_file_class*> (state->m_class);
+ PFS_thread *thread= reinterpret_cast<PFS_thread*> (state->m_thread);
+ const char *name= state->m_name;
+ uint len= strlen(name);
+ PFS_file *pfs_file= find_or_create_file(thread, klass, name, len, true);
+ state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
+ }
+ break;
+ case PSI_FILE_OPEN:
+ default:
+ DBUG_ASSERT(false);
+ break;
+ }
+
end_file_wait_v1(locker, 0);
+
+ return state->m_file;
}
/**
@@ -3831,25 +3936,33 @@ static void end_file_open_wait_v1(PSI_file_locker *locker)
static void end_file_open_wait_and_bind_to_descriptor_v1
(PSI_file_locker *locker, File file)
{
+ PFS_file *pfs_file= NULL;
int index= (int) file;
PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
DBUG_ASSERT(state != NULL);
- end_file_wait_v1(locker, 0);
+ if (index >= 0)
+ {
+ PFS_file_class *klass= reinterpret_cast<PFS_file_class*> (state->m_class);
+ PFS_thread *thread= reinterpret_cast<PFS_thread*> (state->m_thread);
+ const char *name= state->m_name;
+ uint len= strlen(name);
+ pfs_file= find_or_create_file(thread, klass, name, len, true);
+ state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
+ }
- PFS_file *pfs_file= reinterpret_cast<PFS_file*> (state->m_file);
- DBUG_ASSERT(pfs_file != NULL);
+ end_file_wait_v1(locker, 0);
if (likely(index >= 0))
{
if (likely(index < file_handle_max))
file_handle_array[index]= pfs_file;
else
+ {
+ if (pfs_file != NULL)
+ release_file(pfs_file);
file_handle_lost++;
- }
- else
- {
- release_file(pfs_file);
+ }
}
}
@@ -3896,7 +4009,7 @@ static void end_file_wait_v1(PSI_file_locker *locker,
PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
DBUG_ASSERT(state != NULL);
PFS_file *file= reinterpret_cast<PFS_file *> (state->m_file);
- DBUG_ASSERT(file != NULL);
+ PFS_file_class *klass= reinterpret_cast<PFS_file_class *> (state->m_class);
PFS_thread *thread= reinterpret_cast<PFS_thread *> (state->m_thread);
ulonglong timer_end= 0;
@@ -3905,15 +4018,26 @@ static void end_file_wait_v1(PSI_file_locker *locker,
register uint flags= state->m_flags;
size_t bytes= ((int)byte_count > -1 ? byte_count : 0);
+ PFS_file_stat *file_stat;
+
+ if (file != NULL)
+ {
+ file_stat= & file->m_file_stat;
+ }
+ else
+ {
+ file_stat= & klass->m_file_stat;
+ }
+
switch (state->m_operation)
{
/* Group read operations */
case PSI_FILE_READ:
- byte_stat= &file->m_file_stat.m_io_stat.m_read;
+ byte_stat= &file_stat->m_io_stat.m_read;
break;
/* Group write operations */
case PSI_FILE_WRITE:
- byte_stat= &file->m_file_stat.m_io_stat.m_write;
+ byte_stat= &file_stat->m_io_stat.m_write;
break;
/* Group remaining operations as miscellaneous */
case PSI_FILE_CREATE:
@@ -3931,7 +4055,7 @@ static void end_file_wait_v1(PSI_file_locker *locker,
case PSI_FILE_SYNC:
case PSI_FILE_STAT:
case PSI_FILE_CLOSE:
- byte_stat= &file->m_file_stat.m_io_stat.m_misc;
+ byte_stat= &file_stat->m_io_stat.m_misc;
break;
default:
DBUG_ASSERT(false);
@@ -3959,7 +4083,7 @@ static void end_file_wait_v1(PSI_file_locker *locker,
PFS_single_stat *event_name_array;
event_name_array= thread->m_instr_class_waits_stats;
- uint index= file->m_class->m_event_name_index;
+ uint index= klass->m_event_name_index;
if (flags & STATE_FLAG_TIMED)
{
@@ -3980,6 +4104,9 @@ static void end_file_wait_v1(PSI_file_locker *locker,
wait->m_timer_end= timer_end;
wait->m_number_of_bytes= bytes;
wait->m_end_event_id= thread->m_event_id;
+ wait->m_object_instance_addr= file;
+ wait->m_weak_file= file;
+ wait->m_weak_version= (file ? file->get_version() : 0);
if (flag_events_waits_history)
insert_events_waits_history(thread, wait);
@@ -3988,22 +4115,79 @@ static void end_file_wait_v1(PSI_file_locker *locker,
thread->m_events_waits_current--;
}
}
+}
- /* Release or destroy the file if necessary */
- switch(state->m_operation)
+/**
+ Implementation of the file instrumentation interface.
+ @sa PSI_v1::start_file_close_wait.
+*/
+static void start_file_close_wait_v1(PSI_file_locker *locker,
+ const char *src_file,
+ uint src_line)
+{
+ PFS_thread *thread;
+ const char *name;
+ uint len;
+ PFS_file *pfs_file;
+ PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
+ DBUG_ASSERT(state != NULL);
+
+ switch (state->m_operation)
{
- case PSI_FILE_CLOSE:
- case PSI_FILE_STREAM_CLOSE:
- case PSI_FILE_STAT:
- release_file(file);
- break;
case PSI_FILE_DELETE:
- DBUG_ASSERT(thread != NULL);
- destroy_file(thread, file);
+ thread= reinterpret_cast<PFS_thread*> (state->m_thread);
+ name= state->m_name;
+ len= strlen(name);
+ pfs_file= find_or_create_file(thread, NULL, name, len, false);
+ state->m_file= reinterpret_cast<PSI_file*> (pfs_file);
+ break;
+ case PSI_FILE_STREAM_CLOSE:
+ case PSI_FILE_CLOSE:
break;
default:
+ DBUG_ASSERT(false);
break;
}
+
+ start_file_wait_v1(locker, 0, src_file, src_line);
+
+ return;
+}
+
+/**
+ Implementation of the file instrumentation interface.
+ @sa PSI_v1::end_file_close_wait.
+*/
+static void end_file_close_wait_v1(PSI_file_locker *locker, int rc)
+{
+ PSI_file_locker_state *state= reinterpret_cast<PSI_file_locker_state*> (locker);
+ DBUG_ASSERT(state != NULL);
+
+ end_file_wait_v1(locker, 0);
+
+ if (rc == 0)
+ {
+ PFS_thread *thread= reinterpret_cast<PFS_thread*> (state->m_thread);
+ PFS_file *file= reinterpret_cast<PFS_file*> (state->m_file);
+
+ /* Release or destroy the file if necessary */
+ switch(state->m_operation)
+ {
+ case PSI_FILE_CLOSE:
+ case PSI_FILE_STREAM_CLOSE:
+ if (file != NULL)
+ release_file(file);
+ break;
+ case PSI_FILE_DELETE:
+ if (file != NULL)
+ destroy_file(thread, file);
+ break;
+ default:
+ DBUG_ASSERT(false);
+ break;
+ }
+ }
+ return;
}
static void start_stage_v1(PSI_stage_key key, const char *src_file, int src_line)
@@ -4165,7 +4349,8 @@ static void end_stage_v1()
static PSI_statement_locker*
get_thread_statement_locker_v1(PSI_statement_locker_state *state,
- PSI_statement_key key)
+ PSI_statement_key key,
+ const void *charset)
{
DBUG_ASSERT(state != NULL);
if (! flag_global_instrumentation)
@@ -4262,9 +4447,11 @@ get_thread_statement_locker_v1(PSI_statement_locker_state *state,
if (flag_statements_digest)
{
+ const CHARSET_INFO *cs= static_cast <const CHARSET_INFO*> (charset);
flags|= STATE_FLAG_DIGEST;
state->m_digest_state.m_last_id_index= 0;
digest_reset(& state->m_digest_state.m_digest_storage);
+ state->m_digest_state.m_digest_storage.m_charset_number= cs->number;
}
state->m_discarded= false;
@@ -4288,6 +4475,8 @@ get_thread_statement_locker_v1(PSI_statement_locker_state *state,
state->m_no_index_used= 0;
state->m_no_good_index_used= 0;
+ state->m_schema_name_length= 0;
+
return reinterpret_cast<PSI_statement_locker*> (state);
}
@@ -4352,6 +4541,13 @@ static void start_statement_v1(PSI_statement_locker *locker,
state->m_timer_start= timer_start;
}
+ compile_time_assert(PSI_SCHEMA_NAME_LEN == NAME_LEN);
+ DBUG_ASSERT(db_len <= sizeof(state->m_schema_name));
+
+ if (db_len > 0)
+ memcpy(state->m_schema_name, db, db_len);
+ state->m_schema_name_length= db_len;
+
if (flags & STATE_FLAG_EVENT)
{
PFS_events_statements *pfs= reinterpret_cast<PFS_events_statements*> (state->m_statement);
@@ -4563,11 +4759,10 @@ static void end_statement_v1(PSI_statement_locker *locker, void *stmt_da)
if (flags & STATE_FLAG_DIGEST)
{
digest_storage= &state->m_digest_state.m_digest_storage;
-
- /*
- Populate PFS_statements_digest_stat with computed digest information.
- */
- digest_stat= find_or_create_digest(thread, digest_storage);
+ /* Populate PFS_statements_digest_stat with computed digest information.*/
+ digest_stat= find_or_create_digest(thread, digest_storage,
+ state->m_schema_name,
+ state->m_schema_name_length);
}
if (flags & STATE_FLAG_EVENT)
@@ -4633,11 +4828,10 @@ static void end_statement_v1(PSI_statement_locker *locker, void *stmt_da)
{
/* Set digest stat. */
digest_storage= &state->m_digest_state.m_digest_storage;
-
- /*
- Populate PFS_statements_digest_stat with computed digest information.
- */
- digest_stat= find_or_create_digest(thread, digest_storage);
+ /* Populate statements_digest_stat with computed digest information. */
+ digest_stat= find_or_create_digest(thread, digest_storage,
+ state->m_schema_name,
+ state->m_schema_name_length);
}
}
@@ -4869,6 +5063,42 @@ static void set_socket_thread_owner_v1(PSI_socket *socket)
pfs_socket->m_thread_owner= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
}
+
+/**
+ Implementation of the thread attribute connection interface
+ @sa PSI_v1::set_thread_connect_attr.
+*/
+static int set_thread_connect_attrs_v1(const char *buffer, uint length,
+ const void *from_cs)
+{
+
+ PFS_thread *thd= my_pthread_getspecific_ptr(PFS_thread*, THR_PFS);
+
+ DBUG_ASSERT(buffer != NULL);
+
+ if (likely(thd != NULL) && session_connect_attrs_size_per_thread > 0)
+ {
+ /* copy from the input buffer as much as we can fit */
+ uint copy_size= (uint)(length < session_connect_attrs_size_per_thread ?
+ length : session_connect_attrs_size_per_thread);
+ thd->m_lock.allocated_to_dirty();
+ memcpy(thd->m_session_connect_attrs, buffer, copy_size);
+ thd->m_session_connect_attrs_length= copy_size;
+ thd->m_session_connect_attrs_cs= (const CHARSET_INFO *) from_cs;
+ thd->m_lock.dirty_to_allocated();
+
+ if (copy_size == length)
+ return 0;
+ else
+ {
+ session_connect_attrs_lost++;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+
/**
Implementation of the instrumentation interface.
@sa PSI_v1.
@@ -4939,6 +5169,8 @@ PSI_v1 PFS_v1=
end_file_open_wait_and_bind_to_descriptor_v1,
start_file_wait_v1,
end_file_wait_v1,
+ start_file_close_wait_v1,
+ end_file_close_wait_v1,
start_stage_v1,
end_stage_v1,
get_thread_statement_locker_v1,
@@ -4968,7 +5200,8 @@ PSI_v1 PFS_v1=
set_socket_info_v1,
set_socket_thread_owner_v1,
pfs_digest_start_v1,
- pfs_digest_add_token_v1
+ pfs_digest_add_token_v1,
+ set_thread_connect_attrs_v1,
};
static void* get_interface(int version)
diff --git a/storage/perfschema/pfs_account.cc b/storage/perfschema/pfs_account.cc
index 18716478681..9221fc3b991 100644
--- a/storage/perfschema/pfs_account.cc
+++ b/storage/perfschema/pfs_account.cc
@@ -45,7 +45,7 @@ static PFS_single_stat *account_instr_class_waits_array= NULL;
static PFS_stage_stat *account_instr_class_stages_array= NULL;
static PFS_statement_stat *account_instr_class_statements_array= NULL;
-static LF_HASH account_hash;
+LF_HASH account_hash;
static bool account_hash_inited= false;
/**
@@ -149,10 +149,11 @@ C_MODE_END
*/
int init_account_hash(void)
{
- if (! account_hash_inited)
+ if ((! account_hash_inited) && (account_max > 0))
{
lf_hash_init(&account_hash, sizeof(PFS_account*), LF_HASH_UNIQUE,
0, 0, account_hash_get_key, &my_charset_bin);
+ account_hash.size= account_max;
account_hash_inited= true;
}
return 0;
diff --git a/storage/perfschema/pfs_account.h b/storage/perfschema/pfs_account.h
index 77a9dfab7ba..1ac379e0fc9 100644
--- a/storage/perfschema/pfs_account.h
+++ b/storage/perfschema/pfs_account.h
@@ -46,7 +46,7 @@ struct PFS_account_key
uint m_key_length;
};
-struct PFS_account : PFS_connection_slice
+struct PFS_ALIGNED PFS_account : PFS_connection_slice
{
public:
inline void init_refcount(void)
@@ -115,6 +115,8 @@ extern ulong account_lost;
extern PFS_account *account_array;
+extern LF_HASH account_hash;
+
/** @} */
#endif
diff --git a/storage/perfschema/pfs_atomic.h b/storage/perfschema/pfs_atomic.h
index ffb4c24ecbf..61b8c2b2804 100644
--- a/storage/perfschema/pfs_atomic.h
+++ b/storage/perfschema/pfs_atomic.h
@@ -43,6 +43,16 @@ public:
}
/** Atomic load. */
+ static inline int64 load_64(volatile int64 *ptr)
+ {
+ int64 result;
+ rdlock(ptr);
+ result= my_atomic_load64(ptr);
+ rdunlock(ptr);
+ return result;
+ }
+
+ /** Atomic load. */
static inline uint32 load_u32(volatile uint32 *ptr)
{
uint32 result;
@@ -52,6 +62,16 @@ public:
return result;
}
+ /** Atomic load. */
+ static inline uint64 load_u64(volatile uint64 *ptr)
+ {
+ uint64 result;
+ rdlock(ptr);
+ result= (uint64) my_atomic_load64((int64*) ptr);
+ rdunlock(ptr);
+ return result;
+ }
+
/** Atomic store. */
static inline void store_32(volatile int32 *ptr, int32 value)
{
@@ -61,6 +81,14 @@ public:
}
/** Atomic store. */
+ static inline void store_64(volatile int64 *ptr, int64 value)
+ {
+ wrlock(ptr);
+ my_atomic_store64(ptr, value);
+ wrunlock(ptr);
+ }
+
+ /** Atomic store. */
static inline void store_u32(volatile uint32 *ptr, uint32 value)
{
wrlock(ptr);
@@ -68,6 +96,14 @@ public:
wrunlock(ptr);
}
+ /** Atomic store. */
+ static inline void store_u64(volatile uint64 *ptr, uint64 value)
+ {
+ wrlock(ptr);
+ my_atomic_store64((int64*) ptr, (int64) value);
+ wrunlock(ptr);
+ }
+
/** Atomic add. */
static inline int32 add_32(volatile int32 *ptr, int32 value)
{
@@ -79,6 +115,16 @@ public:
}
/** Atomic add. */
+ static inline int64 add_64(volatile int64 *ptr, int64 value)
+ {
+ int64 result;
+ wrlock(ptr);
+ result= my_atomic_add64(ptr, value);
+ wrunlock(ptr);
+ return result;
+ }
+
+ /** Atomic add. */
static inline uint32 add_u32(volatile uint32 *ptr, uint32 value)
{
uint32 result;
@@ -88,6 +134,16 @@ public:
return result;
}
+ /** Atomic add. */
+ static inline uint64 add_u64(volatile uint64 *ptr, uint64 value)
+ {
+ uint64 result;
+ wrlock(ptr);
+ result= (uint64) my_atomic_add64((int64*) ptr, (int64) value);
+ wrunlock(ptr);
+ return result;
+ }
+
/** Atomic compare and swap. */
static inline bool cas_32(volatile int32 *ptr, int32 *old_value,
int32 new_value)
@@ -100,6 +156,17 @@ public:
}
/** Atomic compare and swap. */
+ static inline bool cas_64(volatile int64 *ptr, int64 *old_value,
+ int64 new_value)
+ {
+ bool result;
+ wrlock(ptr);
+ result= my_atomic_cas64(ptr, old_value, new_value);
+ wrunlock(ptr);
+ return result;
+ }
+
+ /** Atomic compare and swap. */
static inline bool cas_u32(volatile uint32 *ptr, uint32 *old_value,
uint32 new_value)
{
@@ -111,6 +178,18 @@ public:
return result;
}
+ /** Atomic compare and swap. */
+ static inline bool cas_u64(volatile uint64 *ptr, uint64 *old_value,
+ uint64 new_value)
+ {
+ bool result;
+ wrlock(ptr);
+ result= my_atomic_cas64((int64*) ptr, (int64*) old_value,
+ (uint64) new_value);
+ wrunlock(ptr);
+ return result;
+ }
+
private:
static my_atomic_rwlock_t m_rwlock_array[256];
diff --git a/storage/perfschema/pfs_autosize.cc b/storage/perfschema/pfs_autosize.cc
new file mode 100644
index 00000000000..38bd36d8321
--- /dev/null
+++ b/storage/perfschema/pfs_autosize.cc
@@ -0,0 +1,366 @@
+/* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
+
+/**
+ @file storage/perfschema/pfs_autosize.cc
+ Private interface for the server (implementation).
+*/
+
+#include "my_global.h"
+#include "sql_const.h"
+#include "pfs_server.h"
+
+#include <algorithm>
+using std::min;
+using std::max;
+
+static const ulong fixed_mutex_instances= 500;
+static const ulong fixed_rwlock_instances= 200;
+static const ulong fixed_cond_instances= 50;
+static const ulong fixed_file_instances= 200;
+static const ulong fixed_socket_instances= 10;
+static const ulong fixed_thread_instances= 50;
+
+static const ulong mutex_per_connection= 3;
+static const ulong rwlock_per_connection= 1;
+static const ulong cond_per_connection= 2;
+static const ulong file_per_connection= 0;
+static const ulong socket_per_connection= 1;
+static const ulong thread_per_connection= 1;
+
+static const ulong mutex_per_handle= 0;
+static const ulong rwlock_per_handle= 0;
+static const ulong cond_per_handle= 0;
+static const ulong file_per_handle= 0;
+static const ulong socket_per_handle= 0;
+static const ulong thread_per_handle= 0;
+
+static const ulong mutex_per_share= 5;
+static const ulong rwlock_per_share= 3;
+static const ulong cond_per_share= 1;
+static const ulong file_per_share= 3;
+static const ulong socket_per_share= 0;
+static const ulong thread_per_share= 0;
+
+struct PFS_sizing_data
+{
+ /** Default value for @c PFS_param.m_account_sizing. */
+ ulong m_account_sizing;
+ /** Default value for @c PFS_param.m_user_sizing. */
+ ulong m_user_sizing;
+ /** Default value for @c PFS_param.m_host_sizing. */
+ ulong m_host_sizing;
+
+ /** Default value for @c PFS_param.m_events_waits_history_sizing. */
+ ulong m_events_waits_history_sizing;
+ /** Default value for @c PFS_param.m_events_waits_history_long_sizing. */
+ ulong m_events_waits_history_long_sizing;
+ /** Default value for @c PFS_param.m_events_stages_history_sizing. */
+ ulong m_events_stages_history_sizing;
+ /** Default value for @c PFS_param.m_events_stages_history_long_sizing. */
+ ulong m_events_stages_history_long_sizing;
+ /** Default value for @c PFS_param.m_events_statements_history_sizing. */
+ ulong m_events_statements_history_sizing;
+ /** Default value for @c PFS_param.m_events_statements_history_long_sizing. */
+ ulong m_events_statements_history_long_sizing;
+ /** Default value for @c PFS_param.m_digest_sizing. */
+ ulong m_digest_sizing;
+ /** Default value for @c PFS_param.m_session_connect_attrs_sizing. */
+ ulong m_session_connect_attrs_sizing;
+
+ /**
+ Minimum number of tables to keep statistics for.
+ On small deployments, all the tables can fit into the table definition cache,
+ and this value can be 0.
+ On big deployments, the table definition cache is only a subset of all the tables
+ in the database, which are accounted for here.
+ */
+ ulong m_min_number_of_tables;
+
+ /**
+ Load factor for 'volatile' objects (mutexes, table handles, ...).
+ Instrumented objects that:
+ - use little memory
+ - are created/destroyed very frequently
+ should be stored in a low density (mostly empty) memory buffer,
+ to optimize for speed.
+ */
+ float m_load_factor_volatile;
+ /**
+ Load factor for 'normal' objects (files).
+ Instrumented objects that:
+ - use a medium amount of memory
+ - are created/destroyed
+ should be stored in a medium density memory buffer,
+ as a trade off between space and speed.
+ */
+ float m_load_factor_normal;
+ /**
+ Load factor for 'static' objects (table shares).
+ Instrumented objects that:
+ - use a lot of memory
+ - are created/destroyed very rarely
+ can be stored in a high density (mostly packed) memory buffer,
+ to optimize for space.
+ */
+ float m_load_factor_static;
+};
+
+PFS_sizing_data small_data=
+{
+ /* Account / user / host */
+ 10, 5, 20,
+ /* History sizes */
+ 5, 100, 5, 100, 5, 100,
+ /* Digests */
+ 1000,
+ /* Session connect attrs. */
+ 512,
+ /* Min tables */
+ 200,
+ /* Load factors */
+ 0.90, 0.90, 0.90
+};
+
+PFS_sizing_data medium_data=
+{
+ /* Account / user / host */
+ 100, 100, 100,
+ /* History sizes */
+ 10, 1000, 10, 1000, 10, 1000,
+ /* Digests */
+ 5000,
+ /* Session connect attrs. */
+ 512,
+ /* Min tables */
+ 500,
+ /* Load factors */
+ 0.70, 0.80, 0.90
+};
+
+PFS_sizing_data large_data=
+{
+ /* Account / user / host */
+ 100, 100, 100,
+ /* History sizes */
+ 10, 10000, 10, 10000, 10, 10000,
+ /* Digests */
+ 10000,
+ /* Session connect attrs. */
+ 512,
+ /* Min tables */
+ 10000,
+ /* Load factors */
+ 0.50, 0.65, 0.80
+};
+
+static inline ulong apply_load_factor(ulong raw_value, float factor)
+{
+ float value = ((float) raw_value) / factor;
+ return (ulong) ceil(value);
+}
+
+PFS_sizing_data *estimate_hints(PFS_global_param *param)
+{
+ if ((param->m_hints.m_max_connections <= MAX_CONNECTIONS_DEFAULT) &&
+ (param->m_hints.m_table_definition_cache <= TABLE_DEF_CACHE_DEFAULT) &&
+ (param->m_hints.m_table_open_cache <= TABLE_OPEN_CACHE_DEFAULT))
+ {
+ /* The my.cnf used is either unchanged, or lower than factory defaults. */
+ return & small_data;
+ }
+
+ if ((param->m_hints.m_max_connections <= MAX_CONNECTIONS_DEFAULT * 2) &&
+ (param->m_hints.m_table_definition_cache <= TABLE_DEF_CACHE_DEFAULT * 2) &&
+ (param->m_hints.m_table_open_cache <= TABLE_OPEN_CACHE_DEFAULT * 2))
+ {
+ /* Some defaults have been increased, to "moderate" values. */
+ return & medium_data;
+ }
+
+ /* Looks like a server in production. */
+ return & large_data;
+}
+
+static void apply_heuristic(PFS_global_param *p, PFS_sizing_data *h)
+{
+ ulong count;
+ ulong con = p->m_hints.m_max_connections;
+ ulong handle = p->m_hints.m_table_open_cache;
+ ulong share = p->m_hints.m_table_definition_cache;
+ ulong file = p->m_hints.m_open_files_limit;
+
+ if (p->m_table_sizing < 0)
+ {
+ count= handle;
+
+ p->m_table_sizing= apply_load_factor(count, h->m_load_factor_volatile);
+ }
+
+ if (p->m_table_share_sizing < 0)
+ {
+ count= share;
+
+ count= max<ulong>(count, h->m_min_number_of_tables);
+ p->m_table_share_sizing= apply_load_factor(count, h->m_load_factor_static);
+ }
+
+ if (p->m_account_sizing < 0)
+ {
+ p->m_account_sizing= h->m_account_sizing;
+ }
+
+ if (p->m_user_sizing < 0)
+ {
+ p->m_user_sizing= h->m_user_sizing;
+ }
+
+ if (p->m_host_sizing < 0)
+ {
+ p->m_host_sizing= h->m_host_sizing;
+ }
+
+ if (p->m_events_waits_history_sizing < 0)
+ {
+ p->m_events_waits_history_sizing= h->m_events_waits_history_sizing;
+ }
+
+ if (p->m_events_waits_history_long_sizing < 0)
+ {
+ p->m_events_waits_history_long_sizing= h->m_events_waits_history_long_sizing;
+ }
+
+ if (p->m_events_stages_history_sizing < 0)
+ {
+ p->m_events_stages_history_sizing= h->m_events_stages_history_sizing;
+ }
+
+ if (p->m_events_stages_history_long_sizing < 0)
+ {
+ p->m_events_stages_history_long_sizing= h->m_events_stages_history_long_sizing;
+ }
+
+ if (p->m_events_statements_history_sizing < 0)
+ {
+ p->m_events_statements_history_sizing= h->m_events_statements_history_sizing;
+ }
+
+ if (p->m_events_statements_history_long_sizing < 0)
+ {
+ p->m_events_statements_history_long_sizing= h->m_events_statements_history_long_sizing;
+ }
+
+ if (p->m_digest_sizing < 0)
+ {
+ p->m_digest_sizing= h->m_digest_sizing;
+ }
+
+ if (p->m_session_connect_attrs_sizing < 0)
+ {
+ p->m_session_connect_attrs_sizing= h->m_session_connect_attrs_sizing;
+ }
+
+ if (p->m_mutex_sizing < 0)
+ {
+ count= fixed_mutex_instances
+ + con * mutex_per_connection
+ + handle * mutex_per_handle
+ + share * mutex_per_share;
+
+ p->m_mutex_sizing= apply_load_factor(count, h->m_load_factor_volatile);
+ }
+
+ if (p->m_rwlock_sizing < 0)
+ {
+ count= fixed_rwlock_instances
+ + con * rwlock_per_connection
+ + handle * rwlock_per_handle
+ + share * rwlock_per_share;
+
+ p->m_rwlock_sizing= apply_load_factor(count, h->m_load_factor_volatile);
+ }
+
+ if (p->m_cond_sizing < 0)
+ {
+ ulong count;
+ count= fixed_cond_instances
+ + con * cond_per_connection
+ + handle * cond_per_handle
+ + share * cond_per_share;
+
+ p->m_cond_sizing= apply_load_factor(count, h->m_load_factor_volatile);
+ }
+
+ if (p->m_file_sizing < 0)
+ {
+ count= fixed_file_instances
+ + con * file_per_connection
+ + handle * file_per_handle
+ + share * file_per_share;
+
+ count= max<ulong>(count, file);
+ p->m_file_sizing= apply_load_factor(count, h->m_load_factor_normal);
+ }
+
+ if (p->m_socket_sizing < 0)
+ {
+ count= fixed_socket_instances
+ + con * socket_per_connection
+ + handle * socket_per_handle
+ + share * socket_per_share;
+
+ p->m_socket_sizing= apply_load_factor(count, h->m_load_factor_volatile);
+ }
+
+ if (p->m_thread_sizing < 0)
+ {
+ count= fixed_thread_instances
+ + con * thread_per_connection
+ + handle * thread_per_handle
+ + share * thread_per_share;
+
+ p->m_thread_sizing= apply_load_factor(count, h->m_load_factor_volatile);
+ }
+}
+
+void pfs_automated_sizing(PFS_global_param *param)
+{
+ PFS_sizing_data *heuristic;
+ heuristic= estimate_hints(param);
+ apply_heuristic(param, heuristic);
+
+ DBUG_ASSERT(param->m_account_sizing >= 0);
+ DBUG_ASSERT(param->m_digest_sizing >= 0);
+ DBUG_ASSERT(param->m_host_sizing >= 0);
+ DBUG_ASSERT(param->m_user_sizing >= 0);
+
+ DBUG_ASSERT(param->m_events_waits_history_sizing >= 0);
+ DBUG_ASSERT(param->m_events_waits_history_long_sizing >= 0);
+ DBUG_ASSERT(param->m_events_stages_history_sizing >= 0);
+ DBUG_ASSERT(param->m_events_stages_history_long_sizing >= 0);
+ DBUG_ASSERT(param->m_events_statements_history_sizing >= 0);
+ DBUG_ASSERT(param->m_events_statements_history_long_sizing >= 0);
+ DBUG_ASSERT(param->m_session_connect_attrs_sizing >= 0);
+
+ DBUG_ASSERT(param->m_mutex_sizing >= 0);
+ DBUG_ASSERT(param->m_rwlock_sizing >= 0);
+ DBUG_ASSERT(param->m_cond_sizing >= 0);
+ DBUG_ASSERT(param->m_file_sizing >= 0);
+ DBUG_ASSERT(param->m_socket_sizing >= 0);
+ DBUG_ASSERT(param->m_thread_sizing >= 0);
+ DBUG_ASSERT(param->m_table_sizing >= 0);
+ DBUG_ASSERT(param->m_table_share_sizing >= 0);
+}
+
diff --git a/storage/perfschema/pfs_digest.cc b/storage/perfschema/pfs_digest.cc
index 92c27b2e85f..c5df64d9243 100644
--- a/storage/perfschema/pfs_digest.cc
+++ b/storage/perfschema/pfs_digest.cc
@@ -1,4 +1,4 @@
-/* Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2008, 2012, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -30,6 +30,7 @@
#include "table_helper.h"
#include "my_md5.h"
#include "sql_lex.h"
+#include "sql_string.h"
#include <string.h>
/* Generated code */
@@ -58,7 +59,6 @@
ulong digest_max= 0;
ulong digest_lost= 0;
-
/** EVENTS_STATEMENTS_HISTORY_LONG circular buffer. */
PFS_statements_digest_stat *statements_digest_stat_array= NULL;
/** Consumer flag for table EVENTS_STATEMENTS_SUMMARY_BY_DIGEST. */
@@ -69,7 +69,7 @@ bool flag_statements_digest= true;
*/
volatile uint32 digest_index= 1;
-static LF_HASH digest_hash;
+LF_HASH digest_hash;
static bool digest_hash_inited= false;
/**
@@ -123,8 +123,8 @@ static uchar *digest_hash_get_key(const uchar *entry, size_t *length,
DBUG_ASSERT(typed_entry != NULL);
digest= *typed_entry;
DBUG_ASSERT(digest != NULL);
- *length= PFS_MD5_SIZE;
- result= digest->m_digest_hash.m_md5;
+ *length= sizeof (PFS_digest_key);
+ result= & digest->m_digest_key;
return const_cast<uchar*> (reinterpret_cast<const uchar*> (result));
}
C_MODE_END
@@ -136,11 +136,12 @@ C_MODE_END
*/
int init_digest_hash(void)
{
- if (! digest_hash_inited)
+ if ((! digest_hash_inited) && (digest_max > 0))
{
lf_hash_init(&digest_hash, sizeof(PFS_statements_digest_stat*),
LF_HASH_UNIQUE, 0, 0, digest_hash_get_key,
&my_charset_bin);
+ digest_hash.size= digest_max;
digest_hash_inited= true;
}
return 0;
@@ -167,8 +168,10 @@ static LF_PINS* get_digest_hash_pins(PFS_thread *thread)
}
PFS_statement_stat*
-find_or_create_digest(PFS_thread* thread,
- PSI_digest_storage* digest_storage)
+find_or_create_digest(PFS_thread *thread,
+ PSI_digest_storage *digest_storage,
+ const char *schema_name,
+ uint schema_name_length)
{
if (statements_digest_stat_array == NULL)
return NULL;
@@ -180,13 +183,21 @@ find_or_create_digest(PFS_thread* thread,
if (unlikely(pins == NULL))
return NULL;
+ /*
+ Note: the LF_HASH key is a block of memory,
+ make sure to clean unused bytes,
+ so that memcmp() can compare keys.
+ */
+ PFS_digest_key hash_key;
+ memset(& hash_key, 0, sizeof(hash_key));
/* Compute MD5 Hash of the tokens received. */
- PFS_digest_hash md5;
- compute_md5_hash((char *) md5.m_md5,
+ compute_md5_hash((char *) hash_key.m_md5,
(char *) digest_storage->m_token_array,
digest_storage->m_byte_count);
-
- unsigned char* hash_key= md5.m_md5;
+ /* Add the current schema to the key */
+ hash_key.m_schema_name_length= schema_name_length;
+ if (schema_name_length > 0)
+ memcpy(hash_key.m_schema_name, schema_name, schema_name_length);
int res;
ulong safe_index;
@@ -202,7 +213,7 @@ search:
/* Lookup LF_HASH using this new key. */
entry= reinterpret_cast<PFS_statements_digest_stat**>
(lf_hash_search(&digest_hash, pins,
- hash_key, PFS_MD5_SIZE));
+ &hash_key, sizeof(PFS_digest_key)));
if (entry && (entry != MY_ERRPTR))
{
@@ -244,7 +255,7 @@ search:
pfs= &statements_digest_stat_array[safe_index];
/* Copy digest hash/LF Hash search key. */
- memcpy(pfs->m_digest_hash.m_md5, md5.m_md5, PFS_MD5_SIZE);
+ memcpy(& pfs->m_digest_key, &hash_key, sizeof(PFS_digest_key));
/*
Copy digest storage to statement_digest_stat_array so that it could be
@@ -278,7 +289,7 @@ search:
return NULL;
}
-void purge_digest(PFS_thread* thread, unsigned char* hash_key)
+void purge_digest(PFS_thread* thread, PFS_digest_key *hash_key)
{
LF_PINS *pins= get_digest_hash_pins(thread);
if (unlikely(pins == NULL))
@@ -289,12 +300,12 @@ void purge_digest(PFS_thread* thread, unsigned char* hash_key)
/* Lookup LF_HASH using this new key. */
entry= reinterpret_cast<PFS_statements_digest_stat**>
(lf_hash_search(&digest_hash, pins,
- hash_key, PFS_MD5_SIZE));
+ hash_key, sizeof(PFS_digest_key)));
if (entry && (entry != MY_ERRPTR))
- {
+ {
lf_hash_delete(&digest_hash, pins,
- hash_key, PFS_MD5_SIZE);
+ hash_key, sizeof(PFS_digest_key));
}
lf_hash_search_unpin(pins);
return;
@@ -313,7 +324,7 @@ void PFS_statements_digest_stat::reset_index(PFS_thread *thread)
/* Only remove entries that exists in the HASH index. */
if (m_digest_storage.m_byte_count > 0)
{
- purge_digest(thread, m_digest_hash.m_md5);
+ purge_digest(thread, & m_digest_key);
}
}
@@ -347,98 +358,130 @@ void reset_esms_by_digest()
*/
void get_digest_text(char* digest_text, PSI_digest_storage* digest_storage)
{
+ DBUG_ASSERT(digest_storage != NULL);
bool truncated= false;
int byte_count= digest_storage->m_byte_count;
- int need_bytes;
+ int bytes_needed= 0;
uint tok= 0;
- char *id_string;
- int id_length;
int current_byte= 0;
lex_token_string *tok_data;
/* -4 is to make sure extra space for '...' and a '\0' at the end. */
- int available_bytes_to_write= COL_DIGEST_TEXT_SIZE - 4;
+ int bytes_available= COL_DIGEST_TEXT_SIZE - 4;
+
+ /* Convert text to utf8 */
+ const CHARSET_INFO *from_cs= get_charset(digest_storage->m_charset_number, MYF(0));
+ const CHARSET_INFO *to_cs= &my_charset_utf8_bin;
+
+ if (from_cs == NULL)
+ {
+ /*
+ Can happen, as we do dirty reads on digest_storage,
+ which can be written to in another thread.
+ */
+ *digest_text= '\0';
+ return;
+ }
+
+ /*
+ Max converted size is number of characters * max multibyte length of the
+ target charset, which is 4 for UTF8.
+ */
+ const uint max_converted_size= PSI_MAX_DIGEST_STORAGE_SIZE * 4;
+ char id_buffer[max_converted_size];
+ char *id_string;
+ int id_length;
+ bool convert_text= !my_charset_same(from_cs, to_cs);
DBUG_ASSERT(byte_count <= PSI_MAX_DIGEST_STORAGE_SIZE);
while ((current_byte < byte_count) &&
- (available_bytes_to_write > 0) &&
- (! truncated))
+ (bytes_available > 0) &&
+ !truncated)
{
current_byte= read_token(digest_storage, current_byte, &tok);
- tok_data= & lex_token_array[tok];
+ tok_data= &lex_token_array[tok];
switch (tok)
{
/* All identifiers are printed with their name. */
case IDENT:
- current_byte= read_identifier(digest_storage, current_byte,
- & id_string, & id_length);
- need_bytes= id_length + 1; /* <id> space */
- if (need_bytes <= available_bytes_to_write)
+ case IDENT_QUOTED:
{
- if (id_length > 0)
+ char *id_ptr;
+ int id_len;
+ uint err_cs= 0;
+
+ /* Get the next identifier from the storage buffer. */
+ current_byte= read_identifier(digest_storage, current_byte,
+ &id_ptr, &id_len);
+ if (convert_text)
{
- strncpy(digest_text, id_string, id_length);
- digest_text+= id_length;
+ /* Verify that the converted text will fit. */
+ if (to_cs->mbmaxlen*id_len > max_converted_size)
+ {
+ truncated= true;
+ break;
+ }
+ /* Convert identifier string into the storage character set. */
+ id_length= my_convert(id_buffer, max_converted_size, to_cs,
+ id_ptr, id_len, from_cs, &err_cs);
+ id_string= id_buffer;
}
- *digest_text= ' ';
- digest_text++;
- available_bytes_to_write-= need_bytes;
- }
- else
- {
- truncated= true;
- }
- break;
- case IDENT_QUOTED:
- current_byte= read_identifier(digest_storage, current_byte,
- & id_string, & id_length);
- need_bytes= id_length + 3; /* quote <id> quote space */
- if (need_bytes <= available_bytes_to_write)
- {
- *digest_text= '`';
- digest_text++;
- if (id_length > 0)
+ else
{
- strncpy(digest_text, id_string, id_length);
- digest_text+= id_length;
+ id_string= id_ptr;
+ id_length= id_len;
+ }
+
+ if (id_length == 0 || err_cs != 0)
+ {
+ truncated= true;
+ break;
+ }
+ /* Copy the converted identifier into the digest string. */
+ bytes_needed= id_length + (tok == IDENT ? 1 : 3);
+ if (bytes_needed <= bytes_available)
+ {
+ if (tok == IDENT_QUOTED)
+ *digest_text++= '`';
+ if (id_length > 0)
+ {
+ memcpy(digest_text, id_string, id_length);
+ digest_text+= id_length;
+ }
+ if (tok == IDENT_QUOTED)
+ *digest_text++= '`';
+ *digest_text++= ' ';
+ bytes_available-= bytes_needed;
+ }
+ else
+ {
+ truncated= true;
}
- *digest_text= '`';
- digest_text++;
- *digest_text= ' ';
- digest_text++;
- available_bytes_to_write-= need_bytes;
- }
- else
- {
- truncated= true;
}
break;
/* Everything else is printed as is. */
default:
/*
- Make sure not to overflow digest_text buffer while writing
- this token string.
+ Make sure not to overflow digest_text buffer.
+1 is to make sure extra space for ' '.
*/
int tok_length= tok_data->m_token_length;
- need_bytes= tok_length + 1;
+ bytes_needed= tok_length + 1;
- if (need_bytes <= available_bytes_to_write)
+ if (bytes_needed <= bytes_available)
{
- strncpy(digest_text,
- tok_data->m_token_string,
- tok_length);
+ strncpy(digest_text, tok_data->m_token_string, tok_length);
digest_text+= tok_length;
- *digest_text= ' ';
- digest_text++;
- available_bytes_to_write-= need_bytes;
+ *digest_text++= ' ';
+ bytes_available-= bytes_needed;
}
else
{
truncated= true;
}
+ break;
}
}
@@ -524,7 +567,11 @@ PSI_digest_locker* pfs_digest_add_token_v1(PSI_digest_locker *locker,
digest_storage= &state->m_digest_storage;
- if (digest_storage->m_full)
+ /*
+ Stop collecting further tokens if digest storage is full or
+ if END token is received.
+ */
+ if (digest_storage->m_full || token == END_OF_INPUT)
return NULL;
/*
@@ -555,19 +602,23 @@ PSI_digest_locker* pfs_digest_add_token_v1(PSI_digest_locker *locker,
TOK_PFS_GENERIC_VALUE := BIN_NUM | DECIMAL_NUM | ... | ULONGLONG_NUM
*/
token= TOK_PFS_GENERIC_VALUE;
-
+ }
+ /* fall through */
+ case NULL_SYM:
+ {
if ((last_token2 == TOK_PFS_GENERIC_VALUE ||
- last_token2 == TOK_PFS_GENERIC_VALUE_LIST) &&
+ last_token2 == TOK_PFS_GENERIC_VALUE_LIST ||
+ last_token2 == NULL_SYM) &&
(last_token == ','))
{
/*
REDUCE:
TOK_PFS_GENERIC_VALUE_LIST :=
- TOK_PFS_GENERIC_VALUE ',' TOK_PFS_GENERIC_VALUE
+ (TOK_PFS_GENERIC_VALUE|NULL_SYM) ',' (TOK_PFS_GENERIC_VALUE|NULL_SYM)
REDUCE:
TOK_PFS_GENERIC_VALUE_LIST :=
- TOK_PFS_GENERIC_VALUE_LIST ',' TOK_PFS_GENERIC_VALUE
+ TOK_PFS_GENERIC_VALUE_LIST ',' (TOK_PFS_GENERIC_VALUE|NULL_SYM)
*/
digest_storage->m_byte_count-= 2*PFS_SIZE_OF_A_TOKEN;
token= TOK_PFS_GENERIC_VALUE_LIST;
diff --git a/storage/perfschema/pfs_digest.h b/storage/perfschema/pfs_digest.h
index 2646596171c..d2453dc32c6 100644
--- a/storage/perfschema/pfs_digest.h
+++ b/storage/perfschema/pfs_digest.h
@@ -38,32 +38,26 @@ struct PFS_thread;
/**
Structure to store a MD5 hash value (digest) for a statement.
*/
-struct PFS_digest_hash
+struct PFS_digest_key
{
unsigned char m_md5[PFS_MD5_SIZE];
+ char m_schema_name[NAME_LEN];
+ uint m_schema_name_length;
};
/** A statement digest stat record. */
-struct PFS_statements_digest_stat
+struct PFS_ALIGNED PFS_statements_digest_stat
{
- /**
- Digest MD5 Hash.
- */
- PFS_digest_hash m_digest_hash;
+ /** Digest Schema + MD5 Hash. */
+ PFS_digest_key m_digest_key;
- /**
- Digest Storage.
- */
+ /** Digest Storage. */
PSI_digest_storage m_digest_storage;
- /**
- Statement stat.
- */
+ /** Statement stat. */
PFS_statement_stat m_stat;
- /**
- First Seen/last seen.
- */
+ /** First and last seen timestamps.*/
ulonglong m_first_seen;
ulonglong m_last_seen;
@@ -78,10 +72,12 @@ void cleanup_digest();
int init_digest_hash(void);
void cleanup_digest_hash(void);
-PFS_statement_stat* find_or_create_digest(PFS_thread*,
- PSI_digest_storage*);
+PFS_statement_stat* find_or_create_digest(PFS_thread *thread,
+ PSI_digest_storage *digest_storage,
+ const char *schema_name,
+ uint schema_name_length);
-void get_digest_text(char* digest_text, PSI_digest_storage*);
+void get_digest_text(char *digest_text, PSI_digest_storage *digest_storage);
void reset_esms_by_digest();
@@ -90,8 +86,8 @@ extern PFS_statements_digest_stat *statements_digest_stat_array;
/* Instrumentation callbacks for pfs.cc */
-struct PSI_digest_locker* pfs_digest_start_v1(PSI_statement_locker *locker);
-PSI_digest_locker* pfs_digest_add_token_v1(PSI_digest_locker *locker,
+struct PSI_digest_locker *pfs_digest_start_v1(PSI_statement_locker *locker);
+PSI_digest_locker *pfs_digest_add_token_v1(PSI_digest_locker *locker,
uint token,
OPAQUE_LEX_YYSTYPE *yylval);
@@ -99,6 +95,7 @@ static inline void digest_reset(PSI_digest_storage *digest)
{
digest->m_full= false;
digest->m_byte_count= 0;
+ digest->m_charset_number= 0;
}
static inline void digest_copy(PSI_digest_storage *to, const PSI_digest_storage *from)
@@ -107,20 +104,21 @@ static inline void digest_copy(PSI_digest_storage *to, const PSI_digest_storage
{
to->m_full= from->m_full;
to->m_byte_count= from->m_byte_count;
+ to->m_charset_number= from->m_charset_number;
DBUG_ASSERT(to->m_byte_count <= PSI_MAX_DIGEST_STORAGE_SIZE);
memcpy(to->m_token_array, from->m_token_array, to->m_byte_count);
}
else
{
- DBUG_ASSERT(! from->m_full);
DBUG_ASSERT(from->m_byte_count == 0);
to->m_full= false;
to->m_byte_count= 0;
+ to->m_charset_number= 0;
}
}
/**
- Function to read a single token from token array.
+ Read a single token from token array.
*/
inline int read_token(PSI_digest_storage *digest_storage,
int index, uint *tok)
@@ -141,7 +139,7 @@ inline int read_token(PSI_digest_storage *digest_storage,
}
/**
- Function to store a single token in token array.
+ Store a single token in token array.
*/
inline void store_token(PSI_digest_storage* digest_storage, uint token)
{
@@ -162,7 +160,7 @@ inline void store_token(PSI_digest_storage* digest_storage, uint token)
}
/**
- Function to read an identifier from token array.
+ Read an identifier from token array.
*/
inline int read_identifier(PSI_digest_storage* digest_storage,
int index, char ** id_string, int *id_length)
@@ -186,7 +184,7 @@ inline int read_identifier(PSI_digest_storage* digest_storage,
}
/**
- Function to store an identifier in token array.
+ Store an identifier in token array.
*/
inline void store_token_identifier(PSI_digest_storage* digest_storage,
uint token,
@@ -207,9 +205,7 @@ inline void store_token_identifier(PSI_digest_storage* digest_storage,
dest[3]= (id_length >> 8) & 0xff;
/* Write the string data */
if (id_length > 0)
- {
- strncpy((char *)(dest + 4), id_name, id_length);
- }
+ memcpy((char *)(dest + 4), id_name, id_length);
digest_storage->m_byte_count+= bytes_needed;
}
else
@@ -218,4 +214,6 @@ inline void store_token_identifier(PSI_digest_storage* digest_storage,
}
}
+extern LF_HASH digest_hash;
+
#endif
diff --git a/storage/perfschema/pfs_engine_table.cc b/storage/perfschema/pfs_engine_table.cc
index 304e837fa84..8f6f0fa3bcd 100644
--- a/storage/perfschema/pfs_engine_table.cc
+++ b/storage/perfschema/pfs_engine_table.cc
@@ -20,6 +20,7 @@
#include "my_global.h"
#include "my_pthread.h"
+#include "hostname.h" /* For Host_entry */
#include "pfs_engine_table.h"
#include "table_events_waits.h"
@@ -69,6 +70,8 @@
#include "table_socket_instances.h"
#include "table_socket_summary_by_instance.h"
#include "table_socket_summary_by_event_name.h"
+#include "table_session_connect_attrs.h"
+#include "table_session_account_connect_attrs.h"
/* For show status */
#include "pfs_column_values.h"
@@ -145,6 +148,8 @@ static PFS_engine_table_share *all_shares[]=
&table_socket_instances::m_share,
&table_socket_summary_by_instance::m_share,
&table_socket_summary_by_event_name::m_share,
+ &table_session_connect_attrs::m_share,
+ &table_session_account_connect_attrs::m_share,
NULL
};
@@ -683,20 +688,22 @@ PFS_unknown_acl pfs_unknown_acl;
ACL_internal_access_result
PFS_unknown_acl::check(ulong want_access, ulong *save_priv) const
{
- const ulong always_forbidden= INSERT_ACL | UPDATE_ACL | DELETE_ACL
- | CREATE_ACL | REFERENCES_ACL | INDEX_ACL | ALTER_ACL
- | CREATE_VIEW_ACL | TRIGGER_ACL | LOCK_TABLES_ACL;
+ const ulong always_forbidden= CREATE_ACL
+ | REFERENCES_ACL | INDEX_ACL | ALTER_ACL
+ | CREATE_VIEW_ACL | TRIGGER_ACL;
if (unlikely(want_access & always_forbidden))
return ACL_INTERNAL_ACCESS_DENIED;
/*
- There is no point in hidding (by enforcing ACCESS_DENIED for SELECT_ACL
+ There is no point in hiding (by enforcing ACCESS_DENIED for SELECT_ACL
on performance_schema.*) tables that do not exist anyway.
When SELECT_ACL is granted on performance_schema.* or *.*,
SELECT * from performance_schema.wrong_table
will fail with a more understandable ER_NO_SUCH_TABLE error,
instead of ER_TABLEACCESS_DENIED_ERROR.
+ The same goes for other DML (INSERT_ACL | UPDATE_ACL | DELETE_ACL),
+ for ease of use: error messages will be less surprising.
*/
return ACL_INTERNAL_ACCESS_CHECK_GRANT;
}
@@ -978,363 +985,445 @@ bool pfs_show_status(handlerton *hton, THD *thd,
total_memory+= size;
break;
case 56:
- name= "events_waits_summary_global_by_event_name.row_size";
- size= sizeof(PFS_single_stat);
- break;
- case 57:
- name= "events_waits_summary_global_by_event_name.row_count";
- size= wait_class_max;
- break;
- case 58:
- name= "events_waits_summary_global_by_event_name.memory";
- size= wait_class_max * sizeof(PFS_single_stat);
- total_memory+= size;
- break;
- case 59:
name= "(pfs_account).row_size";
size= sizeof(PFS_account);
break;
- case 60:
+ case 57:
name= "(pfs_account).row_count";
size= account_max;
break;
- case 61:
+ case 58:
name= "(pfs_account).memory";
size= account_max * sizeof(PFS_account);
total_memory+= size;
break;
- case 62:
+ case 59:
name= "events_waits_summary_by_account_by_event_name.row_size";
size= sizeof(PFS_single_stat);
break;
- case 63:
+ case 60:
name= "events_waits_summary_by_account_by_event_name.row_count";
size= account_max * wait_class_max;
break;
- case 64:
+ case 61:
name= "events_waits_summary_by_account_by_event_name.memory";
size= account_max * wait_class_max * sizeof(PFS_single_stat);
total_memory+= size;
break;
- case 65:
+ case 62:
name= "events_waits_summary_by_user_by_event_name.row_size";
size= sizeof(PFS_single_stat);
break;
- case 66:
+ case 63:
name= "events_waits_summary_by_user_by_event_name.row_count";
size= user_max * wait_class_max;
break;
- case 67:
+ case 64:
name= "events_waits_summary_by_user_by_event_name.memory";
size= user_max * wait_class_max * sizeof(PFS_single_stat);
total_memory+= size;
break;
- case 68:
+ case 65:
name= "events_waits_summary_by_host_by_event_name.row_size";
size= sizeof(PFS_single_stat);
break;
- case 69:
+ case 66:
name= "events_waits_summary_by_host_by_event_name.row_count";
size= host_max * wait_class_max;
break;
- case 70:
+ case 67:
name= "events_waits_summary_by_host_by_event_name.memory";
size= host_max * wait_class_max * sizeof(PFS_single_stat);
total_memory+= size;
break;
- case 71:
+ case 68:
name= "(pfs_user).row_size";
size= sizeof(PFS_user);
break;
- case 72:
+ case 69:
name= "(pfs_user).row_count";
size= user_max;
break;
- case 73:
+ case 70:
name= "(pfs_user).memory";
size= user_max * sizeof(PFS_user);
total_memory+= size;
break;
- case 74:
+ case 71:
name= "(pfs_host).row_size";
size= sizeof(PFS_host);
break;
- case 75:
+ case 72:
name= "(pfs_host).row_count";
size= host_max;
break;
- case 76:
+ case 73:
name= "(pfs_host).memory";
size= host_max * sizeof(PFS_host);
total_memory+= size;
break;
- case 77:
+ case 74:
name= "(pfs_stage_class).row_size";
size= sizeof(PFS_stage_class);
break;
- case 78:
+ case 75:
name= "(pfs_stage_class).row_count";
size= stage_class_max;
break;
- case 79:
+ case 76:
name= "(pfs_stage_class).memory";
size= stage_class_max * sizeof(PFS_stage_class);
total_memory+= size;
break;
- case 80:
+ case 77:
name= "events_stages_history.row_size";
size= sizeof(PFS_events_stages);
break;
- case 81:
+ case 78:
name= "events_stages_history.row_count";
size= events_stages_history_per_thread * thread_max;
break;
- case 82:
+ case 79:
name= "events_stages_history.memory";
size= events_stages_history_per_thread * thread_max
* sizeof(PFS_events_stages);
total_memory+= size;
break;
- case 83:
+ case 80:
name= "events_stages_history_long.row_size";
size= sizeof(PFS_events_stages);
break;
- case 84:
+ case 81:
name= "events_stages_history_long.row_count";
size= events_stages_history_long_size;
break;
- case 85:
+ case 82:
name= "events_stages_history_long.memory";
size= events_stages_history_long_size * sizeof(PFS_events_stages);
total_memory+= size;
break;
- case 86:
+ case 83:
name= "events_stages_summary_by_thread_by_event_name.row_size";
size= sizeof(PFS_stage_stat);
break;
- case 87:
+ case 84:
name= "events_stages_summary_by_thread_by_event_name.row_count";
size= thread_max * stage_class_max;
break;
- case 88:
+ case 85:
name= "events_stages_summary_by_thread_by_event_name.memory";
size= thread_max * stage_class_max * sizeof(PFS_stage_stat);
total_memory+= size;
break;
- case 89:
+ case 86:
name= "events_stages_summary_global_by_event_name.row_size";
size= sizeof(PFS_stage_stat);
break;
- case 90:
+ case 87:
name= "events_stages_summary_global_by_event_name.row_count";
size= stage_class_max;
break;
- case 91:
+ case 88:
name= "events_stages_summary_global_by_event_name.memory";
size= stage_class_max * sizeof(PFS_stage_stat);
total_memory+= size;
break;
- case 92:
+ case 89:
name= "events_stages_summary_by_account_by_event_name.row_size";
size= sizeof(PFS_stage_stat);
break;
- case 93:
+ case 90:
name= "events_stages_summary_by_account_by_event_name.row_count";
size= account_max * stage_class_max;
break;
- case 94:
+ case 91:
name= "events_stages_summary_by_account_by_event_name.memory";
size= account_max * stage_class_max * sizeof(PFS_stage_stat);
total_memory+= size;
break;
- case 95:
+ case 92:
name= "events_stages_summary_by_user_by_event_name.row_size";
size= sizeof(PFS_stage_stat);
break;
- case 96:
+ case 93:
name= "events_stages_summary_by_user_by_event_name.row_count";
size= user_max * stage_class_max;
break;
- case 97:
+ case 94:
name= "events_stages_summary_by_user_by_event_name.memory";
size= user_max * stage_class_max * sizeof(PFS_stage_stat);
total_memory+= size;
break;
- case 98:
+ case 95:
name= "events_stages_summary_by_host_by_event_name.row_size";
size= sizeof(PFS_stage_stat);
break;
- case 99:
+ case 96:
name= "events_stages_summary_by_host_by_event_name.row_count";
size= host_max * stage_class_max;
break;
- case 100:
+ case 97:
name= "events_stages_summary_by_host_by_event_name.memory";
size= host_max * stage_class_max * sizeof(PFS_stage_stat);
total_memory+= size;
break;
- case 101:
+ case 98:
name= "(pfs_statement_class).row_size";
size= sizeof(PFS_statement_class);
break;
- case 102:
+ case 99:
name= "(pfs_statement_class).row_count";
size= statement_class_max;
break;
- case 103:
+ case 100:
name= "(pfs_statement_class).memory";
size= statement_class_max * sizeof(PFS_statement_class);
total_memory+= size;
break;
- case 104:
+ case 101:
name= "events_statements_history.row_size";
size= sizeof(PFS_events_statements);
break;
- case 105:
+ case 102:
name= "events_statements_history.row_count";
size= events_statements_history_per_thread * thread_max;
break;
- case 106:
+ case 103:
name= "events_statements_history.memory";
size= events_statements_history_per_thread * thread_max
* sizeof(PFS_events_statements);
total_memory+= size;
break;
- case 107:
+ case 104:
name= "events_statements_history_long.row_size";
size= sizeof(PFS_events_statements);
break;
- case 108:
+ case 105:
name= "events_statements_history_long.row_count";
size= events_statements_history_long_size;
break;
- case 109:
+ case 106:
name= "events_statements_history_long.memory";
size= events_statements_history_long_size * sizeof(PFS_events_statements);
total_memory+= size;
break;
- case 110:
+ case 107:
name= "events_statements_summary_by_thread_by_event_name.row_size";
size= sizeof(PFS_statement_stat);
break;
- case 111:
+ case 108:
name= "events_statements_summary_by_thread_by_event_name.row_count";
size= thread_max * statement_class_max;
break;
- case 112:
+ case 109:
name= "events_statements_summary_by_thread_by_event_name.memory";
size= thread_max * statement_class_max * sizeof(PFS_statement_stat);
total_memory+= size;
break;
- case 113:
+ case 110:
name= "events_statements_summary_global_by_event_name.row_size";
size= sizeof(PFS_statement_stat);
break;
- case 114:
+ case 111:
name= "events_statements_summary_global_by_event_name.row_count";
size= statement_class_max;
break;
- case 115:
+ case 112:
name= "events_statements_summary_global_by_event_name.memory";
size= statement_class_max * sizeof(PFS_statement_stat);
total_memory+= size;
break;
- case 116:
+ case 113:
name= "events_statements_summary_by_account_by_event_name.row_size";
size= sizeof(PFS_statement_stat);
break;
- case 117:
+ case 114:
name= "events_statements_summary_by_account_by_event_name.row_count";
size= account_max * statement_class_max;
break;
- case 118:
+ case 115:
name= "events_statements_summary_by_account_by_event_name.memory";
size= account_max * statement_class_max * sizeof(PFS_statement_stat);
total_memory+= size;
break;
- case 119:
+ case 116:
name= "events_statements_summary_by_user_by_event_name.row_size";
size= sizeof(PFS_statement_stat);
break;
- case 120:
+ case 117:
name= "events_statements_summary_by_user_by_event_name.row_count";
size= user_max * statement_class_max;
break;
- case 121:
+ case 118:
name= "events_statements_summary_by_user_by_event_name.memory";
size= user_max * statement_class_max * sizeof(PFS_statement_stat);
total_memory+= size;
break;
- case 122:
+ case 119:
name= "events_statements_summary_by_host_by_event_name.row_size";
size= sizeof(PFS_statement_stat);
break;
- case 123:
+ case 120:
name= "events_statements_summary_by_host_by_event_name.row_count";
size= host_max * statement_class_max;
break;
- case 124:
+ case 121:
name= "events_statements_summary_by_host_by_event_name.memory";
size= host_max * statement_class_max * sizeof(PFS_statement_stat);
total_memory+= size;
break;
- case 125:
+ case 122:
name= "events_statements_current.row_size";
size= sizeof(PFS_events_statements);
break;
- case 126:
+ case 123:
name= "events_statements_current.row_count";
size= thread_max * statement_stack_max;
break;
- case 127:
+ case 124:
name= "events_statements_current.memory";
size= thread_max * statement_stack_max * sizeof(PFS_events_statements);
total_memory+= size;
break;
- case 128:
+ case 125:
name= "(pfs_socket_class).row_size";
size= sizeof(PFS_socket_class);
break;
- case 129:
+ case 126:
name= "(pfs_socket_class).row_count";
size= socket_class_max;
break;
- case 130:
+ case 127:
name= "(pfs_socket_class).memory";
size= socket_class_max * sizeof(PFS_socket_class);
total_memory+= size;
break;
- case 131:
+ case 128:
name= "socket_instances.row_size";
size= sizeof(PFS_socket);
break;
- case 132:
+ case 129:
name= "socket_instances.row_count";
size= socket_max;
break;
- case 133:
+ case 130:
name= "socket_instances.memory";
size= socket_max * sizeof(PFS_socket);
total_memory+= size;
break;
- case 134:
+ case 131:
name= "events_statements_summary_by_digest.row_size";
size= sizeof(PFS_statements_digest_stat);
break;
- case 135:
+ case 132:
name= "events_statements_summary_by_digest.row_count";
size= digest_max;
break;
- case 136:
+ case 133:
name= "events_statements_summary_by_digest.memory";
size= digest_max * sizeof(PFS_statements_digest_stat);
total_memory+= size;
- break;
+ break;
+ case 134:
+ name= "session_connect_attrs.row_size";
+ size= thread_max;
+ break;
+ case 135:
+ name= "session_connect_attrs.row_count";
+ size= session_connect_attrs_size_per_thread;
+ break;
+ case 136:
+ name= "session_connect_attrs.memory";
+ size= thread_max * session_connect_attrs_size_per_thread;
+ total_memory+= size;
+ break;
+
+ case 137:
+ name= "(account_hash).count";
+ size= account_hash.count;
+ break;
+ case 138:
+ name= "(account_hash).size";
+ size= account_hash.size;
+ break;
+ case 139:
+ name= "(digest_hash).count";
+ size= digest_hash.count;
+ break;
+ case 140:
+ name= "(digest_hash).size";
+ size= digest_hash.size;
+ break;
+ case 141:
+ name= "(filename_hash).count";
+ size= filename_hash.count;
+ break;
+ case 142:
+ name= "(filename_hash).size";
+ size= filename_hash.size;
+ break;
+ case 143:
+ name= "(host_hash).count";
+ size= host_hash.count;
+ break;
+ case 144:
+ name= "(host_hash).size";
+ size= host_hash.size;
+ break;
+ case 145:
+ name= "(setup_actor_hash).count";
+ size= setup_actor_hash.count;
+ break;
+ case 146:
+ name= "(setup_actor_hash).size";
+ size= setup_actor_hash.size;
+ break;
+ case 147:
+ name= "(setup_object_hash).count";
+ size= setup_object_hash.count;
+ break;
+ case 148:
+ name= "(setup_object_hash).size";
+ size= setup_object_hash.size;
+ break;
+ case 149:
+ name= "(table_share_hash).count";
+ size= table_share_hash.count;
+ break;
+ case 150:
+ name= "(table_share_hash).size";
+ size= table_share_hash.size;
+ break;
+ case 151:
+ name= "(user_hash).count";
+ size= user_hash.count;
+ break;
+ case 152:
+ name= "(user_hash).size";
+ size= user_hash.size;
+ break;
+ case 153:
+ /*
+ This is not a performance_schema buffer,
+ the data is maintained in the server,
+ in hostname_cache.
+ Print the size only, there are:
+ - no host_cache.count
+ - no host_cache.memory
+ */
+ name= "host_cache.size";
+#ifdef NOT_YET_IMPLEMENTED
+ size= sizeof(Host_entry);
+#else
+ size= 0;
+#endif
+ break;
+
/*
This case must be last,
for aggregation in total_memory.
*/
- case 137:
+ case 154:
name= "performance_schema.memory";
size= total_memory;
/* This will fail if something is not advertised here */
diff --git a/storage/perfschema/pfs_engine_table.h b/storage/perfschema/pfs_engine_table.h
index 40f5404d0b7..981d72ee19e 100644
--- a/storage/perfschema/pfs_engine_table.h
+++ b/storage/perfschema/pfs_engine_table.h
@@ -263,7 +263,7 @@ public:
~PFS_readonly_acl()
{}
- ACL_internal_access_result check(ulong want_access, ulong *save_priv) const;
+ virtual ACL_internal_access_result check(ulong want_access, ulong *save_priv) const;
};
/** Singleton instance of PFS_readonly_acl. */
diff --git a/storage/perfschema/pfs_events.h b/storage/perfschema/pfs_events.h
index c9586df11bd..97fb7e08d63 100644
--- a/storage/perfschema/pfs_events.h
+++ b/storage/perfschema/pfs_events.h
@@ -29,7 +29,7 @@ struct PFS_instr_class;
struct PFS_events
{
/** THREAD_ID. */
- ulong m_thread_internal_id;
+ ulonglong m_thread_internal_id;
/** EVENT_ID. */
ulonglong m_event_id;
/** END_EVENT_ID. */
diff --git a/storage/perfschema/pfs_events_waits.cc b/storage/perfschema/pfs_events_waits.cc
index 2ee9ec292a2..c8a9d20a2f1 100644
--- a/storage/perfschema/pfs_events_waits.cc
+++ b/storage/perfschema/pfs_events_waits.cc
@@ -230,16 +230,6 @@ void reset_events_waits_by_host()
}
}
-/** Reset table EVENTS_WAITS_GLOBAL_BY_EVENT_NAME data. */
-void reset_events_waits_global()
-{
- PFS_single_stat *stat= global_instr_class_waits_array;
- PFS_single_stat *stat_last= global_instr_class_waits_array + wait_class_max;
-
- for ( ; stat < stat_last; stat++)
- stat->reset();
-}
-
void reset_table_waits_by_table()
{
PFS_table_share *pfs= table_share_array;
diff --git a/storage/perfschema/pfs_global.cc b/storage/perfschema/pfs_global.cc
index 6c3b79a3e1f..0c022b85748 100644
--- a/storage/perfschema/pfs_global.cc
+++ b/storage/perfschema/pfs_global.cc
@@ -18,13 +18,16 @@
Miscellaneous global dependencies (implementation).
*/
-#include "my_global.h"
-#include "my_sys.h"
#include "pfs_global.h"
-#include "my_net.h"
+#include <my_sys.h>
+#include <my_net.h>
+#ifdef HAVE_MALLOC_H
+#include <malloc.h> /* memalign() may be here */
+#endif
-#include <stdlib.h>
-#include <string.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
#ifdef __WIN__
#include <winsock2.h>
@@ -45,18 +48,65 @@ void *pfs_malloc(size_t size, myf flags)
DBUG_ASSERT(! pfs_initialized);
DBUG_ASSERT(size > 0);
- void *ptr= malloc(size);
- if (likely(ptr != NULL))
- pfs_allocated_memory+= size;
- if (likely((ptr != NULL) && (flags & MY_ZEROFILL)))
+ void *ptr;
+
+#ifdef PFS_ALIGNEMENT
+#ifdef HAVE_POSIX_MEMALIGN
+ /* Linux */
+ if (unlikely(posix_memalign(& ptr, PFS_ALIGNEMENT, size)))
+ return NULL;
+#else
+#ifdef HAVE_MEMALIGN
+ /* Solaris */
+ ptr= memalign(PFS_ALIGNEMENT, size);
+ if (unlikely(ptr == NULL))
+ return NULL;
+#else
+#ifdef HAVE_ALIGNED_MALLOC
+ /* Windows */
+ ptr= _aligned_malloc(size, PFS_ALIGNEMENT);
+ if (unlikely(ptr == NULL))
+ return NULL;
+#else
+#error "Missing implementation for PFS_ALIGNENT"
+#endif /* HAVE_ALIGNED_MALLOC */
+#endif /* HAVE_MEMALIGN */
+#endif /* HAVE_POSIX_MEMALIGN */
+#else /* PFS_ALIGNMENT */
+ /* Everything else */
+ ptr= malloc(size);
+ if (unlikely(ptr == NULL))
+ return NULL;
+#endif
+
+ pfs_allocated_memory+= size;
+ if (flags & MY_ZEROFILL)
memset(ptr, 0, size);
return ptr;
}
void pfs_free(void *ptr)
{
- if (ptr != NULL)
- free(ptr);
+ if (ptr == NULL)
+ return;
+
+#ifdef HAVE_POSIX_MEMALIGN
+ /* Allocated with posix_memalign() */
+ free(ptr);
+#else
+#ifdef HAVE_MEMALIGN
+ /* Allocated with memalign() */
+ free(ptr);
+#else
+#ifdef HAVE_ALIGNED_MALLOC
+ /* Allocated with _aligned_malloc() */
+ _aligned_free(ptr);
+#else
+ /* Allocated with malloc() */
+ free(ptr);
+#endif /* HAVE_ALIGNED_MALLOC */
+#endif /* HAVE_MEMALIGN */
+#endif /* HAVE_POSIX_MEMALIGN */
}
void pfs_print_error(const char *format, ...)
diff --git a/storage/perfschema/pfs_global.h b/storage/perfschema/pfs_global.h
index 693153cb097..cddf688ddf4 100644
--- a/storage/perfschema/pfs_global.h
+++ b/storage/perfschema/pfs_global.h
@@ -16,6 +16,9 @@
#ifndef PFS_GLOBAL_H
#define PFS_GLOBAL_H
+#include "my_global.h"
+#include "my_compiler.h"
+
/**
@file storage/perfschema/pfs_global.h
Miscellaneous global dependencies (declarations).
@@ -26,6 +29,18 @@ extern bool pfs_initialized;
/** Total memory allocated by the performance schema, in bytes. */
extern ulonglong pfs_allocated_memory;
+#if defined(HAVE_POSIX_MEMALIGN) || defined(HAVE_MEMALIGN) || defined(HAVE_ALIGNED_MALLOC)
+#define PFS_ALIGNEMENT 64
+#define PFS_ALIGNED MY_ALIGNED(PFS_ALIGNEMENT)
+#else
+/*
+ Known platforms that do not provide aligned memory:
+ - MacOSX Darwin (osx10.5)
+ For these platforms, compile without the alignment optimization.
+*/
+#define PFS_ALIGNED
+#endif /* HAVE_POSIX_MEMALIGN || HAVE_MEMALIGN || HAVE_ALIGNED_MALLOC */
+
void *pfs_malloc(size_t size, myf flags);
/**
diff --git a/storage/perfschema/pfs_host.cc b/storage/perfschema/pfs_host.cc
index 82b78e19ce8..09763b0bd8b 100644
--- a/storage/perfschema/pfs_host.cc
+++ b/storage/perfschema/pfs_host.cc
@@ -42,7 +42,7 @@ static PFS_single_stat *host_instr_class_waits_array= NULL;
static PFS_stage_stat *host_instr_class_stages_array= NULL;
static PFS_statement_stat *host_instr_class_statements_array= NULL;
-static LF_HASH host_hash;
+LF_HASH host_hash;
static bool host_hash_inited= false;
/**
@@ -146,10 +146,11 @@ C_MODE_END
*/
int init_host_hash(void)
{
- if (! host_hash_inited)
+ if ((! host_hash_inited) && (host_max > 0))
{
lf_hash_init(&host_hash, sizeof(PFS_host*), LF_HASH_UNIQUE,
0, 0, host_hash_get_key, &my_charset_bin);
+ host_hash.size= host_max;
host_hash_inited= true;
}
return 0;
diff --git a/storage/perfschema/pfs_host.h b/storage/perfschema/pfs_host.h
index d04b88e62f3..eb0ff6efc6f 100644
--- a/storage/perfschema/pfs_host.h
+++ b/storage/perfschema/pfs_host.h
@@ -44,7 +44,7 @@ struct PFS_host_key
uint m_key_length;
};
-struct PFS_host : PFS_connection_slice
+struct PFS_ALIGNED PFS_host : PFS_connection_slice
{
public:
inline void init_refcount(void)
@@ -105,6 +105,8 @@ extern ulong host_lost;
extern PFS_host *host_array;
+extern LF_HASH host_hash;
+
/** @} */
#endif
diff --git a/storage/perfschema/pfs_instr.cc b/storage/perfschema/pfs_instr.cc
index 39caabaf030..25e78ee7b5e 100644
--- a/storage/perfschema/pfs_instr.cc
+++ b/storage/perfschema/pfs_instr.cc
@@ -1,4 +1,4 @@
-/* Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2008, 2012, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -82,6 +82,10 @@ uint statement_stack_max;
ulong locker_lost= 0;
/** Number of statement lost. @sa STATEMENT_STACK_SIZE. */
ulong statement_lost= 0;
+/** Size of connection attribute storage per thread */
+ulong session_connect_attrs_size_per_thread;
+/** Number of connection attributes lost */
+ulong session_connect_attrs_lost= 0;
/**
Mutex instrumentation instances array.
@@ -140,11 +144,10 @@ PFS_table *table_array= NULL;
*/
PFS_socket *socket_array= NULL;
-PFS_single_stat *global_instr_class_waits_array= NULL;
PFS_stage_stat *global_instr_class_stages_array= NULL;
PFS_statement_stat *global_instr_class_statements_array= NULL;
-static volatile uint32 thread_internal_id_counter= 0;
+static volatile uint64 thread_internal_id_counter= 0;
static uint thread_instr_class_waits_sizing;
static uint thread_instr_class_stages_sizing;
@@ -157,9 +160,10 @@ static PFS_events_waits *thread_waits_history_array= NULL;
static PFS_events_stages *thread_stages_history_array= NULL;
static PFS_events_statements *thread_statements_history_array= NULL;
static PFS_events_statements *thread_statements_stack_array= NULL;
+static char *thread_session_connect_attrs_array= NULL;
/** Hash table for instrumented files. */
-static LF_HASH filename_hash;
+LF_HASH filename_hash;
/** True if filename_hash is initialized. */
static bool filename_hash_inited= false;
@@ -174,6 +178,7 @@ int init_instruments(const PFS_global_param *param)
uint thread_stages_history_sizing;
uint thread_statements_history_sizing;
uint thread_statements_stack_sizing;
+ uint thread_session_connect_attrs_sizing;
uint index;
DBUG_ENTER("init_instruments");
@@ -221,6 +226,11 @@ int init_instruments(const PFS_global_param *param)
thread_instr_class_statements_sizing= param->m_thread_sizing
* param->m_statement_class_sizing;
+ session_connect_attrs_size_per_thread= param->m_session_connect_attrs_sizing;
+ thread_session_connect_attrs_sizing= param->m_thread_sizing
+ * session_connect_attrs_size_per_thread;
+ session_connect_attrs_lost= 0;
+
mutex_array= NULL;
rwlock_array= NULL;
cond_array= NULL;
@@ -366,6 +376,14 @@ int init_instruments(const PFS_global_param *param)
thread_instr_class_statements_array[index].reset();
}
+ if (thread_session_connect_attrs_sizing > 0)
+ {
+ thread_session_connect_attrs_array=
+ (char *)pfs_malloc(thread_session_connect_attrs_sizing, MYF(MY_ZEROFILL));
+ if (unlikely(thread_session_connect_attrs_array == NULL))
+ return 1;
+ }
+
for (index= 0; index < thread_max; index++)
{
thread_array[index].m_waits_history=
@@ -382,18 +400,8 @@ int init_instruments(const PFS_global_param *param)
&thread_statements_stack_array[index * statement_stack_max];
thread_array[index].m_instr_class_statements_stats=
&thread_instr_class_statements_array[index * statement_class_max];
- }
-
- if (wait_class_max > 0)
- {
- global_instr_class_waits_array=
- PFS_MALLOC_ARRAY(wait_class_max,
- PFS_single_stat, MYF(MY_ZEROFILL));
- if (unlikely(global_instr_class_waits_array == NULL))
- DBUG_RETURN(1);
-
- for (index= 0; index < wait_class_max; index++)
- global_instr_class_waits_array[index].reset();
+ thread_array[index].m_session_connect_attrs=
+ &thread_session_connect_attrs_array[index * session_connect_attrs_size_per_thread];
}
if (stage_class_max > 0)
@@ -461,8 +469,6 @@ void cleanup_instruments(void)
thread_statements_stack_array= NULL;
pfs_free(thread_instr_class_waits_array);
thread_instr_class_waits_array= NULL;
- pfs_free(global_instr_class_waits_array);
- global_instr_class_waits_array= NULL;
pfs_free(global_instr_class_stages_array);
global_instr_class_stages_array= NULL;
pfs_free(global_instr_class_statements_array);
@@ -471,6 +477,9 @@ void cleanup_instruments(void)
thread_instr_class_statements_array= NULL;
pfs_free(thread_instr_class_stages_array);
thread_instr_class_stages_array= NULL;
+ pfs_free(thread_session_connect_attrs_array);
+ thread_session_connect_attrs_array=NULL;
+
DBUG_VOID_RETURN;
}
@@ -502,10 +511,11 @@ int init_file_hash(void)
{
DBUG_ENTER("init_file_hash");
- if (! filename_hash_inited)
+ if ((! filename_hash_inited) && (file_max > 0))
{
lf_hash_init(&filename_hash, sizeof(PFS_file*), LF_HASH_UNIQUE,
0, 0, filename_hash_get_key, &my_charset_bin);
+ filename_hash.size= file_max;
filename_hash_inited= true;
}
DBUG_RETURN(0);
@@ -604,7 +614,7 @@ void PFS_scan::init(uint random, uint max_size)
*/
PFS_mutex* create_mutex(PFS_mutex_class *klass, const void *identity)
{
- static uint mutex_monotonic_index= 0;
+ static uint PFS_ALIGNED mutex_monotonic_index= 0;
uint index;
uint attempts= 0;
PFS_mutex *pfs;
@@ -642,8 +652,7 @@ PFS_mutex* create_mutex(PFS_mutex_class *klass, const void *identity)
pfs->m_class= klass;
pfs->m_enabled= klass->m_enabled && flag_global_instrumentation;
pfs->m_timed= klass->m_timed;
- pfs->m_wait_stat.reset();
- pfs->m_lock_stat.reset();
+ pfs->m_mutex_stat.reset();
pfs->m_owner= NULL;
pfs->m_last_locked= 0;
pfs->m_lock.dirty_to_allocated();
@@ -667,10 +676,9 @@ void destroy_mutex(PFS_mutex *pfs)
DBUG_ENTER("destroy_mutex");
DBUG_ASSERT(pfs != NULL);
PFS_mutex_class *klass= pfs->m_class;
- /* Aggregate to EVENTS_WAITS_SUMMARY_BY_EVENT_NAME */
- uint index= klass->m_event_name_index;
- global_instr_class_waits_array[index].aggregate(& pfs->m_wait_stat);
- pfs->m_wait_stat.reset();
+ /* Aggregate to EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME */
+ klass->m_mutex_stat.aggregate(& pfs->m_mutex_stat);
+ pfs->m_mutex_stat.reset();
if (klass->is_singleton())
klass->m_singleton= NULL;
pfs->m_lock.allocated_to_free();
@@ -685,7 +693,7 @@ void destroy_mutex(PFS_mutex *pfs)
*/
PFS_rwlock* create_rwlock(PFS_rwlock_class *klass, const void *identity)
{
- static uint rwlock_monotonic_index= 0;
+ static uint PFS_ALIGNED rwlock_monotonic_index= 0;
uint index;
uint attempts= 0;
PFS_rwlock *pfs;
@@ -705,10 +713,8 @@ PFS_rwlock* create_rwlock(PFS_rwlock_class *klass, const void *identity)
pfs->m_class= klass;
pfs->m_enabled= klass->m_enabled && flag_global_instrumentation;
pfs->m_timed= klass->m_timed;
- pfs->m_wait_stat.reset();
+ pfs->m_rwlock_stat.reset();
pfs->m_lock.dirty_to_allocated();
- pfs->m_read_lock_stat.reset();
- pfs->m_write_lock_stat.reset();
pfs->m_writer= NULL;
pfs->m_readers= 0;
pfs->m_last_written= 0;
@@ -733,10 +739,9 @@ void destroy_rwlock(PFS_rwlock *pfs)
DBUG_ENTER("destroy_rwlock");
DBUG_ASSERT(pfs != NULL);
PFS_rwlock_class *klass= pfs->m_class;
- /* Aggregate to EVENTS_WAITS_SUMMARY_BY_EVENT_NAME */
- uint index= klass->m_event_name_index;
- global_instr_class_waits_array[index].aggregate(& pfs->m_wait_stat);
- pfs->m_wait_stat.reset();
+ /* Aggregate to EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME */
+ klass->m_rwlock_stat.aggregate(& pfs->m_rwlock_stat);
+ pfs->m_rwlock_stat.reset();
if (klass->is_singleton())
klass->m_singleton= NULL;
pfs->m_lock.allocated_to_free();
@@ -751,7 +756,7 @@ void destroy_rwlock(PFS_rwlock *pfs)
*/
PFS_cond* create_cond(PFS_cond_class *klass, const void *identity)
{
- static uint cond_monotonic_index= 0;
+ static uint PFS_ALIGNED cond_monotonic_index= 0;
uint index;
uint attempts= 0;
PFS_cond *pfs;
@@ -796,9 +801,8 @@ void destroy_cond(PFS_cond *pfs)
DBUG_ASSERT(pfs != NULL);
PFS_cond_class *klass= pfs->m_class;
- /* Aggregate to EVENTS_WAITS_SUMMARY_BY_EVENT_NAME */
- uint index= klass->m_event_name_index;
- global_instr_class_waits_array[index].aggregate(& pfs->m_wait_stat);
+ /* Aggregate to EVENTS_WAITS_SUMMARY_GLOBAL_BY_EVENT_NAME */
+ klass->m_cond_stat.aggregate(& pfs->m_cond_stat);
pfs->m_wait_stat.reset();
if (klass->is_singleton())
klass->m_singleton= NULL;
@@ -812,19 +816,32 @@ PFS_thread* PFS_thread::get_current_thread()
return pfs;
}
+void PFS_thread::reset_session_connect_attrs()
+{
+ m_session_connect_attrs_length= 0;
+ m_session_connect_attrs_cs= NULL;
+
+ if ((m_session_connect_attrs != NULL) &&
+ (session_connect_attrs_size_per_thread > 0) )
+ {
+ /* Do not keep user data */
+ memset(m_session_connect_attrs, 0, session_connect_attrs_size_per_thread);
+ }
+}
+
/**
Create instrumentation for a thread instance.
@param klass the thread class
@param identity the thread address,
or a value characteristic of this thread
- @param thread_id the PROCESSLIST thread id,
+ @param processlist_id the PROCESSLIST id,
or 0 if unknown
@return a thread instance, or NULL
*/
PFS_thread* create_thread(PFS_thread_class *klass, const void *identity,
- ulong thread_id)
+ ulonglong processlist_id)
{
- static uint thread_monotonic_index= 0;
+ static uint PFS_ALIGNED thread_monotonic_index= 0;
uint index;
uint attempts= 0;
PFS_thread *pfs;
@@ -841,9 +858,9 @@ PFS_thread* create_thread(PFS_thread_class *klass, const void *identity,
if (pfs->m_lock.free_to_dirty())
{
pfs->m_thread_internal_id=
- PFS_atomic::add_u32(&thread_internal_id_counter, 1);
+ PFS_atomic::add_u64(&thread_internal_id_counter, 1);
pfs->m_parent_thread_internal_id= 0;
- pfs->m_thread_id= thread_id;
+ pfs->m_processlist_id= processlist_id;
pfs->m_event_id= 1;
pfs->m_enabled= true;
pfs->m_class= klass;
@@ -856,6 +873,7 @@ PFS_thread* create_thread(PFS_thread_class *klass, const void *identity,
pfs->m_statements_history_index= 0;
pfs->reset_stats();
+ pfs->reset_session_connect_attrs();
pfs->m_filename_hash_pins= NULL;
pfs->m_table_share_hash_pins= NULL;
@@ -871,8 +889,11 @@ PFS_thread* create_thread(PFS_thread_class *klass, const void *identity,
pfs->m_dbname_length= 0;
pfs->m_command= 0;
pfs->m_start_time= 0;
+ pfs->m_processlist_state_ptr= NULL;
pfs->m_processlist_state_length= 0;
+ pfs->m_processlist_info_ptr= NULL;
pfs->m_processlist_info_length= 0;
+ pfs->m_processlist_lock.set_allocated();
pfs->m_host= NULL;
pfs->m_user= NULL;
@@ -999,6 +1020,7 @@ PFS_socket *sanitize_socket(PFS_socket *unsafe)
void destroy_thread(PFS_thread *pfs)
{
DBUG_ASSERT(pfs != NULL);
+ pfs->reset_session_connect_attrs();
if (pfs->m_account != NULL)
{
pfs->m_account->release();
@@ -1084,11 +1106,12 @@ LF_PINS* get_filename_hash_pins(PFS_thread *thread)
@param klass the file class
@param filename the file name
@param len the length in bytes of filename
+ @param create create a file instance if none found
@return a file instance, or NULL
*/
PFS_file*
find_or_create_file(PFS_thread *thread, PFS_file_class *klass,
- const char *filename, uint len)
+ const char *filename, uint len, bool create)
{
PFS_file *pfs;
LF_PINS *pins;
@@ -1096,6 +1119,8 @@ find_or_create_file(PFS_thread *thread, PFS_file_class *klass,
const char *safe_filename;
DBUG_ENTER("find_or_create_file");
+ DBUG_ASSERT(klass != NULL || ! create);
+
pins= get_filename_hash_pins(thread);
if (unlikely(pins == NULL))
{
@@ -1171,7 +1196,7 @@ find_or_create_file(PFS_thread *thread, PFS_file_class *klass,
/* Append the unresolved file name to the resolved path */
char *ptr= buffer + strlen(buffer);
char *buf_end= &buffer[sizeof(buffer)-1];
- if (buf_end > ptr)
+ if ((buf_end > ptr) && (*(ptr-1) != FN_LIBCHAR))
*ptr++= FN_LIBCHAR;
if (buf_end > ptr)
strncpy(ptr, safe_filename + dirlen, buf_end - ptr);
@@ -1183,7 +1208,7 @@ find_or_create_file(PFS_thread *thread, PFS_file_class *klass,
PFS_file **entry;
uint retry_count= 0;
const uint retry_max= 3;
- static uint file_monotonic_index= 0;
+ static uint PFS_ALIGNED file_monotonic_index= 0;
uint index;
uint attempts= 0;
@@ -1202,6 +1227,12 @@ search:
lf_hash_search_unpin(pins);
+ if (! create)
+ {
+ /* No lost counter, just looking for the file existence. */
+ return NULL;
+ }
+
while (++attempts <= file_max)
{
/* See create_mutex() */
@@ -1218,7 +1249,6 @@ search:
strncpy(pfs->m_filename, normalized_filename, normalized_length);
pfs->m_filename[normalized_length]= '\0';
pfs->m_filename_length= normalized_length;
- pfs->m_wait_stat.reset();
pfs->m_file_stat.m_open_count= 1;
pfs->m_file_stat.m_io_stat.reset();
pfs->m_identity= (const void *)pfs;
@@ -1285,14 +1315,9 @@ void destroy_file(PFS_thread *thread, PFS_file *pfs)
DBUG_ASSERT(pfs != NULL);
PFS_file_class *klass= pfs->m_class;
- /* Aggregate to EVENTS_WAITS_SUMMARY_BY_EVENT_NAME */
- uint index= klass->m_event_name_index;
- global_instr_class_waits_array[index].aggregate(& pfs->m_wait_stat);
- pfs->m_wait_stat.reset();
-
/* Aggregate to FILE_SUMMARY_BY_EVENT_NAME */
- klass->m_file_stat.m_io_stat.aggregate(& pfs->m_file_stat.m_io_stat);
- pfs->m_file_stat.m_io_stat.reset();
+ klass->m_file_stat.aggregate(& pfs->m_file_stat);
+ pfs->m_file_stat.reset();
if (klass->is_singleton())
klass->m_singleton= NULL;
@@ -1318,7 +1343,7 @@ void destroy_file(PFS_thread *thread, PFS_file *pfs)
PFS_table* create_table(PFS_table_share *share, PFS_thread *opening_thread,
const void *identity)
{
- static uint table_monotonic_index= 0;
+ static uint PFS_ALIGNED table_monotonic_index= 0;
uint index;
uint attempts= 0;
PFS_table *pfs;
@@ -1364,23 +1389,33 @@ void PFS_table::sanitized_aggregate(void)
and not own the table handle.
*/
PFS_table_share *safe_share= sanitize_table_share(m_share);
- PFS_thread *safe_thread= sanitize_thread(m_thread_owner);
- if ((safe_share != NULL && safe_thread != NULL) &&
- (m_has_io_stats || m_has_lock_stats))
+ if (safe_share != NULL)
{
- safe_aggregate(& m_table_stat, safe_share, safe_thread);
- m_has_io_stats= false;
- m_has_lock_stats= false;
+ if (m_has_io_stats && m_has_lock_stats)
+ {
+ safe_aggregate(& m_table_stat, safe_share);
+ m_has_io_stats= false;
+ m_has_lock_stats= false;
+ }
+ else if (m_has_io_stats)
+ {
+ safe_aggregate_io(& m_table_stat, safe_share);
+ m_has_io_stats= false;
+ }
+ else if (m_has_lock_stats)
+ {
+ safe_aggregate_lock(& m_table_stat, safe_share);
+ m_has_lock_stats= false;
+ }
}
}
void PFS_table::sanitized_aggregate_io(void)
{
PFS_table_share *safe_share= sanitize_table_share(m_share);
- PFS_thread *safe_thread= sanitize_thread(m_thread_owner);
- if (safe_share != NULL && safe_thread != NULL && m_has_io_stats)
+ if (safe_share != NULL && m_has_io_stats)
{
- safe_aggregate_io(& m_table_stat, safe_share, safe_thread);
+ safe_aggregate_io(& m_table_stat, safe_share);
m_has_io_stats= false;
}
}
@@ -1388,96 +1423,44 @@ void PFS_table::sanitized_aggregate_io(void)
void PFS_table::sanitized_aggregate_lock(void)
{
PFS_table_share *safe_share= sanitize_table_share(m_share);
- PFS_thread *safe_thread= sanitize_thread(m_thread_owner);
- if (safe_share != NULL && safe_thread != NULL && m_has_lock_stats)
+ if (safe_share != NULL && m_has_lock_stats)
{
- safe_aggregate_lock(& m_table_stat, safe_share, safe_thread);
+ safe_aggregate_lock(& m_table_stat, safe_share);
m_has_lock_stats= false;
}
}
void PFS_table::safe_aggregate(PFS_table_stat *table_stat,
- PFS_table_share *table_share,
- PFS_thread *thread)
+ PFS_table_share *table_share)
{
DBUG_ASSERT(table_stat != NULL);
DBUG_ASSERT(table_share != NULL);
- DBUG_ASSERT(thread != NULL);
-
- if (flag_thread_instrumentation && thread->m_enabled)
- {
- PFS_single_stat *event_name_array;
- uint index;
- event_name_array= thread->m_instr_class_waits_stats;
- /*
- Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
- (for wait/io/table/sql/handler)
- */
- index= global_table_io_class.m_event_name_index;
- table_stat->sum_io(& event_name_array[index]);
-
- /*
- Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
- (for wait/lock/table/sql/handler)
- */
- index= global_table_lock_class.m_event_name_index;
- table_stat->sum_lock(& event_name_array[index]);
- }
+ uint key_count= sanitize_index_count(table_share->m_key_count);
/* Aggregate to TABLE_IO_SUMMARY, TABLE_LOCK_SUMMARY */
- table_share->m_table_stat.aggregate(table_stat);
+ table_share->m_table_stat.aggregate(table_stat, key_count);
table_stat->fast_reset();
}
void PFS_table::safe_aggregate_io(PFS_table_stat *table_stat,
- PFS_table_share *table_share,
- PFS_thread *thread)
+ PFS_table_share *table_share)
{
DBUG_ASSERT(table_stat != NULL);
DBUG_ASSERT(table_share != NULL);
- DBUG_ASSERT(thread != NULL);
-
- if (flag_thread_instrumentation && thread->m_enabled)
- {
- PFS_single_stat *event_name_array;
- uint index;
- event_name_array= thread->m_instr_class_waits_stats;
- /*
- Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
- (for wait/io/table/sql/handler)
- */
- index= global_table_io_class.m_event_name_index;
- table_stat->sum_io(& event_name_array[index]);
- }
+ uint key_count= sanitize_index_count(table_share->m_key_count);
/* Aggregate to TABLE_IO_SUMMARY */
- table_share->m_table_stat.aggregate_io(table_stat);
+ table_share->m_table_stat.aggregate_io(table_stat, key_count);
table_stat->fast_reset_io();
}
void PFS_table::safe_aggregate_lock(PFS_table_stat *table_stat,
- PFS_table_share *table_share,
- PFS_thread *thread)
+ PFS_table_share *table_share)
{
DBUG_ASSERT(table_stat != NULL);
DBUG_ASSERT(table_share != NULL);
- DBUG_ASSERT(thread != NULL);
-
- if (flag_thread_instrumentation && thread->m_enabled)
- {
- PFS_single_stat *event_name_array;
- uint index;
- event_name_array= thread->m_instr_class_waits_stats;
-
- /*
- Aggregate to EVENTS_WAITS_SUMMARY_BY_THREAD_BY_EVENT_NAME
- (for wait/lock/table/sql/handler)
- */
- index= global_table_lock_class.m_event_name_index;
- table_stat->sum_lock(& event_name_array[index]);
- }
/* Aggregate to TABLE_LOCK_SUMMARY */
table_share->m_table_stat.aggregate_lock(table_stat);
@@ -1504,47 +1487,59 @@ void destroy_table(PFS_table *pfs)
@param identity the socket descriptor
@return a socket instance, or NULL
*/
-PFS_socket* create_socket(PFS_socket_class *klass, const void *identity)
+PFS_socket* create_socket(PFS_socket_class *klass, const my_socket *fd,
+ const struct sockaddr *addr, socklen_t addr_len)
{
- PFS_scan scan;
+ static uint PFS_ALIGNED socket_monotonic_index= 0;
+ uint index;
+ uint attempts= 0;
+ PFS_socket *pfs;
DBUG_ENTER("create_socket");
- /**
- Unlike other instrumented objects, there is no socket 'object' to use as a
- unique identifier. Instead, a pointer to the PFS_socket object will be used
- to identify this socket instance. The socket descriptor will be used to
- seed the the random index assignment.
- */
- my_socket fd= likely(identity != NULL) ?
- *(reinterpret_cast<const my_socket*>(identity)) : 0;
- my_ptrdiff_t ptr= fd;
- uint random= randomized_index((const void *)ptr, socket_max);
-
- for (scan.init(random, socket_max);
- scan.has_pass();
- scan.next_pass())
- {
- PFS_socket *pfs= socket_array + scan.first();
- PFS_socket *pfs_last= socket_array + scan.last();
- for ( ; pfs < pfs_last; pfs++)
+ uint fd_used= 0;
+ uint addr_len_used= addr_len;
+
+ if (fd != NULL)
+ fd_used= *fd;
+
+ if (addr_len_used > sizeof(sockaddr_storage))
+ addr_len_used= sizeof(sockaddr_storage);
+
+ while (++attempts <= socket_max)
+ {
+ index= PFS_atomic::add_u32(& socket_monotonic_index, 1) % socket_max;
+ pfs= socket_array + index;
+
+ if (pfs->m_lock.is_free())
{
- if (pfs->m_lock.is_free())
+ if (pfs->m_lock.free_to_dirty())
{
- if (pfs->m_lock.free_to_dirty())
+ pfs->m_fd= fd_used;
+ /* There is no socket object, so we use the instrumentation. */
+ pfs->m_identity= pfs;
+ pfs->m_class= klass;
+ pfs->m_enabled= klass->m_enabled && flag_global_instrumentation;
+ pfs->m_timed= klass->m_timed;
+ pfs->m_idle= false;
+ pfs->m_socket_stat.reset();
+ pfs->m_thread_owner= NULL;
+
+ pfs->m_addr_len= addr_len_used;
+ if ((addr != NULL) && (addr_len_used > 0))
{
- pfs->m_fd= fd;
- pfs->m_identity= pfs;
- pfs->m_class= klass;
- pfs->m_enabled= klass->m_enabled && flag_global_instrumentation;
- pfs->m_timed= klass->m_timed;
- pfs->m_idle= false;
- pfs->m_socket_stat.reset();
- pfs->m_lock.dirty_to_allocated();
- pfs->m_thread_owner= NULL;
- if (klass->is_singleton())
- klass->m_singleton= pfs;
- DBUG_RETURN(pfs);
+ pfs->m_addr_len= addr_len_used;
+ memcpy(&pfs->m_sock_addr, addr, addr_len_used);
}
+ else
+ {
+ pfs->m_addr_len= 0;
+ }
+
+ pfs->m_lock.dirty_to_allocated();
+
+ if (klass->is_singleton())
+ klass->m_singleton= pfs;
+ DBUG_RETURN(pfs);
}
}
}
@@ -1598,7 +1593,7 @@ static void reset_mutex_waits_by_instance(void)
DBUG_ENTER("reset_mutex_waits_by_instance");
for ( ; pfs < pfs_last; pfs++)
- pfs->m_wait_stat.reset();
+ pfs->m_mutex_stat.reset();
DBUG_VOID_RETURN;
}
@@ -1609,7 +1604,7 @@ static void reset_rwlock_waits_by_instance(void)
DBUG_ENTER("reset_rwlock_waits_by_instance");
for ( ; pfs < pfs_last; pfs++)
- pfs->m_wait_stat.reset();
+ pfs->m_rwlock_stat.reset();
DBUG_VOID_RETURN;
}
@@ -1620,7 +1615,7 @@ static void reset_cond_waits_by_instance(void)
DBUG_ENTER("reset_cond_waits_by_instance");
for ( ; pfs < pfs_last; pfs++)
- pfs->m_wait_stat.reset();
+ pfs->m_cond_stat.reset();
DBUG_VOID_RETURN;
}
@@ -1678,15 +1673,6 @@ void reset_socket_instance_io(void)
DBUG_VOID_RETURN;
}
-void reset_global_wait_stat()
-{
- PFS_single_stat *stat= global_instr_class_waits_array;
- PFS_single_stat *stat_last= global_instr_class_waits_array + wait_class_max;
-
- for ( ; stat < stat_last; stat++)
- stat->reset();
-}
-
void aggregate_all_event_names(PFS_single_stat *from_array,
PFS_single_stat *to_array)
{
diff --git a/storage/perfschema/pfs_instr.h b/storage/perfschema/pfs_instr.h
index b579c1d7902..2ea44830d2b 100644
--- a/storage/perfschema/pfs_instr.h
+++ b/storage/perfschema/pfs_instr.h
@@ -34,6 +34,8 @@ struct PFS_socket_class;
#else
#include <arpa/inet.h>
#endif
+#include "my_global.h"
+#include "my_compiler.h"
#include "pfs_lock.h"
#include "pfs_stat.h"
#include "pfs_instr_class.h"
@@ -63,24 +65,17 @@ struct PFS_instr
bool m_enabled;
/** Timed flag. */
bool m_timed;
- /** Instrument wait statistics. */
- PFS_single_stat m_wait_stat;
};
/** Instrumented mutex implementation. @see PSI_mutex. */
-struct PFS_mutex : public PFS_instr
+struct PFS_ALIGNED PFS_mutex : public PFS_instr
{
/** Mutex identity, typically a pthread_mutex_t. */
const void *m_identity;
/** Mutex class. */
PFS_mutex_class *m_class;
- /** Instrument wait statistics. */
- PFS_single_stat m_wait_stat;
- /**
- Mutex lock usage statistics.
- This statistic is not exposed in user visible tables yet.
- */
- PFS_single_stat m_lock_stat;
+ /** Instrument statistics. */
+ PFS_mutex_stat m_mutex_stat;
/** Current owner. */
PFS_thread *m_owner;
/**
@@ -91,24 +86,14 @@ struct PFS_mutex : public PFS_instr
};
/** Instrumented rwlock implementation. @see PSI_rwlock. */
-struct PFS_rwlock : public PFS_instr
+struct PFS_ALIGNED PFS_rwlock : public PFS_instr
{
/** RWLock identity, typically a pthread_rwlock_t. */
const void *m_identity;
/** RWLock class. */
PFS_rwlock_class *m_class;
- /** Instrument wait statistics. */
- PFS_single_stat m_wait_stat;
- /**
- RWLock read lock usage statistics.
- This statistic is not exposed in user visible tables yet.
- */
- PFS_single_stat m_read_lock_stat;
- /**
- RWLock write lock usage statistics.
- This statistic is not exposed in user visible tables yet.
- */
- PFS_single_stat m_write_lock_stat;
+ /** Instrument statistics. */
+ PFS_rwlock_stat m_rwlock_stat;
/** Current writer thread. */
PFS_thread *m_writer;
/** Current count of readers. */
@@ -126,7 +111,7 @@ struct PFS_rwlock : public PFS_instr
};
/** Instrumented cond implementation. @see PSI_cond. */
-struct PFS_cond : public PFS_instr
+struct PFS_ALIGNED PFS_cond : public PFS_instr
{
/** Condition identity, typically a pthread_cond_t. */
const void *m_identity;
@@ -139,7 +124,7 @@ struct PFS_cond : public PFS_instr
};
/** Instrumented File and FILE implementation. @see PSI_file. */
-struct PFS_file : public PFS_instr
+struct PFS_ALIGNED PFS_file : public PFS_instr
{
uint32 get_version()
{ return m_lock.get_version(); }
@@ -152,14 +137,12 @@ struct PFS_file : public PFS_instr
uint m_filename_length;
/** File class. */
PFS_file_class *m_class;
- /** Instrument wait statistics. */
- PFS_single_stat m_wait_stat;
/** File usage statistics. */
PFS_file_stat m_file_stat;
};
/** Instrumented table implementation. @see PSI_table. */
-struct PFS_table
+struct PFS_ALIGNED PFS_table
{
/**
True if table io instrumentation is enabled.
@@ -196,12 +179,22 @@ public:
*/
void aggregate(void)
{
- if (likely((m_thread_owner != NULL) && (m_has_io_stats || m_has_lock_stats)))
+ if (m_has_io_stats && m_has_lock_stats)
{
- safe_aggregate(& m_table_stat, m_share, m_thread_owner);
+ safe_aggregate(& m_table_stat, m_share);
m_has_io_stats= false;
m_has_lock_stats= false;
}
+ else if (m_has_io_stats)
+ {
+ safe_aggregate_io(& m_table_stat, m_share);
+ m_has_io_stats= false;
+ }
+ else if (m_has_lock_stats)
+ {
+ safe_aggregate_lock(& m_table_stat, m_share);
+ m_has_lock_stats= false;
+ }
}
/**
@@ -238,18 +231,15 @@ public:
private:
static void safe_aggregate(PFS_table_stat *stat,
- PFS_table_share *safe_share,
- PFS_thread *safe_thread);
+ PFS_table_share *safe_share);
static void safe_aggregate_io(PFS_table_stat *stat,
- PFS_table_share *safe_share,
- PFS_thread *safe_thread);
+ PFS_table_share *safe_share);
static void safe_aggregate_lock(PFS_table_stat *stat,
- PFS_table_share *safe_share,
- PFS_thread *safe_thread);
+ PFS_table_share *safe_share);
};
/** Instrumented socket implementation. @see PSI_socket. */
-struct PFS_socket : public PFS_instr
+struct PFS_ALIGNED PFS_socket : public PFS_instr
{
uint32 get_version()
{ return m_lock.get_version(); }
@@ -371,7 +361,7 @@ private:
/** Instrumented thread implementation. @see PSI_thread. */
-struct PFS_thread : PFS_connection_slice
+struct PFS_ALIGNED PFS_thread : PFS_connection_slice
{
static PFS_thread* get_current_thread(void);
@@ -400,11 +390,11 @@ struct PFS_thread : PFS_connection_slice
/** Pins for digest_hash. */
LF_PINS *m_digest_hash_pins;
/** Internal thread identifier, unique. */
- ulong m_thread_internal_id;
+ ulonglong m_thread_internal_id;
/** Parent internal thread identifier. */
- ulong m_parent_thread_internal_id;
+ ulonglong m_parent_thread_internal_id;
/** External (SHOW PROCESSLIST) thread identifier, not unique. */
- ulong m_thread_id;
+ ulong m_processlist_id;
/** Thread class. */
PFS_thread_class *m_class;
/**
@@ -486,6 +476,8 @@ struct PFS_thread : PFS_connection_slice
int m_command;
/** Start time. */
time_t m_start_time;
+ /** Lock for Processlist state, Processlist info. */
+ pfs_lock m_processlist_lock;
/** Processlist state. */
const char *m_processlist_state_ptr;
/** Length of @c m_processlist_state_ptr. */
@@ -504,9 +496,18 @@ struct PFS_thread : PFS_connection_slice
PFS_host *m_host;
PFS_user *m_user;
PFS_account *m_account;
+
+ /** Reset session connect attributes */
+ void reset_session_connect_attrs();
+
+ /** a buffer for the connection attributes */
+ char *m_session_connect_attrs;
+ /** length used by @c m_connect_attrs */
+ uint m_session_connect_attrs_length;
+ /** character set in which @c m_connect_attrs are encoded */
+ const CHARSET_INFO *m_session_connect_attrs_cs;
};
-extern PFS_single_stat *global_instr_class_waits_array;
extern PFS_stage_stat *global_instr_class_stages_array;
extern PFS_statement_stat *global_instr_class_statements_array;
@@ -529,12 +530,12 @@ PFS_cond* create_cond(PFS_cond_class *klass, const void *identity);
void destroy_cond(PFS_cond *pfs);
PFS_thread* create_thread(PFS_thread_class *klass, const void *identity,
- ulong thread_id);
+ ulonglong processlist_id);
void destroy_thread(PFS_thread *pfs);
PFS_file* find_or_create_file(PFS_thread *thread, PFS_file_class *klass,
- const char *filename, uint len);
+ const char *filename, uint len, bool create);
void release_file(PFS_file *pfs);
void destroy_file(PFS_thread *thread, PFS_file *pfs);
@@ -542,7 +543,10 @@ PFS_table* create_table(PFS_table_share *share, PFS_thread *opening_thread,
const void *identity);
void destroy_table(PFS_table *pfs);
-PFS_socket* create_socket(PFS_socket_class *socket_class, const void *identity);
+PFS_socket* create_socket(PFS_socket_class *socket_class,
+ const my_socket *fd,
+ const struct sockaddr *addr,
+ socklen_t addr_len);
void destroy_socket(PFS_socket *pfs);
/* For iterators and show status. */
@@ -568,6 +572,8 @@ extern ulong events_stages_history_per_thread;
extern ulong events_statements_history_per_thread;
extern ulong locker_lost;
extern ulong statement_lost;
+extern ulong session_connect_attrs_lost;
+extern ulong session_connect_attrs_size_per_thread;
/* Exposing the data directly, for iterators. */
@@ -624,6 +630,8 @@ void update_socket_derived_flags();
/** Update derived flags for all instruments. */
void update_instruments_derived_flags();
+extern LF_HASH filename_hash;
+
/** @} */
#endif
diff --git a/storage/perfschema/pfs_instr_class.cc b/storage/perfschema/pfs_instr_class.cc
index 0a4b47404a4..05c85104a94 100644
--- a/storage/perfschema/pfs_instr_class.cc
+++ b/storage/perfschema/pfs_instr_class.cc
@@ -135,9 +135,12 @@ static PFS_thread_class *thread_class_array= NULL;
*/
PFS_table_share *table_share_array= NULL;
-PFS_instr_class global_table_io_class;
-PFS_instr_class global_table_lock_class;
-PFS_instr_class global_idle_class;
+PFS_ALIGNED PFS_single_stat global_idle_stat;
+PFS_ALIGNED PFS_table_io_stat global_table_io_stat;
+PFS_ALIGNED PFS_table_lock_stat global_table_lock_stat;
+PFS_ALIGNED PFS_instr_class global_table_io_class;
+PFS_ALIGNED PFS_instr_class global_table_lock_class;
+PFS_ALIGNED PFS_instr_class global_idle_class;
/** Class-timer map */
enum_timer_name *class_timers[] =
@@ -165,7 +168,7 @@ enum_timer_name *class_timers[] =
@sa table_share_hash_get_key
@sa get_table_share_hash_pins
*/
-static LF_HASH table_share_hash;
+LF_HASH table_share_hash;
/** True if table_share_hash is initialized. */
static bool table_share_hash_inited= false;
@@ -193,19 +196,17 @@ uint mutex_class_start= 0;
uint rwlock_class_start= 0;
uint cond_class_start= 0;
uint file_class_start= 0;
-uint table_class_start= 0;
uint wait_class_max= 0;
uint socket_class_start= 0;
void init_event_name_sizing(const PFS_global_param *param)
{
- mutex_class_start= 0;
+ mutex_class_start= 3; /* global table io, table lock, idle */
rwlock_class_start= mutex_class_start + param->m_mutex_class_sizing;
cond_class_start= rwlock_class_start + param->m_rwlock_class_sizing;
file_class_start= cond_class_start + param->m_cond_class_sizing;
socket_class_start= file_class_start + param->m_file_class_sizing;
- table_class_start= socket_class_start + param->m_socket_class_sizing;
- wait_class_max= table_class_start + 3; /* global table io, lock, idle */
+ wait_class_max= socket_class_start + param->m_socket_class_sizing;
}
void register_global_classes()
@@ -213,19 +214,19 @@ void register_global_classes()
/* Table IO class */
init_instr_class(&global_table_io_class, "wait/io/table/sql/handler", 25,
0, PFS_CLASS_TABLE_IO);
- global_table_io_class.m_event_name_index= table_class_start;
+ global_table_io_class.m_event_name_index= GLOBAL_TABLE_IO_EVENT_INDEX;
configure_instr_class(&global_table_io_class);
/* Table lock class */
init_instr_class(&global_table_lock_class, "wait/lock/table/sql/handler", 27,
0, PFS_CLASS_TABLE_LOCK);
- global_table_lock_class.m_event_name_index= table_class_start + 1;
+ global_table_lock_class.m_event_name_index= GLOBAL_TABLE_LOCK_EVENT_INDEX;
configure_instr_class(&global_table_lock_class);
/* Idle class */
init_instr_class(&global_idle_class, "idle", 4,
0, PFS_CLASS_IDLE);
- global_idle_class.m_event_name_index= table_class_start + 2;
+ global_idle_class.m_event_name_index= GLOBAL_IDLE_EVENT_INDEX;
configure_instr_class(&global_idle_class);
}
@@ -384,6 +385,7 @@ int init_table_share_hash(void)
{
lf_hash_init(&table_share_hash, sizeof(PFS_table_share*), LF_HASH_UNIQUE,
0, 0, table_share_hash_get_key, &my_charset_bin);
+ table_share_hash.size= table_share_max;
table_share_hash_inited= true;
}
return 0;
@@ -715,7 +717,7 @@ PFS_sync_key register_mutex_class(const char *name, uint name_length,
*/
entry= &mutex_class_array[index];
init_instr_class(entry, name, name_length, flags, PFS_CLASS_MUTEX);
- entry->m_lock_stat.reset();
+ entry->m_mutex_stat.reset();
entry->m_event_name_index= mutex_class_start + index;
entry->m_singleton= NULL;
entry->m_enabled= false; /* disabled by default */
@@ -781,8 +783,7 @@ PFS_sync_key register_rwlock_class(const char *name, uint name_length,
{
entry= &rwlock_class_array[index];
init_instr_class(entry, name, name_length, flags, PFS_CLASS_RWLOCK);
- entry->m_read_lock_stat.reset();
- entry->m_write_lock_stat.reset();
+ entry->m_rwlock_stat.reset();
entry->m_event_name_index= rwlock_class_start + index;
entry->m_singleton= NULL;
entry->m_enabled= false; /* disabled by default */
@@ -1193,7 +1194,7 @@ static void set_keys(PFS_table_share *pfs, const TABLE_SHARE *share)
pfs_key->m_name_length= len;
}
- pfs_key_last= pfs->m_keys + MAX_KEY;
+ pfs_key_last= pfs->m_keys + MAX_INDEXES;
for ( ; pfs_key < pfs_key_last; pfs_key++)
pfs_key->m_name_length= 0;
}
@@ -1256,7 +1257,7 @@ PFS_table_share* find_or_create_table_share(PFS_thread *thread,
const uint retry_max= 3;
bool enabled= true;
bool timed= true;
- static uint table_share_monotonic_index= 0;
+ static uint PFS_ALIGNED table_share_monotonic_index= 0;
uint index;
uint attempts= 0;
PFS_table_share *pfs;
@@ -1299,8 +1300,7 @@ search:
while (++attempts <= table_share_max)
{
/* See create_mutex() */
- PFS_atomic::add_u32(& table_share_monotonic_index, 1);
- index= table_share_monotonic_index % table_share_max;
+ index= PFS_atomic::add_u32(& table_share_monotonic_index, 1) % table_share_max;
pfs= table_share_array + index;
if (pfs->m_lock.is_free())
@@ -1353,17 +1353,28 @@ search:
void PFS_table_share::aggregate_io(void)
{
- uint index= global_table_io_class.m_event_name_index;
- PFS_single_stat *table_io_total= & global_instr_class_waits_array[index];
- m_table_stat.sum_io(table_io_total);
+ uint safe_key_count= sanitize_index_count(m_key_count);
+ PFS_table_io_stat *from_stat;
+ PFS_table_io_stat *from_stat_last;
+ PFS_table_io_stat sum_io;
+
+ /* Aggregate stats for each index, if any */
+ from_stat= & m_table_stat.m_index_stat[0];
+ from_stat_last= from_stat + safe_key_count;
+ for ( ; from_stat < from_stat_last ; from_stat++)
+ sum_io.aggregate(from_stat);
+
+ /* Aggregate stats for the table */
+ sum_io.aggregate(& m_table_stat.m_index_stat[MAX_INDEXES]);
+
+ /* Add this table stats to the global sink. */
+ global_table_io_stat.aggregate(& sum_io);
m_table_stat.fast_reset_io();
}
void PFS_table_share::aggregate_lock(void)
{
- uint index= global_table_lock_class.m_event_name_index;
- PFS_single_stat *table_lock_total= & global_instr_class_waits_array[index];
- m_table_stat.sum_lock(table_lock_total);
+ global_table_lock_stat.aggregate(& m_table_stat.m_lock_stat);
m_table_stat.fast_reset_lock();
}
@@ -1418,6 +1429,16 @@ PFS_table_share *sanitize_table_share(PFS_table_share *unsafe)
SANITIZE_ARRAY_BODY(PFS_table_share, table_share_array, table_share_max, unsafe);
}
+/** Reset the wait statistics per instrument class. */
+void reset_events_waits_by_class()
+{
+ reset_file_class_io();
+ reset_socket_class_io();
+ global_idle_stat.reset();
+ global_table_io_stat.reset();
+ global_table_lock_stat.reset();
+}
+
/** Reset the io statistics per file class. */
void reset_file_class_io(void)
{
diff --git a/storage/perfschema/pfs_instr_class.h b/storage/perfschema/pfs_instr_class.h
index bef25e76467..d0b90734b66 100644
--- a/storage/perfschema/pfs_instr_class.h
+++ b/storage/perfschema/pfs_instr_class.h
@@ -16,7 +16,10 @@
#ifndef PFS_INSTR_CLASS_H
#define PFS_INSTR_CLASS_H
+#include "my_global.h"
#include "mysql_com.h" /* NAME_LEN */
+#include "lf.h"
+#include "pfs_global.h"
/**
@file storage/perfschema/pfs_instr_class.h
@@ -112,7 +115,6 @@ extern uint mutex_class_start;
extern uint rwlock_class_start;
extern uint cond_class_start;
extern uint file_class_start;
-extern uint table_class_start;
extern uint socket_class_start;
extern uint wait_class_max;
@@ -166,13 +168,10 @@ struct PFS_instr_class
struct PFS_mutex;
/** Instrumentation metadata for a MUTEX. */
-struct PFS_mutex_class : public PFS_instr_class
+struct PFS_ALIGNED PFS_mutex_class : public PFS_instr_class
{
- /**
- Lock statistics.
- This statistic is not exposed in user visible tables yet.
- */
- PFS_single_stat m_lock_stat;
+ /** Mutex usage statistics. */
+ PFS_mutex_stat m_mutex_stat;
/** Singleton instance. */
PFS_mutex *m_singleton;
};
@@ -180,18 +179,10 @@ struct PFS_mutex_class : public PFS_instr_class
struct PFS_rwlock;
/** Instrumentation metadata for a RWLOCK. */
-struct PFS_rwlock_class : public PFS_instr_class
+struct PFS_ALIGNED PFS_rwlock_class : public PFS_instr_class
{
- /**
- Read lock statistics.
- This statistic is not exposed in user visible tables yet.
- */
- PFS_single_stat m_read_lock_stat;
- /**
- Write lock statistics.
- This statistic is not exposed in user visible tables yet.
- */
- PFS_single_stat m_write_lock_stat;
+ /** Rwlock usage statistics. */
+ PFS_rwlock_stat m_rwlock_stat;
/** Singleton instance. */
PFS_rwlock *m_singleton;
};
@@ -199,7 +190,7 @@ struct PFS_rwlock_class : public PFS_instr_class
struct PFS_cond;
/** Instrumentation metadata for a COND. */
-struct PFS_cond_class : public PFS_instr_class
+struct PFS_ALIGNED PFS_cond_class : public PFS_instr_class
{
/**
Condition usage statistics.
@@ -211,7 +202,7 @@ struct PFS_cond_class : public PFS_instr_class
};
/** Instrumentation metadata of a thread. */
-struct PFS_thread_class
+struct PFS_ALIGNED PFS_thread_class
{
/** True if this thread instrument is enabled. */
bool m_enabled;
@@ -249,7 +240,7 @@ struct PFS_table_key
};
/** Instrumentation metadata for a table share. */
-struct PFS_table_share
+struct PFS_ALIGNED PFS_table_share
{
public:
uint32 get_version()
@@ -318,13 +309,31 @@ public:
/** Table statistics. */
PFS_table_stat m_table_stat;
/** Index names. */
- PFS_table_key m_keys[MAX_KEY];
+ PFS_table_key m_keys[MAX_INDEXES];
private:
/** Number of opened table handles. */
int m_refcount;
};
+/** Statistics for the IDLE instrument. */
+extern PFS_single_stat global_idle_stat;
+/** Statistics for dropped table io. */
+extern PFS_table_io_stat global_table_io_stat;
+/** Statistics for dropped table lock. */
+extern PFS_table_lock_stat global_table_lock_stat;
+
+inline uint sanitize_index_count(uint count)
+{
+ if (likely(count <= MAX_INDEXES))
+ return count;
+ return 0;
+}
+
+#define GLOBAL_TABLE_IO_EVENT_INDEX 0
+#define GLOBAL_TABLE_LOCK_EVENT_INDEX 1
+#define GLOBAL_IDLE_EVENT_INDEX 2
+
/**
Instrument controlling all table io.
This instrument is used with table SETUP_OBJECTS.
@@ -345,7 +354,7 @@ extern PFS_instr_class global_idle_class;
struct PFS_file;
/** Instrumentation metadata for a file. */
-struct PFS_file_class : public PFS_instr_class
+struct PFS_ALIGNED PFS_file_class : public PFS_instr_class
{
/** File usage statistics. */
PFS_file_stat m_file_stat;
@@ -354,21 +363,21 @@ struct PFS_file_class : public PFS_instr_class
};
/** Instrumentation metadata for a stage. */
-struct PFS_stage_class : public PFS_instr_class
+struct PFS_ALIGNED PFS_stage_class : public PFS_instr_class
{
/** Stage usage statistics. */
PFS_stage_stat m_stage_stat;
};
/** Instrumentation metadata for a statement. */
-struct PFS_statement_class : public PFS_instr_class
+struct PFS_ALIGNED PFS_statement_class : public PFS_instr_class
{
};
struct PFS_socket;
/** Instrumentation metadata for a socket. */
-struct PFS_socket_class : public PFS_instr_class
+struct PFS_ALIGNED PFS_socket_class : public PFS_instr_class
{
/** Socket usage statistics. */
PFS_socket_stat m_socket_stat;
@@ -483,12 +492,15 @@ extern PFS_cond_class *cond_class_array;
extern PFS_file_class *file_class_array;
extern PFS_table_share *table_share_array;
+void reset_events_waits_by_class();
void reset_file_class_io();
void reset_socket_class_io();
/** Update derived flags for all table shares. */
void update_table_share_derived_flags(PFS_thread *thread);
+extern LF_HASH table_share_hash;
+
/** @} */
#endif
diff --git a/storage/perfschema/pfs_lock.h b/storage/perfschema/pfs_lock.h
index 65937e94ece..09efecd1c5f 100644
--- a/storage/perfschema/pfs_lock.h
+++ b/storage/perfschema/pfs_lock.h
@@ -33,7 +33,7 @@
Values of a free record should not be read by a reader.
Writers can concurrently attempt to allocate a free record.
*/
-#define PFS_LOCK_FREE 0
+#define PFS_LOCK_FREE 0x00
/**
State of a dirty record.
Values of a dirty record should not be read by a reader,
@@ -41,14 +41,18 @@
Only one writer, the writer which owns the record, should
modify the record content.
*/
-#define PFS_LOCK_DIRTY 1
+#define PFS_LOCK_DIRTY 0x01
/**
State of an allocated record.
Values of an allocated record are safe to read by a reader.
A writer may modify some but not all properties of the record:
only modifying values that can never cause the reader to crash is allowed.
*/
-#define PFS_LOCK_ALLOCATED 2
+#define PFS_LOCK_ALLOCATED 0x02
+
+#define VERSION_MASK 0xFFFFFFFC
+#define STATE_MASK 0x00000003
+#define VERSION_INC 4
/**
A 'lock' protecting performance schema internal buffers.
@@ -60,15 +64,11 @@
struct pfs_lock
{
/**
- The record internal state.
+ The record internal version and state
@sa PFS_LOCK_FREE
@sa PFS_LOCK_DIRTY
@sa PFS_LOCK_ALLOCATED
- */
- volatile int32 m_state;
- /**
- The record internal version number.
- This version number is to transform the 'ABA' problem
+ The version number is to transform the 'ABA' problem
(see http://en.wikipedia.org/wiki/ABA_problem)
into an 'A(n)BA(n + 1)' problem, where 'n' is the m_version number.
When the performance schema instrumentation deletes a record,
@@ -76,21 +76,23 @@ struct pfs_lock
the version number is incremented, so that a reader can detect that
the record was changed. Note that the version number is never
reset to zero when a new record is created.
+ The version number is stored in the high 30 bits.
+ The state is stored in the low 2 bits.
*/
- volatile uint32 m_version;
+ volatile uint32 m_version_state;
/** Returns true if the record is free. */
bool is_free(void)
{
- /* This is a dirty read */
- return (m_state == PFS_LOCK_FREE);
+ uint32 copy= m_version_state; /* non volatile copy, and dirty read */
+ return ((copy & STATE_MASK) == PFS_LOCK_FREE);
}
/** Returns true if the record contains values that can be read. */
bool is_populated(void)
{
- int32 copy= m_state; /* non volatile copy, and dirty read */
- return (copy == PFS_LOCK_ALLOCATED);
+ uint32 copy= m_version_state; /* non volatile copy, and dirty read */
+ return ((copy & STATE_MASK) == PFS_LOCK_ALLOCATED);
}
/**
@@ -101,10 +103,11 @@ struct pfs_lock
*/
bool free_to_dirty(void)
{
- int32 old_state= PFS_LOCK_FREE;
- int32 new_state= PFS_LOCK_DIRTY;
+ uint32 copy= m_version_state; /* non volatile copy, and dirty read */
+ uint32 old_val= (copy & VERSION_MASK) + PFS_LOCK_FREE;
+ uint32 new_val= (copy & VERSION_MASK) + PFS_LOCK_DIRTY;
- return (PFS_atomic::cas_32(&m_state, &old_state, new_state));
+ return (PFS_atomic::cas_u32(&m_version_state, &old_val, new_val));
}
/**
@@ -114,8 +117,13 @@ struct pfs_lock
*/
void allocated_to_dirty(void)
{
- DBUG_ASSERT(m_state == PFS_LOCK_ALLOCATED);
- PFS_atomic::store_32(&m_state, PFS_LOCK_DIRTY);
+ uint32 copy= PFS_atomic::load_u32(&m_version_state);
+ /* Make sure the record was ALLOCATED. */
+ DBUG_ASSERT((copy & STATE_MASK) == PFS_LOCK_ALLOCATED);
+ /* Keep the same version, set the DIRTY state */
+ uint32 new_val= (copy & VERSION_MASK) + PFS_LOCK_DIRTY;
+ /* We own the record, no need to use compare and swap. */
+ PFS_atomic::store_u32(&m_version_state, new_val);
}
/**
@@ -125,9 +133,26 @@ struct pfs_lock
*/
void dirty_to_allocated(void)
{
- DBUG_ASSERT(m_state == PFS_LOCK_DIRTY);
- PFS_atomic::add_u32(&m_version, 1);
- PFS_atomic::store_32(&m_state, PFS_LOCK_ALLOCATED);
+ uint32 copy= PFS_atomic::load_u32(&m_version_state);
+ /* Make sure the record was DIRTY. */
+ DBUG_ASSERT((copy & STATE_MASK) == PFS_LOCK_DIRTY);
+ /* Increment the version, set the ALLOCATED state */
+ uint32 new_val= (copy & VERSION_MASK) + VERSION_INC + PFS_LOCK_ALLOCATED;
+ PFS_atomic::store_u32(&m_version_state, new_val);
+ }
+
+ /**
+ Initialize a lock to allocated.
+ This transition should be executed by the writer that owns the record and the lock,
+ after the record is in a state ready to be read.
+ */
+ void set_allocated(void)
+ {
+ /* Do not set the version to 0, read the previous value. */
+ uint32 copy= PFS_atomic::load_u32(&m_version_state);
+ /* Increment the version, set the ALLOCATED state */
+ uint32 new_val= (copy & VERSION_MASK) + VERSION_INC + PFS_LOCK_ALLOCATED;
+ PFS_atomic::store_u32(&m_version_state, new_val);
}
/**
@@ -136,8 +161,12 @@ struct pfs_lock
*/
void dirty_to_free(void)
{
- DBUG_ASSERT(m_state == PFS_LOCK_DIRTY);
- PFS_atomic::store_32(&m_state, PFS_LOCK_FREE);
+ uint32 copy= PFS_atomic::load_u32(&m_version_state);
+ /* Make sure the record was DIRTY. */
+ DBUG_ASSERT((copy & STATE_MASK) == PFS_LOCK_DIRTY);
+ /* Keep the same version, set the FREE state */
+ uint32 new_val= (copy & VERSION_MASK) + PFS_LOCK_FREE;
+ PFS_atomic::store_u32(&m_version_state, new_val);
}
/**
@@ -153,8 +182,12 @@ struct pfs_lock
The correct assert to use here to guarantee data integrity is simply:
DBUG_ASSERT(m_state == PFS_LOCK_ALLOCATED);
*/
- DBUG_ASSERT(m_state == PFS_LOCK_ALLOCATED);
- PFS_atomic::store_32(&m_state, PFS_LOCK_FREE);
+ uint32 copy= PFS_atomic::load_u32(&m_version_state);
+ /* Make sure the record was ALLOCATED. */
+ DBUG_ASSERT(((copy & STATE_MASK) == PFS_LOCK_ALLOCATED));
+ /* Keep the same version, set the FREE state */
+ uint32 new_val= (copy & VERSION_MASK) + PFS_LOCK_FREE;
+ PFS_atomic::store_u32(&m_version_state, new_val);
}
/**
@@ -163,8 +196,7 @@ struct pfs_lock
*/
void begin_optimistic_lock(struct pfs_lock *copy)
{
- copy->m_version= PFS_atomic::load_u32(&m_version);
- copy->m_state= PFS_atomic::load_32(&m_state);
+ copy->m_version_state= PFS_atomic::load_u32(&m_version_state);
}
/**
@@ -174,19 +206,20 @@ struct pfs_lock
*/
bool end_optimistic_lock(struct pfs_lock *copy)
{
- /*
- return true if:
- - the version + state has not changed
- - and there was valid data to look at
- */
- return ((copy->m_version == PFS_atomic::load_u32(&m_version)) &&
- (copy->m_state == PFS_atomic::load_32(&m_state)) &&
- (copy->m_state == PFS_LOCK_ALLOCATED));
+ /* Check there was valid data to look at. */
+ if ((copy->m_version_state & STATE_MASK) != PFS_LOCK_ALLOCATED)
+ return false;
+
+ /* Check the version + state has not changed. */
+ if (copy->m_version_state != PFS_atomic::load_u32(&m_version_state))
+ return false;
+
+ return true;
}
uint32 get_version()
{
- return PFS_atomic::load_u32(&m_version);
+ return (PFS_atomic::load_u32(&m_version_state) & VERSION_MASK);
}
};
diff --git a/storage/perfschema/pfs_server.cc b/storage/perfschema/pfs_server.cc
index 3df0f27f652..383a46785fb 100644
--- a/storage/perfschema/pfs_server.cc
+++ b/storage/perfschema/pfs_server.cc
@@ -1,4 +1,4 @@
-/* Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2008, 2012, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -50,11 +50,16 @@ static void cleanup_performance_schema(void);
void cleanup_instrument_config(void);
struct PSI_bootstrap*
-initialize_performance_schema(const PFS_global_param *param)
+initialize_performance_schema(PFS_global_param *param)
{
pfs_initialized= false;
PFS_table_stat::g_reset_template.reset();
+ global_idle_stat.reset();
+ global_table_io_stat.reset();
+ global_table_lock_stat.reset();
+
+ pfs_automated_sizing(param);
if (! param->m_enabled)
{
diff --git a/storage/perfschema/pfs_server.h b/storage/perfschema/pfs_server.h
index f65febdeb6d..e0c782fde58 100644
--- a/storage/perfschema/pfs_server.h
+++ b/storage/perfschema/pfs_server.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2008, 2012, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -24,96 +24,50 @@
#ifndef PFS_MAX_MUTEX_CLASS
#define PFS_MAX_MUTEX_CLASS 200
#endif
-#ifndef PFS_MAX_MUTEX
- #define PFS_MAX_MUTEX 1000000
-#endif
#ifndef PFS_MAX_RWLOCK_CLASS
#define PFS_MAX_RWLOCK_CLASS 30
#endif
-#ifndef PFS_MAX_RWLOCK
- #define PFS_MAX_RWLOCK 1000000
-#endif
#ifndef PFS_MAX_COND_CLASS
#define PFS_MAX_COND_CLASS 80
#endif
-#ifndef PFS_MAX_COND
- #define PFS_MAX_COND 1000
-#endif
#ifndef PFS_MAX_THREAD_CLASS
#define PFS_MAX_THREAD_CLASS 50
#endif
-#ifndef PFS_MAX_THREAD
- #define PFS_MAX_THREAD 1000
-#endif
#ifndef PFS_MAX_FILE_CLASS
#define PFS_MAX_FILE_CLASS 50
#endif
-#ifndef PFS_MAX_FILE
- #define PFS_MAX_FILE 10000
-#endif
#ifndef PFS_MAX_FILE_HANDLE
#define PFS_MAX_FILE_HANDLE 32768
#endif
-#ifndef PFS_MAX_SOCKETS
- #define PFS_MAX_SOCKETS 1000
-#endif
#ifndef PFS_MAX_SOCKET_CLASS
#define PFS_MAX_SOCKET_CLASS 10
#endif
-#ifndef PFS_MAX_TABLE_SHARE
- #define PFS_MAX_TABLE_SHARE 1000
-#endif
-#ifndef PFS_MAX_TABLE
- #define PFS_MAX_TABLE 10000
-#endif
-#ifndef PFS_WAITS_HISTORY_SIZE
- #define PFS_WAITS_HISTORY_SIZE 10
-#endif
-#ifndef PFS_WAITS_HISTORY_LONG_SIZE
- #define PFS_WAITS_HISTORY_LONG_SIZE 10000
-#endif
#ifndef PFS_MAX_SETUP_ACTOR
#define PFS_MAX_SETUP_ACTOR 100
#endif
#ifndef PFS_MAX_SETUP_OBJECT
#define PFS_MAX_SETUP_OBJECT 100
#endif
-#ifndef PFS_MAX_HOST
- #define PFS_MAX_HOST 100
-#endif
-#ifndef PFS_MAX_USER
- #define PFS_MAX_USER 100
-#endif
-#ifndef PFS_MAX_ACCOUNT
- #define PFS_MAX_ACCOUNT 100
-#endif
#ifndef PFS_MAX_STAGE_CLASS
#define PFS_MAX_STAGE_CLASS 150
#endif
-#ifndef PFS_STAGES_HISTORY_SIZE
- #define PFS_STAGES_HISTORY_SIZE 10
-#endif
-#ifndef PFS_STAGES_HISTORY_LONG_SIZE
- #define PFS_STAGES_HISTORY_LONG_SIZE 10000
-#endif
-#ifndef PFS_STATEMENTS_HISTORY_SIZE
- #define PFS_STATEMENTS_HISTORY_SIZE 10
-#endif
-#ifndef PFS_STATEMENTS_HISTORY_LONG_SIZE
- #define PFS_STATEMENTS_HISTORY_LONG_SIZE 10000
-#endif
#ifndef PFS_STATEMENTS_STACK_SIZE
#define PFS_STATEMENTS_STACK_SIZE 10
#endif
-#ifndef PFS_DIGEST_SIZE
- #define PFS_DIGEST_SIZE 200
-#endif
+
+struct PFS_sizing_hints
+{
+ long m_table_definition_cache;
+ long m_table_open_cache;
+ long m_max_connections;
+ long m_open_files_limit;
+};
/** Performance schema global sizing parameters. */
struct PFS_global_param
{
/** True if the performance schema is enabled. */
- bool m_enabled;
+ bool m_enabled;
/** Default values for SETUP_CONSUMERS. */
bool m_consumer_events_stages_current_enabled;
bool m_consumer_events_stages_history_enabled;
@@ -155,7 +109,7 @@ struct PFS_global_param
Maximum number of instrumented table share.
@sa table_share_lost.
*/
- ulong m_table_share_sizing;
+ long m_table_share_sizing;
/**
Maximum number of instrumented file classes.
@sa file_class_lost.
@@ -165,81 +119,86 @@ struct PFS_global_param
Maximum number of instrumented mutex instances.
@sa mutex_lost.
*/
- ulong m_mutex_sizing;
+ long m_mutex_sizing;
/**
Maximum number of instrumented rwlock instances.
@sa rwlock_lost.
*/
- ulong m_rwlock_sizing;
+ long m_rwlock_sizing;
/**
Maximum number of instrumented cond instances.
@sa cond_lost.
*/
- ulong m_cond_sizing;
+ long m_cond_sizing;
/**
Maximum number of instrumented thread instances.
@sa thread_lost.
*/
- ulong m_thread_sizing;
+ long m_thread_sizing;
/**
Maximum number of instrumented table handles.
@sa table_lost.
*/
- ulong m_table_sizing;
+ long m_table_sizing;
/**
Maximum number of instrumented file instances.
@sa file_lost.
*/
- ulong m_file_sizing;
+ long m_file_sizing;
/**
Maximum number of instrumented file handles.
@sa file_handle_lost.
*/
- ulong m_file_handle_sizing;
+ long m_file_handle_sizing;
/**
Maxium number of instrumented socket instances
@sa socket_lost
*/
- ulong m_socket_sizing;
+ long m_socket_sizing;
/**
Maximum number of instrumented socket classes.
@sa socket_class_lost.
*/
ulong m_socket_class_sizing;
/** Maximum number of rows per thread in table EVENTS_WAITS_HISTORY. */
- ulong m_events_waits_history_sizing;
+ long m_events_waits_history_sizing;
/** Maximum number of rows in table EVENTS_WAITS_HISTORY_LONG. */
- ulong m_events_waits_history_long_sizing;
+ long m_events_waits_history_long_sizing;
/** Maximum number of rows in table SETUP_ACTORS. */
ulong m_setup_actor_sizing;
/** Maximum number of rows in table SETUP_OBJECTS. */
ulong m_setup_object_sizing;
/** Maximum number of rows in table HOSTS. */
- ulong m_host_sizing;
+ long m_host_sizing;
/** Maximum number of rows in table USERS. */
- ulong m_user_sizing;
+ long m_user_sizing;
/** Maximum number of rows in table ACCOUNTS. */
- ulong m_account_sizing;
+ long m_account_sizing;
/**
Maximum number of instrumented stage classes.
@sa stage_class_lost.
*/
ulong m_stage_class_sizing;
/** Maximum number of rows per thread in table EVENTS_STAGES_HISTORY. */
- ulong m_events_stages_history_sizing;
+ long m_events_stages_history_sizing;
/** Maximum number of rows in table EVENTS_STAGES_HISTORY_LONG. */
- ulong m_events_stages_history_long_sizing;
+ long m_events_stages_history_long_sizing;
/**
Maximum number of instrumented statement classes.
@sa statement_class_lost.
*/
ulong m_statement_class_sizing;
/** Maximum number of rows per thread in table EVENTS_STATEMENT_HISTORY. */
- ulong m_events_statements_history_sizing;
+ long m_events_statements_history_sizing;
/** Maximum number of rows in table EVENTS_STATEMENTS_HISTORY_LONG. */
- ulong m_events_statements_history_long_sizing;
+ long m_events_statements_history_long_sizing;
/** Maximum number of digests to be captured */
- ulong m_digest_sizing;
+ long m_digest_sizing;
+ /** Maximum number of session attribute strings per thread */
+ long m_session_connect_attrs_sizing;
+
+ /** Sizing hints, for auto tuning. */
+ PFS_sizing_hints m_hints;
};
/**
@@ -254,7 +213,9 @@ extern PFS_global_param pfs_param;
@return A boostrap handle, or NULL.
*/
struct PSI_bootstrap*
-initialize_performance_schema(const PFS_global_param *param);
+initialize_performance_schema(PFS_global_param *param);
+
+void pfs_automated_sizing(PFS_global_param *param);
/**
Initialize the performance schema ACL.
diff --git a/storage/perfschema/pfs_setup_actor.cc b/storage/perfschema/pfs_setup_actor.cc
index a587d3643d2..943654ce1c9 100644
--- a/storage/perfschema/pfs_setup_actor.cc
+++ b/storage/perfschema/pfs_setup_actor.cc
@@ -43,7 +43,7 @@ ulong setup_actor_max;
PFS_setup_actor *setup_actor_array= NULL;
/** Hash table for setup_actor records. */
-static LF_HASH setup_actor_hash;
+LF_HASH setup_actor_hash;
/** True if @c setup_actor_hash is initialized. */
static bool setup_actor_hash_inited= false;
@@ -100,10 +100,11 @@ C_MODE_END
*/
int init_setup_actor_hash(void)
{
- if (! setup_actor_hash_inited)
+ if ((! setup_actor_hash_inited) && (setup_actor_max > 0))
{
lf_hash_init(&setup_actor_hash, sizeof(PFS_setup_actor*), LF_HASH_UNIQUE,
0, 0, setup_actor_hash_get_key, &my_charset_bin);
+ setup_actor_hash.size= setup_actor_max;
setup_actor_hash_inited= true;
}
return 0;
@@ -167,7 +168,7 @@ int insert_setup_actor(const String *user, const String *host, const String *rol
if (unlikely(pins == NULL))
return HA_ERR_OUT_OF_MEM;
- static uint setup_actor_monotonic_index= 0;
+ static uint PFS_ALIGNED setup_actor_monotonic_index= 0;
uint index;
uint attempts= 0;
PFS_setup_actor *pfs;
@@ -175,8 +176,7 @@ int insert_setup_actor(const String *user, const String *host, const String *rol
while (++attempts <= setup_actor_max)
{
/* See create_mutex() */
- PFS_atomic::add_u32(& setup_actor_monotonic_index, 1);
- index= setup_actor_monotonic_index % setup_actor_max;
+ index= PFS_atomic::add_u32(& setup_actor_monotonic_index, 1) % setup_actor_max;
pfs= setup_actor_array + index;
if (pfs->m_lock.is_free())
diff --git a/storage/perfschema/pfs_setup_actor.h b/storage/perfschema/pfs_setup_actor.h
index 8b0ee8a485c..baebd27f0ad 100644
--- a/storage/perfschema/pfs_setup_actor.h
+++ b/storage/perfschema/pfs_setup_actor.h
@@ -49,7 +49,7 @@ struct PFS_setup_actor_key
};
/** A setup_actor record. */
-struct PFS_setup_actor
+struct PFS_ALIGNED PFS_setup_actor
{
/** Internal lock. */
pfs_lock m_lock;
@@ -92,6 +92,8 @@ extern ulong setup_actor_max;
extern PFS_setup_actor *setup_actor_array;
+extern LF_HASH setup_actor_hash;
+
/** @} */
#endif
diff --git a/storage/perfschema/pfs_setup_object.cc b/storage/perfschema/pfs_setup_object.cc
index a9e9bb7881b..0ca7986e818 100644
--- a/storage/perfschema/pfs_setup_object.cc
+++ b/storage/perfschema/pfs_setup_object.cc
@@ -39,7 +39,7 @@ ulong setup_object_max;
PFS_setup_object *setup_object_array= NULL;
-static LF_HASH setup_object_hash;
+LF_HASH setup_object_hash;
static bool setup_object_hash_inited= false;
/**
@@ -95,10 +95,11 @@ C_MODE_END
*/
int init_setup_object_hash(void)
{
- if (! setup_object_hash_inited)
+ if ((! setup_object_hash_inited) && (setup_object_max > 0))
{
lf_hash_init(&setup_object_hash, sizeof(PFS_setup_object*), LF_HASH_UNIQUE,
0, 0, setup_object_hash_get_key, &my_charset_bin);
+ setup_object_hash.size= setup_object_max;
setup_object_hash_inited= true;
}
return 0;
@@ -161,7 +162,7 @@ int insert_setup_object(enum_object_type object_type, const String *schema,
if (unlikely(pins == NULL))
return HA_ERR_OUT_OF_MEM;
- static uint setup_object_monotonic_index= 0;
+ static uint PFS_ALIGNED setup_object_monotonic_index= 0;
uint index;
uint attempts= 0;
PFS_setup_object *pfs;
@@ -169,8 +170,7 @@ int insert_setup_object(enum_object_type object_type, const String *schema,
while (++attempts <= setup_object_max)
{
/* See create_mutex() */
- PFS_atomic::add_u32(& setup_object_monotonic_index, 1);
- index= setup_object_monotonic_index % setup_object_max;
+ index= PFS_atomic::add_u32(& setup_object_monotonic_index, 1) % setup_object_max;
pfs= setup_object_array + index;
if (pfs->m_lock.is_free())
diff --git a/storage/perfschema/pfs_setup_object.h b/storage/perfschema/pfs_setup_object.h
index 44d2b76c627..2615802fe01 100644
--- a/storage/perfschema/pfs_setup_object.h
+++ b/storage/perfschema/pfs_setup_object.h
@@ -45,7 +45,7 @@ struct PFS_setup_object_key
};
/** A setup_object record. */
-struct PFS_setup_object
+struct PFS_ALIGNED PFS_setup_object
{
enum_object_type get_object_type()
{
@@ -96,6 +96,8 @@ extern ulong setup_object_max;
extern PFS_setup_object *setup_object_array;
+extern LF_HASH setup_object_hash;
+
/** @} */
#endif
diff --git a/storage/perfschema/pfs_stat.h b/storage/perfschema/pfs_stat.h
index 32c462b8ba2..2a255a9e5b2 100644
--- a/storage/perfschema/pfs_stat.h
+++ b/storage/perfschema/pfs_stat.h
@@ -140,13 +140,90 @@ struct PFS_byte_stat : public PFS_single_stat
}
};
+/** Statistics for mutex usage. */
+struct PFS_mutex_stat
+{
+ /** Wait statistics. */
+ PFS_single_stat m_wait_stat;
+ /**
+ Lock statistics.
+ This statistic is not exposed in user visible tables yet.
+ */
+ PFS_single_stat m_lock_stat;
+
+ inline void aggregate(const PFS_mutex_stat *stat)
+ {
+ m_wait_stat.aggregate(&stat->m_wait_stat);
+ m_lock_stat.aggregate(&stat->m_lock_stat);
+ }
+
+ inline void reset(void)
+ {
+ m_wait_stat.reset();
+ m_lock_stat.reset();
+ }
+};
+
+/** Statistics for rwlock usage. */
+struct PFS_rwlock_stat
+{
+ /** Wait statistics. */
+ PFS_single_stat m_wait_stat;
+ /**
+ RWLock read lock usage statistics.
+ This statistic is not exposed in user visible tables yet.
+ */
+ PFS_single_stat m_read_lock_stat;
+ /**
+ RWLock write lock usage statistics.
+ This statistic is not exposed in user visible tables yet.
+ */
+ PFS_single_stat m_write_lock_stat;
+
+ inline void aggregate(const PFS_rwlock_stat *stat)
+ {
+ m_wait_stat.aggregate(&stat->m_wait_stat);
+ m_read_lock_stat.aggregate(&stat->m_read_lock_stat);
+ m_write_lock_stat.aggregate(&stat->m_write_lock_stat);
+ }
+
+ inline void reset(void)
+ {
+ m_wait_stat.reset();
+ m_read_lock_stat.reset();
+ m_write_lock_stat.reset();
+ }
+};
+
/** Statistics for COND usage. */
struct PFS_cond_stat
{
- /** Number of times a condition was signalled. */
+ /** Wait statistics. */
+ PFS_single_stat m_wait_stat;
+ /**
+ Number of times a condition was signalled.
+ This statistic is not exposed in user visible tables yet.
+ */
ulonglong m_signal_count;
- /** Number of times a condition was broadcasted. */
+ /**
+ Number of times a condition was broadcast.
+ This statistic is not exposed in user visible tables yet.
+ */
ulonglong m_broadcast_count;
+
+ inline void aggregate(const PFS_cond_stat *stat)
+ {
+ m_wait_stat.aggregate(&stat->m_wait_stat);
+ m_signal_count+= stat->m_signal_count;
+ m_broadcast_count+= stat->m_broadcast_count;
+ }
+
+ inline void reset(void)
+ {
+ m_wait_stat.reset();
+ m_signal_count= 0;
+ m_broadcast_count= 0;
+ }
};
/** Statistics for FILE IO. Used for both waits and byte counts. */
@@ -198,6 +275,11 @@ struct PFS_file_stat
/** File IO statistics. */
PFS_file_io_stat m_io_stat;
+ inline void aggregate(const PFS_file_stat *stat)
+ {
+ m_io_stat.aggregate(&stat->m_io_stat);
+ }
+
/** Reset file statistics. */
inline void reset(void)
{
@@ -329,6 +411,7 @@ struct PFS_statement_stat
/** Single table io statistic. */
struct PFS_table_io_stat
{
+ bool m_has_data;
/** FETCH statistics */
PFS_single_stat m_fetch;
/** INSERT statistics */
@@ -338,8 +421,14 @@ struct PFS_table_io_stat
/** DELETE statistics */
PFS_single_stat m_delete;
+ PFS_table_io_stat()
+ {
+ m_has_data= false;
+ }
+
inline void reset(void)
{
+ m_has_data= false;
m_fetch.reset();
m_insert.reset();
m_update.reset();
@@ -348,18 +437,25 @@ struct PFS_table_io_stat
inline void aggregate(const PFS_table_io_stat *stat)
{
- m_fetch.aggregate(&stat->m_fetch);
- m_insert.aggregate(&stat->m_insert);
- m_update.aggregate(&stat->m_update);
- m_delete.aggregate(&stat->m_delete);
+ if (stat->m_has_data)
+ {
+ m_has_data= true;
+ m_fetch.aggregate(&stat->m_fetch);
+ m_insert.aggregate(&stat->m_insert);
+ m_update.aggregate(&stat->m_update);
+ m_delete.aggregate(&stat->m_delete);
+ }
}
inline void sum(PFS_single_stat *result)
{
- result->aggregate(& m_fetch);
- result->aggregate(& m_insert);
- result->aggregate(& m_update);
- result->aggregate(& m_delete);
+ if (m_has_data)
+ {
+ result->aggregate(& m_fetch);
+ result->aggregate(& m_insert);
+ result->aggregate(& m_update);
+ result->aggregate(& m_delete);
+ }
}
};
@@ -419,10 +515,10 @@ struct PFS_table_stat
{
/**
Statistics, per index.
- Each index stat is in [0, MAX_KEY-1],
- stats when using no index are in [MAX_KEY].
+ Each index stat is in [0, MAX_INDEXES-1],
+ stats when using no index are in [MAX_INDEXES].
*/
- PFS_table_io_stat m_index_stat[MAX_KEY + 1];
+ PFS_table_io_stat m_index_stat[MAX_INDEXES + 1];
/**
Statistics, per lock type.
@@ -433,7 +529,7 @@ struct PFS_table_stat
inline void reset_io(void)
{
PFS_table_io_stat *stat= & m_index_stat[0];
- PFS_table_io_stat *stat_last= & m_index_stat[MAX_KEY + 1];
+ PFS_table_io_stat *stat_last= & m_index_stat[MAX_INDEXES + 1];
for ( ; stat < stat_last ; stat++)
stat->reset();
}
@@ -466,13 +562,25 @@ struct PFS_table_stat
memcpy(this, & g_reset_template, sizeof(*this));
}
- inline void aggregate_io(const PFS_table_stat *stat)
+ inline void aggregate_io(const PFS_table_stat *stat, uint key_count)
{
- PFS_table_io_stat *to_stat= & m_index_stat[0];
- PFS_table_io_stat *to_stat_last= & m_index_stat[MAX_KEY + 1];
- const PFS_table_io_stat *from_stat= & stat->m_index_stat[0];
+ PFS_table_io_stat *to_stat;
+ PFS_table_io_stat *to_stat_last;
+ const PFS_table_io_stat *from_stat;
+
+ DBUG_ASSERT(key_count <= MAX_INDEXES);
+
+ /* Aggregate stats for each index, if any */
+ to_stat= & m_index_stat[0];
+ to_stat_last= to_stat + key_count;
+ from_stat= & stat->m_index_stat[0];
for ( ; to_stat < to_stat_last ; from_stat++, to_stat++)
to_stat->aggregate(from_stat);
+
+ /* Aggregate stats for the table */
+ to_stat= & m_index_stat[MAX_INDEXES];
+ from_stat= & stat->m_index_stat[MAX_INDEXES];
+ to_stat->aggregate(from_stat);
}
inline void aggregate_lock(const PFS_table_stat *stat)
@@ -480,18 +588,27 @@ struct PFS_table_stat
m_lock_stat.aggregate(& stat->m_lock_stat);
}
- inline void aggregate(const PFS_table_stat *stat)
+ inline void aggregate(const PFS_table_stat *stat, uint key_count)
{
- aggregate_io(stat);
+ aggregate_io(stat, key_count);
aggregate_lock(stat);
}
- inline void sum_io(PFS_single_stat *result)
+ inline void sum_io(PFS_single_stat *result, uint key_count)
{
- PFS_table_io_stat *stat= & m_index_stat[0];
- PFS_table_io_stat *stat_last= & m_index_stat[MAX_KEY + 1];
+ PFS_table_io_stat *stat;
+ PFS_table_io_stat *stat_last;
+
+ DBUG_ASSERT(key_count <= MAX_INDEXES);
+
+ /* Sum stats for each index, if any */
+ stat= & m_index_stat[0];
+ stat_last= stat + key_count;
for ( ; stat < stat_last ; stat++)
stat->sum(result);
+
+ /* Sum stats for the table */
+ m_index_stat[MAX_INDEXES].sum(result);
}
inline void sum_lock(PFS_single_stat *result)
@@ -499,9 +616,9 @@ struct PFS_table_stat
m_lock_stat.sum(result);
}
- inline void sum(PFS_single_stat *result)
+ inline void sum(PFS_single_stat *result, uint key_count)
{
- sum_io(result);
+ sum_io(result, key_count);
sum_lock(result);
}
diff --git a/storage/perfschema/pfs_timer.cc b/storage/perfschema/pfs_timer.cc
index 3d8d2e07ce5..8c3553db2b2 100644
--- a/storage/perfschema/pfs_timer.cc
+++ b/storage/perfschema/pfs_timer.cc
@@ -1,4 +1,4 @@
-/* Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2008, 2012, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -117,6 +117,75 @@ void init_timers(void)
to_pico_data[TIMER_NAME_TICK].m_v0= tick_v0;
to_pico_data[TIMER_NAME_TICK].m_factor= tick_to_pico;
+
+ /*
+ Depending on the platform and build options,
+ some timers may not be available.
+ Pick best replacements.
+ */
+
+ /*
+ For STAGE and STATEMENT, a timer with a fixed frequency is better.
+ The prefered timer is nanosecond, or lower resolutions.
+ */
+
+ if (nanosec_to_pico != 0)
+ {
+ /* Normal case. */
+ stage_timer= TIMER_NAME_NANOSEC;
+ statement_timer= TIMER_NAME_NANOSEC;
+ }
+ else if (microsec_to_pico != 0)
+ {
+ /* Windows. */
+ stage_timer= TIMER_NAME_MICROSEC;
+ statement_timer= TIMER_NAME_MICROSEC;
+ }
+ else if (millisec_to_pico != 0)
+ {
+ /* Robustness, no known cases. */
+ stage_timer= TIMER_NAME_MILLISEC;
+ statement_timer= TIMER_NAME_MILLISEC;
+ }
+ else if (tick_to_pico != 0)
+ {
+ /* Robustness, no known cases. */
+ stage_timer= TIMER_NAME_TICK;
+ statement_timer= TIMER_NAME_TICK;
+ }
+ else
+ {
+ /* Robustness, no known cases. */
+ stage_timer= TIMER_NAME_CYCLE;
+ statement_timer= TIMER_NAME_CYCLE;
+ }
+
+ /*
+ For IDLE, a timer with a fixed frequency is critical,
+ as the CPU clock may slow down a lot if the server is completely idle.
+ The prefered timer is microsecond, or lower resolutions.
+ */
+
+ if (microsec_to_pico != 0)
+ {
+ /* Normal case. */
+ idle_timer= TIMER_NAME_MICROSEC;
+ }
+ else if (millisec_to_pico != 0)
+ {
+ /* Robustness, no known cases. */
+ idle_timer= TIMER_NAME_MILLISEC;
+ }
+ else if (tick_to_pico != 0)
+ {
+ /* Robustness, no known cases. */
+ idle_timer= TIMER_NAME_TICK;
+ }
+ else
+ {
+ /* Robustness, no known cases. */
+ idle_timer= TIMER_NAME_CYCLE;
+ }
}
ulonglong get_timer_raw_value(enum_timer_name timer_name)
diff --git a/storage/perfschema/pfs_user.cc b/storage/perfschema/pfs_user.cc
index d7794a131a1..697b5af2f0d 100644
--- a/storage/perfschema/pfs_user.cc
+++ b/storage/perfschema/pfs_user.cc
@@ -42,7 +42,7 @@ static PFS_single_stat *user_instr_class_waits_array= NULL;
static PFS_stage_stat *user_instr_class_stages_array= NULL;
static PFS_statement_stat *user_instr_class_statements_array= NULL;
-static LF_HASH user_hash;
+LF_HASH user_hash;
static bool user_hash_inited= false;
/**
@@ -146,10 +146,11 @@ C_MODE_END
*/
int init_user_hash(void)
{
- if (! user_hash_inited)
+ if ((! user_hash_inited) && (user_max > 0))
{
lf_hash_init(&user_hash, sizeof(PFS_user*), LF_HASH_UNIQUE,
0, 0, user_hash_get_key, &my_charset_bin);
+ user_hash.size= user_max;
user_hash_inited= true;
}
return 0;
diff --git a/storage/perfschema/pfs_user.h b/storage/perfschema/pfs_user.h
index 0f937c6c927..dda7e221ca8 100644
--- a/storage/perfschema/pfs_user.h
+++ b/storage/perfschema/pfs_user.h
@@ -44,7 +44,7 @@ struct PFS_user_key
uint m_key_length;
};
-struct PFS_user : public PFS_connection_slice
+struct PFS_ALIGNED PFS_user : public PFS_connection_slice
{
public:
inline void init_refcount(void)
@@ -108,6 +108,8 @@ extern ulong user_lost;
extern PFS_user *user_array;
+extern LF_HASH user_hash;
+
/** @} */
#endif
diff --git a/storage/perfschema/pfs_visitor.cc b/storage/perfschema/pfs_visitor.cc
index fe2b16a2f76..616bc27900a 100644
--- a/storage/perfschema/pfs_visitor.cc
+++ b/storage/perfschema/pfs_visitor.cc
@@ -666,7 +666,7 @@ void PFS_connection_wait_visitor::visit_global()
it is more efficient.
*/
DBUG_ASSERT(m_index == global_idle_class.m_event_name_index);
- m_stat.aggregate(& global_instr_class_waits_array[m_index]);
+ m_stat.aggregate(& global_idle_stat);
}
void PFS_connection_wait_visitor::visit_host(PFS_host *pfs)
@@ -883,54 +883,44 @@ PFS_instance_wait_visitor::PFS_instance_wait_visitor()
PFS_instance_wait_visitor::~PFS_instance_wait_visitor()
{}
-void PFS_instance_wait_visitor::visit_mutex_class(PFS_mutex_class *pfs)
+void PFS_instance_wait_visitor::visit_mutex_class(PFS_mutex_class *pfs)
{
- uint index= pfs->m_event_name_index;
- m_stat.aggregate(& global_instr_class_waits_array[index]);
+ m_stat.aggregate(&pfs->m_mutex_stat.m_wait_stat);
}
-void PFS_instance_wait_visitor::visit_rwlock_class(PFS_rwlock_class *pfs)
+void PFS_instance_wait_visitor::visit_rwlock_class(PFS_rwlock_class *pfs)
{
- uint index= pfs->m_event_name_index;
- m_stat.aggregate(& global_instr_class_waits_array[index]);
+ m_stat.aggregate(&pfs->m_rwlock_stat.m_wait_stat);
}
-void PFS_instance_wait_visitor::visit_cond_class(PFS_cond_class *pfs)
+void PFS_instance_wait_visitor::visit_cond_class(PFS_cond_class *pfs)
{
- uint index= pfs->m_event_name_index;
- m_stat.aggregate(& global_instr_class_waits_array[index]);
+ m_stat.aggregate(&pfs->m_cond_stat.m_wait_stat);
}
-void PFS_instance_wait_visitor::visit_file_class(PFS_file_class *pfs)
+void PFS_instance_wait_visitor::visit_file_class(PFS_file_class *pfs)
{
- uint index= pfs->m_event_name_index;
- m_stat.aggregate(& global_instr_class_waits_array[index]);
+ pfs->m_file_stat.m_io_stat.sum_waits(&m_stat);
}
-void PFS_instance_wait_visitor::visit_socket_class(PFS_socket_class *pfs)
+void PFS_instance_wait_visitor::visit_socket_class(PFS_socket_class *pfs)
{
- /* Collect global wait stats */
- uint index= pfs->m_event_name_index;
- m_stat.aggregate(&global_instr_class_waits_array[index]);
-
- /* If deferred, then pull wait stats directly from the socket class. */
- if (pfs->is_deferred())
- pfs->m_socket_stat.m_io_stat.sum_waits(&m_stat);
+ pfs->m_socket_stat.m_io_stat.sum_waits(&m_stat);
}
-void PFS_instance_wait_visitor::visit_mutex(PFS_mutex *pfs)
+void PFS_instance_wait_visitor::visit_mutex(PFS_mutex *pfs)
{
- m_stat.aggregate(& pfs->m_wait_stat);
+ m_stat.aggregate(& pfs->m_mutex_stat.m_wait_stat);
}
-void PFS_instance_wait_visitor::visit_rwlock(PFS_rwlock *pfs)
+void PFS_instance_wait_visitor::visit_rwlock(PFS_rwlock *pfs)
{
- m_stat.aggregate(& pfs->m_wait_stat);
+ m_stat.aggregate(& pfs->m_rwlock_stat.m_wait_stat);
}
-void PFS_instance_wait_visitor::visit_cond(PFS_cond *pfs)
+void PFS_instance_wait_visitor::visit_cond(PFS_cond *pfs)
{
- m_stat.aggregate(& pfs->m_wait_stat);
+ m_stat.aggregate(& pfs->m_cond_stat.m_wait_stat);
}
void PFS_instance_wait_visitor::visit_file(PFS_file *pfs)
@@ -959,23 +949,24 @@ PFS_object_wait_visitor::~PFS_object_wait_visitor()
void PFS_object_wait_visitor::visit_global()
{
- uint index;
-
- index= global_table_io_class.m_event_name_index;
- m_stat.aggregate(& global_instr_class_waits_array[index]);
-
- index= global_table_lock_class.m_event_name_index;
- m_stat.aggregate(& global_instr_class_waits_array[index]);
+ global_table_io_stat.sum(& m_stat);
+ global_table_lock_stat.sum(& m_stat);
}
void PFS_object_wait_visitor::visit_table_share(PFS_table_share *pfs)
{
- pfs->m_table_stat.sum(& m_stat);
+ uint safe_key_count= sanitize_index_count(pfs->m_key_count);
+ pfs->m_table_stat.sum(& m_stat, safe_key_count);
}
void PFS_object_wait_visitor::visit_table(PFS_table *pfs)
{
- pfs->m_table_stat.sum(& m_stat);
+ PFS_table_share *table_share= sanitize_table_share(pfs->m_share);
+ if (table_share != NULL)
+ {
+ uint safe_key_count= sanitize_index_count(table_share->m_key_count);
+ pfs->m_table_stat.sum(& m_stat, safe_key_count);
+ }
}
PFS_table_io_wait_visitor::PFS_table_io_wait_visitor()
@@ -986,21 +977,21 @@ PFS_table_io_wait_visitor::~PFS_table_io_wait_visitor()
void PFS_table_io_wait_visitor::visit_global()
{
- uint index= global_table_io_class.m_event_name_index;
- m_stat.aggregate(& global_instr_class_waits_array[index]);
+ global_table_io_stat.sum(& m_stat);
}
void PFS_table_io_wait_visitor::visit_table_share(PFS_table_share *pfs)
{
PFS_table_io_stat io_stat;
+ uint safe_key_count= sanitize_index_count(pfs->m_key_count);
uint index;
/* Aggregate index stats */
- for (index= 0; index < pfs->m_key_count; index++)
+ for (index= 0; index < safe_key_count; index++)
io_stat.aggregate(& pfs->m_table_stat.m_index_stat[index]);
/* Aggregate global stats */
- io_stat.aggregate(& pfs->m_table_stat.m_index_stat[MAX_KEY]);
+ io_stat.aggregate(& pfs->m_table_stat.m_index_stat[MAX_INDEXES]);
io_stat.sum(& m_stat);
}
@@ -1012,14 +1003,15 @@ void PFS_table_io_wait_visitor::visit_table(PFS_table *pfs)
if (likely(safe_share != NULL))
{
PFS_table_io_stat io_stat;
+ uint safe_key_count= sanitize_index_count(safe_share->m_key_count);
uint index;
/* Aggregate index stats */
- for (index= 0; index < safe_share->m_key_count; index++)
+ for (index= 0; index < safe_key_count; index++)
io_stat.aggregate(& pfs->m_table_stat.m_index_stat[index]);
/* Aggregate global stats */
- io_stat.aggregate(& pfs->m_table_stat.m_index_stat[MAX_KEY]);
+ io_stat.aggregate(& pfs->m_table_stat.m_index_stat[MAX_INDEXES]);
io_stat.sum(& m_stat);
}
@@ -1035,14 +1027,15 @@ PFS_table_io_stat_visitor::~PFS_table_io_stat_visitor()
void PFS_table_io_stat_visitor::visit_table_share(PFS_table_share *pfs)
{
+ uint safe_key_count= sanitize_index_count(pfs->m_key_count);
uint index;
/* Aggregate index stats */
- for (index= 0; index < pfs->m_key_count; index++)
+ for (index= 0; index < safe_key_count; index++)
m_stat.aggregate(& pfs->m_table_stat.m_index_stat[index]);
/* Aggregate global stats */
- m_stat.aggregate(& pfs->m_table_stat.m_index_stat[MAX_KEY]);
+ m_stat.aggregate(& pfs->m_table_stat.m_index_stat[MAX_INDEXES]);
}
void PFS_table_io_stat_visitor::visit_table(PFS_table *pfs)
@@ -1051,14 +1044,15 @@ void PFS_table_io_stat_visitor::visit_table(PFS_table *pfs)
if (likely(safe_share != NULL))
{
+ uint safe_key_count= sanitize_index_count(safe_share->m_key_count);
uint index;
/* Aggregate index stats */
- for (index= 0; index < safe_share->m_key_count; index++)
+ for (index= 0; index < safe_key_count; index++)
m_stat.aggregate(& pfs->m_table_stat.m_index_stat[index]);
/* Aggregate global stats */
- m_stat.aggregate(& pfs->m_table_stat.m_index_stat[MAX_KEY]);
+ m_stat.aggregate(& pfs->m_table_stat.m_index_stat[MAX_INDEXES]);
}
}
@@ -1090,8 +1084,7 @@ PFS_table_lock_wait_visitor::~PFS_table_lock_wait_visitor()
void PFS_table_lock_wait_visitor::visit_global()
{
- uint index= global_table_lock_class.m_event_name_index;
- m_stat.aggregate(& global_instr_class_waits_array[index]);
+ global_table_lock_stat.sum(& m_stat);
}
void PFS_table_lock_wait_visitor::visit_table_share(PFS_table_share *pfs)
diff --git a/storage/perfschema/table_esgs_by_thread_by_event_name.cc b/storage/perfschema/table_esgs_by_thread_by_event_name.cc
index 2a69ec24277..eeef6c3fbb2 100644
--- a/storage/perfschema/table_esgs_by_thread_by_event_name.cc
+++ b/storage/perfschema/table_esgs_by_thread_by_event_name.cc
@@ -33,7 +33,7 @@ static const TABLE_FIELD_TYPE field_types[]=
{
{
{ C_STRING_WITH_LEN("THREAD_ID") },
- { C_STRING_WITH_LEN("int(11)") },
+ { C_STRING_WITH_LEN("bigint(20)") },
{ NULL, 0}
},
{
@@ -212,7 +212,7 @@ int table_esgs_by_thread_by_event_name
switch(f->field_index)
{
case 0: /* THREAD_ID */
- set_field_ulong(f, m_row.m_thread_internal_id);
+ set_field_ulonglong(f, m_row.m_thread_internal_id);
break;
case 1: /* NAME */
m_row.m_event_name.set_field(f);
diff --git a/storage/perfschema/table_esgs_by_thread_by_event_name.h b/storage/perfschema/table_esgs_by_thread_by_event_name.h
index 049c8997396..5295a9eacdf 100644
--- a/storage/perfschema/table_esgs_by_thread_by_event_name.h
+++ b/storage/perfschema/table_esgs_by_thread_by_event_name.h
@@ -39,7 +39,7 @@
struct row_esgs_by_thread_by_event_name
{
/** Column THREAD_ID. */
- ulong m_thread_internal_id;
+ ulonglong m_thread_internal_id;
/** Column EVENT_NAME. */
PFS_event_name_row m_event_name;
/** Columns COUNT_STAR, SUM/MIN/AVG/MAX TIMER_WAIT. */
diff --git a/storage/perfschema/table_esgs_global_by_event_name.cc b/storage/perfschema/table_esgs_global_by_event_name.cc
index 2ac22fb1551..276ac8d7704 100644
--- a/storage/perfschema/table_esgs_global_by_event_name.cc
+++ b/storage/perfschema/table_esgs_global_by_event_name.cc
@@ -95,6 +95,9 @@ int
table_esgs_global_by_event_name::delete_all_rows(void)
{
reset_events_stages_by_thread();
+ reset_events_stages_by_account();
+ reset_events_stages_by_user();
+ reset_events_stages_by_host();
reset_events_stages_global();
return 0;
}
diff --git a/storage/perfschema/table_esms_by_digest.cc b/storage/perfschema/table_esms_by_digest.cc
index dac8d3b01dc..d0250c14e5d 100644
--- a/storage/perfschema/table_esms_by_digest.cc
+++ b/storage/perfschema/table_esms_by_digest.cc
@@ -36,6 +36,11 @@ THR_LOCK table_esms_by_digest::m_table_lock;
static const TABLE_FIELD_TYPE field_types[]=
{
{
+ { C_STRING_WITH_LEN("SCHEMA_NAME") },
+ { C_STRING_WITH_LEN("varchar(64)") },
+ { NULL, 0}
+ },
+ {
{ C_STRING_WITH_LEN("DIGEST") },
{ C_STRING_WITH_LEN("varchar(32)") },
{ NULL, 0}
@@ -45,7 +50,7 @@ static const TABLE_FIELD_TYPE field_types[]=
{ C_STRING_WITH_LEN("longtext") },
{ NULL, 0}
},
- {
+ {
{ C_STRING_WITH_LEN("COUNT_STAR") },
{ C_STRING_WITH_LEN("bigint(20)") },
{ NULL, 0}
@@ -170,7 +175,7 @@ static const TABLE_FIELD_TYPE field_types[]=
{ C_STRING_WITH_LEN("timestamp") },
{ NULL, 0}
},
- {
+ {
{ C_STRING_WITH_LEN("LAST_SEEN") },
{ C_STRING_WITH_LEN("timestamp") },
{ NULL, 0}
@@ -179,7 +184,7 @@ static const TABLE_FIELD_TYPE field_types[]=
TABLE_FIELD_DEF
table_esms_by_digest::m_field_def=
-{ 28, field_types };
+{ 29, field_types };
PFS_engine_table_share
table_esms_by_digest::m_share=
@@ -303,18 +308,19 @@ int table_esms_by_digest
{
switch(f->field_index)
{
- case 0: /* DIGEST */
- case 1: /* DIGEST_TEXT */
+ case 0: /* SCHEMA_NAME */
+ case 1: /* DIGEST */
+ case 2: /* DIGEST_TEXT */
m_row.m_digest.set_field(f->field_index, f);
break;
- case 26: /* FIRST_SEEN */
+ case 27: /* FIRST_SEEN */
set_field_timestamp(f, m_row.m_first_seen);
break;
- case 27: /* LAST_SEEN */
+ case 28: /* LAST_SEEN */
set_field_timestamp(f, m_row.m_last_seen);
break;
- default: /* 1, ... COUNT/SUM/MIN/AVG/MAX */
- m_row.m_stat.set_field(f->field_index - 2, f);
+ default: /* 3, ... COUNT/SUM/MIN/AVG/MAX */
+ m_row.m_stat.set_field(f->field_index - 3, f);
break;
}
}
diff --git a/storage/perfschema/table_esms_by_thread_by_event_name.cc b/storage/perfschema/table_esms_by_thread_by_event_name.cc
index 5a7faca1b79..fccdf5dea60 100644
--- a/storage/perfschema/table_esms_by_thread_by_event_name.cc
+++ b/storage/perfschema/table_esms_by_thread_by_event_name.cc
@@ -33,7 +33,7 @@ static const TABLE_FIELD_TYPE field_types[]=
{
{
{ C_STRING_WITH_LEN("THREAD_ID") },
- { C_STRING_WITH_LEN("int(11)") },
+ { C_STRING_WITH_LEN("bigint(20)") },
{ NULL, 0}
},
{
@@ -308,7 +308,7 @@ int table_esms_by_thread_by_event_name
switch(f->field_index)
{
case 0: /* THREAD_ID */
- set_field_ulong(f, m_row.m_thread_internal_id);
+ set_field_ulonglong(f, m_row.m_thread_internal_id);
break;
case 1: /* EVENT_NAME */
m_row.m_event_name.set_field(f);
diff --git a/storage/perfschema/table_esms_by_thread_by_event_name.h b/storage/perfschema/table_esms_by_thread_by_event_name.h
index 2f36606a5e1..9fb9f7c58dc 100644
--- a/storage/perfschema/table_esms_by_thread_by_event_name.h
+++ b/storage/perfschema/table_esms_by_thread_by_event_name.h
@@ -39,7 +39,7 @@
struct row_esms_by_thread_by_event_name
{
/** Column THREAD_ID. */
- ulong m_thread_internal_id;
+ ulonglong m_thread_internal_id;
/** Column EVENT_NAME. */
PFS_event_name_row m_event_name;
/** Columns COUNT_STAR, SUM/MIN/AVG/MAX TIMER_WAIT. */
diff --git a/storage/perfschema/table_esms_global_by_event_name.cc b/storage/perfschema/table_esms_global_by_event_name.cc
index 22c87f09137..efcb5b6fa7c 100644
--- a/storage/perfschema/table_esms_global_by_event_name.cc
+++ b/storage/perfschema/table_esms_global_by_event_name.cc
@@ -190,6 +190,9 @@ int
table_esms_global_by_event_name::delete_all_rows(void)
{
reset_events_statements_by_thread();
+ reset_events_statements_by_account();
+ reset_events_statements_by_user();
+ reset_events_statements_by_host();
reset_events_statements_global();
return 0;
}
diff --git a/storage/perfschema/table_events_stages.cc b/storage/perfschema/table_events_stages.cc
index e438249fbd3..854e1be15cd 100644
--- a/storage/perfschema/table_events_stages.cc
+++ b/storage/perfschema/table_events_stages.cc
@@ -32,7 +32,7 @@ static const TABLE_FIELD_TYPE field_types[]=
{
{
{ C_STRING_WITH_LEN("THREAD_ID") },
- { C_STRING_WITH_LEN("int(11)") },
+ { C_STRING_WITH_LEN("bigint(20)") },
{ NULL, 0}
},
{
@@ -207,7 +207,7 @@ int table_events_stages_common::read_row_values(TABLE *table,
switch(f->field_index)
{
case 0: /* THREAD_ID */
- set_field_ulong(f, m_row.m_thread_internal_id);
+ set_field_ulonglong(f, m_row.m_thread_internal_id);
break;
case 1: /* EVENT_ID */
set_field_ulonglong(f, m_row.m_event_id);
diff --git a/storage/perfschema/table_events_stages.h b/storage/perfschema/table_events_stages.h
index 6bc712c15a5..09c555c80fd 100644
--- a/storage/perfschema/table_events_stages.h
+++ b/storage/perfschema/table_events_stages.h
@@ -36,7 +36,7 @@ struct PFS_thread;
struct row_events_stages
{
/** Column THREAD_ID. */
- ulong m_thread_internal_id;
+ ulonglong m_thread_internal_id;
/** Column EVENT_ID. */
ulonglong m_event_id;
/** Column END_EVENT_ID. */
diff --git a/storage/perfschema/table_events_statements.cc b/storage/perfschema/table_events_statements.cc
index d453b14470f..fb2b4b242d4 100644
--- a/storage/perfschema/table_events_statements.cc
+++ b/storage/perfschema/table_events_statements.cc
@@ -35,7 +35,7 @@ static const TABLE_FIELD_TYPE field_types[]=
{
{
{ C_STRING_WITH_LEN("THREAD_ID") },
- { C_STRING_WITH_LEN("int(11)") },
+ { C_STRING_WITH_LEN("bigint(20)") },
{ NULL, 0}
},
{
@@ -372,7 +372,7 @@ void table_events_statements_common::make_row(PFS_events_statements *statement)
PSI_digest_storage *digest= & statement->m_digest_storage;
if (digest->m_byte_count > 0)
{
- PFS_digest_hash md5;
+ PFS_digest_key md5;
compute_md5_hash((char *) md5.m_md5,
(char *) digest->m_token_array,
digest->m_byte_count);
@@ -420,7 +420,7 @@ int table_events_statements_common::read_row_values(TABLE *table,
switch(f->field_index)
{
case 0: /* THREAD_ID */
- set_field_ulong(f, m_row.m_thread_internal_id);
+ set_field_ulonglong(f, m_row.m_thread_internal_id);
break;
case 1: /* EVENT_ID */
set_field_ulonglong(f, m_row.m_event_id);
diff --git a/storage/perfschema/table_events_statements.h b/storage/perfschema/table_events_statements.h
index acd82de4fcf..dcc6611f555 100644
--- a/storage/perfschema/table_events_statements.h
+++ b/storage/perfschema/table_events_statements.h
@@ -37,7 +37,7 @@ struct PFS_thread;
struct row_events_statements
{
/** Column THREAD_ID. */
- ulong m_thread_internal_id;
+ ulonglong m_thread_internal_id;
/** Column EVENT_ID. */
ulonglong m_event_id;
/** Column END_EVENT_ID. */
diff --git a/storage/perfschema/table_events_waits.cc b/storage/perfschema/table_events_waits.cc
index d1c82e81f75..82d8ba2a0cc 100644
--- a/storage/perfschema/table_events_waits.cc
+++ b/storage/perfschema/table_events_waits.cc
@@ -34,7 +34,7 @@ static const TABLE_FIELD_TYPE field_types[]=
{
{
{ C_STRING_WITH_LEN("THREAD_ID") },
- { C_STRING_WITH_LEN("int(11)") },
+ { C_STRING_WITH_LEN("bigint(20)") },
{ NULL, 0}
},
{
@@ -239,7 +239,8 @@ int table_events_waits_common::make_table_object_columns(volatile PFS_events_wai
/* INDEX NAME */
safe_index= wait->m_index;
- if (safe_index < MAX_KEY && safe_index < safe_table_share->m_key_count)
+ uint safe_key_count= sanitize_index_count(safe_table_share->m_key_count);
+ if (safe_index < safe_key_count)
{
PFS_table_key *key= & safe_table_share->m_keys[safe_index];
m_row.m_index_name_length= key->m_name_length;
@@ -602,7 +603,7 @@ int table_events_waits_common::read_row_values(TABLE *table,
switch(f->field_index)
{
case 0: /* THREAD_ID */
- set_field_ulong(f, m_row.m_thread_internal_id);
+ set_field_ulonglong(f, m_row.m_thread_internal_id);
break;
case 1: /* EVENT_ID */
set_field_ulonglong(f, m_row.m_event_id);
diff --git a/storage/perfschema/table_events_waits.h b/storage/perfschema/table_events_waits.h
index 72065c765ca..065bf95e5a6 100644
--- a/storage/perfschema/table_events_waits.h
+++ b/storage/perfschema/table_events_waits.h
@@ -36,7 +36,7 @@ struct PFS_thread;
struct row_events_waits
{
/** Column THREAD_ID. */
- ulong m_thread_internal_id;
+ ulonglong m_thread_internal_id;
/** Column EVENT_ID. */
ulonglong m_event_id;
/** Column END_EVENT_ID. */
diff --git a/storage/perfschema/table_events_waits_summary.cc b/storage/perfschema/table_events_waits_summary.cc
index 2a144a07344..f437e83f3ff 100644
--- a/storage/perfschema/table_events_waits_summary.cc
+++ b/storage/perfschema/table_events_waits_summary.cc
@@ -139,7 +139,7 @@ void table_events_waits_summary_by_instance::make_mutex_row(PFS_mutex *pfs)
if (unlikely(safe_class == NULL))
return;
- make_instr_row(pfs, safe_class, pfs->m_identity, &pfs->m_wait_stat);
+ make_instr_row(pfs, safe_class, pfs->m_identity, &pfs->m_mutex_stat.m_wait_stat);
}
/**
@@ -153,7 +153,7 @@ void table_events_waits_summary_by_instance::make_rwlock_row(PFS_rwlock *pfs)
if (unlikely(safe_class == NULL))
return;
- make_instr_row(pfs, safe_class, pfs->m_identity, &pfs->m_wait_stat);
+ make_instr_row(pfs, safe_class, pfs->m_identity, &pfs->m_rwlock_stat.m_wait_stat);
}
/**
@@ -167,7 +167,7 @@ void table_events_waits_summary_by_instance::make_cond_row(PFS_cond *pfs)
if (unlikely(safe_class == NULL))
return;
- make_instr_row(pfs, safe_class, pfs->m_identity, &pfs->m_wait_stat);
+ make_instr_row(pfs, safe_class, pfs->m_identity, &pfs->m_cond_stat.m_wait_stat);
}
/**
@@ -181,11 +181,13 @@ void table_events_waits_summary_by_instance::make_file_row(PFS_file *pfs)
if (unlikely(safe_class == NULL))
return;
+ PFS_single_stat sum;
+ pfs->m_file_stat.m_io_stat.sum_waits(& sum);
/*
Files don't have a in memory structure associated to it,
so we use the address of the PFS_file buffer as object_instance_begin
*/
- make_instr_row(pfs, safe_class, pfs, &pfs->m_wait_stat);
+ make_instr_row(pfs, safe_class, pfs, & sum);
}
/**
diff --git a/storage/perfschema/table_ews_by_thread_by_event_name.cc b/storage/perfschema/table_ews_by_thread_by_event_name.cc
index 25e3cf395c4..4db97b1c98c 100644
--- a/storage/perfschema/table_ews_by_thread_by_event_name.cc
+++ b/storage/perfschema/table_ews_by_thread_by_event_name.cc
@@ -33,7 +33,7 @@ static const TABLE_FIELD_TYPE field_types[]=
{
{
{ C_STRING_WITH_LEN("THREAD_ID") },
- { C_STRING_WITH_LEN("int(11)") },
+ { C_STRING_WITH_LEN("bigint(20)") },
{ NULL, 0}
},
{
@@ -282,7 +282,7 @@ int table_ews_by_thread_by_event_name
switch(f->field_index)
{
case 0: /* THREAD_ID */
- set_field_ulong(f, m_row.m_thread_internal_id);
+ set_field_ulonglong(f, m_row.m_thread_internal_id);
break;
case 1: /* EVENT_NAME */
m_row.m_event_name.set_field(f);
diff --git a/storage/perfschema/table_ews_by_thread_by_event_name.h b/storage/perfschema/table_ews_by_thread_by_event_name.h
index b0710bb8a57..989356be646 100644
--- a/storage/perfschema/table_ews_by_thread_by_event_name.h
+++ b/storage/perfschema/table_ews_by_thread_by_event_name.h
@@ -39,7 +39,7 @@
struct row_ews_by_thread_by_event_name
{
/** Column THREAD_ID. */
- ulong m_thread_internal_id;
+ ulonglong m_thread_internal_id;
/** Column EVENT_NAME. */
PFS_event_name_row m_event_name;
/** Columns COUNT_STAR, SUM/MIN/AVG/MAX TIMER_WAIT. */
diff --git a/storage/perfschema/table_ews_global_by_event_name.cc b/storage/perfschema/table_ews_global_by_event_name.cc
index c71a1ed479e..1e165c36bc5 100644
--- a/storage/perfschema/table_ews_global_by_event_name.cc
+++ b/storage/perfschema/table_ews_global_by_event_name.cc
@@ -97,7 +97,7 @@ table_ews_global_by_event_name::delete_all_rows(void)
reset_events_waits_by_instance();
reset_table_waits_by_table_handle();
reset_table_waits_by_table();
- reset_events_waits_global();
+ reset_events_waits_by_class();
return 0;
}
@@ -121,9 +121,6 @@ int table_ews_global_by_event_name::rnd_next(void)
PFS_socket_class *socket_class;
PFS_instr_class *instr_class;
- if (global_instr_class_waits_array == NULL)
- return HA_ERR_END_OF_FILE;
-
for (m_pos.set_at(&m_next_pos);
m_pos.has_more_view();
m_pos.next_view())
@@ -218,9 +215,6 @@ table_ews_global_by_event_name::rnd_pos(const void *pos)
set_position(pos);
- if (global_instr_class_waits_array == NULL)
- return HA_ERR_END_OF_FILE;
-
switch (m_pos.m_index_1)
{
case pos_ews_global_by_event_name::VIEW_MUTEX:
diff --git a/storage/perfschema/table_helper.cc b/storage/perfschema/table_helper.cc
index d3954179539..9f803434ab6 100644
--- a/storage/perfschema/table_helper.cc
+++ b/storage/perfschema/table_helper.cc
@@ -110,26 +110,30 @@ int PFS_digest_row::make_row(PFS_statements_digest_stat* pfs)
*/
if (pfs->m_digest_storage.m_byte_count != 0)
{
+ m_schema_name_length= pfs->m_digest_key.m_schema_name_length;
+ if (m_schema_name_length > 0)
+ memcpy(m_schema_name, pfs->m_digest_key.m_schema_name, m_schema_name_length);
/*
Calculate digest from MD5 HASH collected to be shown as
DIGEST in this row.
*/
- MD5_HASH_TO_STRING(pfs->m_digest_hash.m_md5, m_digest);
+ MD5_HASH_TO_STRING(pfs->m_digest_key.m_md5, m_digest);
m_digest_length= MD5_HASH_TO_STRING_LENGTH;
- /*
- Caclulate digest_text information from the token array collected
+ /*
+ Calculate digest_text information from the token array collected
to be shown as DIGEST_TEXT column.
- */
+ */
get_digest_text(m_digest_text, &pfs->m_digest_storage);
m_digest_text_length= strlen(m_digest_text);
}
else
{
+ m_schema_name_length= 0;
m_digest_length= 0;
m_digest_text_length= 0;
}
-
+
return 0;
}
@@ -137,14 +141,21 @@ void PFS_digest_row::set_field(uint index, Field *f)
{
switch (index)
{
- case 0: /* DIGEST */
+ case 0: /* SCHEMA_NAME */
+ if (m_schema_name_length > 0)
+ PFS_engine_table::set_field_varchar_utf8(f, m_schema_name,
+ m_schema_name_length);
+ else
+ f->set_null();
+ break;
+ case 1: /* DIGEST */
if (m_digest_length > 0)
PFS_engine_table::set_field_varchar_utf8(f, m_digest,
m_digest_length);
else
f->set_null();
break;
- case 1: /* DIGEST_TEXT */
+ case 2: /* DIGEST_TEXT */
if (m_digest_text_length > 0)
PFS_engine_table::set_field_longtext_utf8(f, m_digest_text,
m_digest_text_length);
@@ -199,7 +210,7 @@ int PFS_index_row::make_row(PFS_table_share *pfs, uint table_index)
if (m_object_row.make_row(pfs))
return 1;
- if (table_index < MAX_KEY)
+ if (table_index < MAX_INDEXES)
{
PFS_table_key *key= &pfs->m_keys[table_index];
m_index_name_length= key->m_name_length;
diff --git a/storage/perfschema/table_helper.h b/storage/perfschema/table_helper.h
index 798ff16f4e5..769122570eb 100644
--- a/storage/perfschema/table_helper.h
+++ b/storage/perfschema/table_helper.h
@@ -127,6 +127,10 @@ struct PFS_account_row
/** Row fragment for columns DIGEST, DIGEST_TEXT. */
struct PFS_digest_row
{
+ /** Column SCHEMA_NAME. */
+ char m_schema_name[NAME_LEN];
+ /** Length in bytes of @c m_schema_name. */
+ uint m_schema_name_length;
/** Column DIGEST. */
char m_digest[COL_DIGEST_SIZE];
/** Length in bytes of @c m_digest. */
diff --git a/storage/perfschema/table_host_cache.cc b/storage/perfschema/table_host_cache.cc
index d243204ddcd..9c44a1fe235 100644
--- a/storage/perfschema/table_host_cache.cc
+++ b/storage/perfschema/table_host_cache.cc
@@ -266,9 +266,6 @@ void table_host_cache::materialize(THD *thd)
index++;
row++;
current= current->next();
- /* Host cache is a circular linked list. */
- if (current == first)
- break;
}
m_all_rows= rows;
diff --git a/storage/perfschema/table_os_global_by_type.cc b/storage/perfschema/table_os_global_by_type.cc
index 82d176cd5b2..70d9d6819ac 100644
--- a/storage/perfschema/table_os_global_by_type.cc
+++ b/storage/perfschema/table_os_global_by_type.cc
@@ -174,6 +174,7 @@ void table_os_global_by_type::make_row(PFS_table_share *share)
{
pfs_lock lock;
PFS_single_stat cumulated_stat;
+ uint safe_key_count;
m_row_exists= false;
@@ -184,7 +185,11 @@ void table_os_global_by_type::make_row(PFS_table_share *share)
m_row.m_schema_name_length= share->m_schema_name_length;
memcpy(m_row.m_object_name, share->m_table_name, share->m_table_name_length);
m_row.m_object_name_length= share->m_table_name_length;
- share->m_table_stat.sum(& cumulated_stat);
+
+ /* This is a dirty read, some thread can write data while we are reading it */
+ safe_key_count= sanitize_index_count(share->m_key_count);
+
+ share->m_table_stat.sum(& cumulated_stat, safe_key_count);
if (! share->m_lock.end_optimistic_lock(&lock))
return;
@@ -204,7 +209,7 @@ void table_os_global_by_type::make_row(PFS_table_share *share)
If the opened table handle is for this table share,
aggregate the table handle statistics.
*/
- table->m_table_stat.sum(& cumulated_stat);
+ table->m_table_stat.sum(& cumulated_stat, safe_key_count);
}
}
}
diff --git a/storage/perfschema/table_session_account_connect_attrs.cc b/storage/perfschema/table_session_account_connect_attrs.cc
new file mode 100644
index 00000000000..4a3fcc22341
--- /dev/null
+++ b/storage/perfschema/table_session_account_connect_attrs.cc
@@ -0,0 +1,70 @@
+/* Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
+
+#include "table_session_account_connect_attrs.h"
+
+THR_LOCK table_session_account_connect_attrs::m_table_lock;
+
+PFS_engine_table_share
+table_session_account_connect_attrs::m_share=
+{
+ { C_STRING_WITH_LEN("session_account_connect_attrs") },
+ &pfs_readonly_acl,
+ &table_session_account_connect_attrs::create,
+ NULL, /* write_row */
+ NULL, /* delete_all_rows */
+ NULL, /* get_row_count */
+ 1000, /* records */
+ sizeof(pos_connect_attr_by_thread_by_attr), /* ref length */
+ &m_table_lock,
+ &m_field_def,
+ false /* checked */
+};
+
+PFS_engine_table* table_session_account_connect_attrs::create()
+{
+ return new table_session_account_connect_attrs();
+}
+
+table_session_account_connect_attrs::table_session_account_connect_attrs()
+ : table_session_connect(&m_share)
+{}
+
+bool
+table_session_account_connect_attrs::thread_fits(PFS_thread *thread)
+{
+ PFS_thread *current_thread= PFS_thread::get_current_thread();
+ /* The current thread may not have instrumentation attached. */
+ if (current_thread == NULL)
+ return false;
+
+ /* The thread we compare to, by definition, has some instrumentation. */
+ DBUG_ASSERT(thread != NULL);
+
+ uint username_length= current_thread->m_username_length;
+ uint hostname_length= current_thread->m_hostname_length;
+
+ if ( (thread->m_username_length != username_length)
+ || (thread->m_hostname_length != hostname_length))
+ return false;
+
+ if (memcmp(thread->m_username, current_thread->m_username, username_length) != 0)
+ return false;
+
+ if (memcmp(thread->m_hostname, current_thread->m_hostname, hostname_length) != 0)
+ return false;
+
+ return true;
+}
diff --git a/storage/perfschema/table_session_account_connect_attrs.h b/storage/perfschema/table_session_account_connect_attrs.h
new file mode 100644
index 00000000000..ba8893e7cad
--- /dev/null
+++ b/storage/perfschema/table_session_account_connect_attrs.h
@@ -0,0 +1,50 @@
+/* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
+
+#ifndef TABLE_SESSION_ACCOUNT_CONNECT_ATTRS_H
+#define TABLE_SESSION_ACCOUNT_CONNECT_ATTRS_H
+
+#include "table_session_connect.h"
+/**
+ \addtogroup Performance_schema_tables
+ @{
+*/
+
+/** Table PERFORMANCE_SCHEMA.SESSION_ACCOUNT_CONNECT_ATTRS. */
+class table_session_account_connect_attrs : public table_session_connect
+{
+public:
+ /** Table share */
+ static PFS_engine_table_share m_share;
+ /** Table builder */
+ static PFS_engine_table* create();
+
+protected:
+ table_session_account_connect_attrs();
+
+public:
+ ~table_session_account_connect_attrs()
+ {}
+
+protected:
+ virtual bool thread_fits(PFS_thread *thread);
+
+private:
+ /** Table share lock. */
+ static THR_LOCK m_table_lock;
+};
+
+/** @} */
+#endif
diff --git a/storage/perfschema/table_session_connect.cc b/storage/perfschema/table_session_connect.cc
new file mode 100644
index 00000000000..bd905b5756c
--- /dev/null
+++ b/storage/perfschema/table_session_connect.cc
@@ -0,0 +1,268 @@
+/* Copyright (c) 2008, 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
+
+#include "table_session_connect.h"
+
+static const TABLE_FIELD_TYPE field_types[]=
+{
+ {
+ { C_STRING_WITH_LEN("PROCESSLIST_ID") },
+ { C_STRING_WITH_LEN("int(11)") },
+ { NULL, 0}
+ },
+ {
+ { C_STRING_WITH_LEN("ATTR_NAME") },
+ { C_STRING_WITH_LEN("varchar(32)") },
+ { NULL, 0}
+ },
+ {
+ { C_STRING_WITH_LEN("ATTR_VALUE") },
+ { C_STRING_WITH_LEN("varchar(1024)") },
+ { NULL, 0}
+ },
+ {
+ { C_STRING_WITH_LEN("ORDINAL_POSITION") },
+ { C_STRING_WITH_LEN("int(11)") },
+ { NULL, 0}
+ }
+};
+
+TABLE_FIELD_DEF table_session_connect::m_field_def=
+{ 4, field_types };
+
+table_session_connect::table_session_connect(const PFS_engine_table_share *share) :
+ cursor_by_thread_connect_attr(share)
+{}
+
+/**
+ Take a length encoded string
+
+ @arg ptr inout the input string array
+ @arg dest where to store the result
+ @arg dest_size max size of @c dest
+ @arg copied_len the actual length of the data copied
+ @arg start_ptr pointer to the start of input
+ @arg input_length the length of the incoming data
+ @arg copy_data copy the data or just skip the input
+ @arg from_cs character set in which @c ptr is encoded
+ @arg nchars_max maximum number of characters to read
+ @return status
+ @retval true parsing failed
+ @retval false parsing succeeded
+*/
+bool parse_length_encoded_string(const char **ptr,
+ char *dest, uint dest_size,
+ uint *copied_len,
+ const char *start_ptr, uint input_length,
+ bool copy_data,
+ const CHARSET_INFO *from_cs,
+ uint nchars_max)
+{
+ ulong copy_length, data_length;
+ const char *well_formed_error_pos= NULL, *cannot_convert_error_pos= NULL,
+ *from_end_pos= NULL;
+
+ copy_length= data_length= net_field_length((uchar **) ptr);
+
+ /* we don't tolerate NULL as a length */
+ if (data_length == NULL_LENGTH)
+ return true;
+
+ if (*ptr - start_ptr + data_length > input_length)
+ return true;
+
+ copy_length= well_formed_copy_nchars(&my_charset_utf8_bin, dest, dest_size,
+ from_cs, *ptr, data_length, nchars_max,
+ &well_formed_error_pos,
+ &cannot_convert_error_pos,
+ &from_end_pos);
+ *copied_len= copy_length;
+ (*ptr)+= data_length;
+
+ return false;
+}
+
+/**
+ Take the nth attribute name/value pair
+
+  Parse the attributes blob from the beginning, skipping the attributes
+ whose number is lower than the one we seek.
+  When we reach the attribute at the index we're looking for, the values
+  are copied to the output parameters.
+ If parsing fails or no more attributes are found the function stops
+ and returns an error code.
+
+ @arg connect_attrs pointer to the connect attributes blob
+ @arg connect_attrs_length length of @c connect_attrs
+ @arg connect_attrs_cs character set used to encode @c connect_attrs
+ @arg ordinal index of the attribute we need
+ @arg attr_name [out] buffer to receive the attribute name
+ @arg max_attr_name max size of @c attr_name in bytes
+  @arg attr_name_length [out] number of bytes written in @c attr_name
+ @arg attr_value [out] buffer to receive the attribute name
+ @arg max_attr_value max size of @c attr_value in bytes
+  @arg attr_value_length [out] number of bytes written in @c attr_value
+ @return status
+ @retval true requested attribute pair is found and copied
+ @retval false error. Either because of parsing or too few attributes.
+*/
+bool read_nth_attr(const char *connect_attrs,
+ uint connect_attrs_length,
+ const CHARSET_INFO *connect_attrs_cs,
+ uint ordinal,
+ char *attr_name, uint max_attr_name,
+ uint *attr_name_length,
+ char *attr_value, uint max_attr_value,
+ uint *attr_value_length)
+{
+ uint idx;
+ const char *ptr;
+
+ for (ptr= connect_attrs, idx= 0;
+ (uint)(ptr - connect_attrs) < connect_attrs_length && idx <= ordinal;
+ idx++)
+ {
+ uint copy_length;
+ /* do the copying only if we absolutely have to */
+ bool fill_in_attr_name= idx == ordinal;
+ bool fill_in_attr_value= idx == ordinal;
+
+ /* read the key */
+ if (parse_length_encoded_string(&ptr,
+ attr_name, max_attr_name, &copy_length,
+ connect_attrs,
+ connect_attrs_length,
+ fill_in_attr_name,
+ connect_attrs_cs, 32) ||
+ !copy_length
+ )
+ return false;
+
+ if (idx == ordinal)
+ *attr_name_length= copy_length;
+
+ /* read the value */
+ if (parse_length_encoded_string(&ptr,
+ attr_value, max_attr_value, &copy_length,
+ connect_attrs,
+ connect_attrs_length,
+ fill_in_attr_value,
+ connect_attrs_cs, 1024))
+ return false;
+
+ if (idx == ordinal)
+ *attr_value_length= copy_length;
+
+ if (idx == ordinal)
+ return true;
+ }
+
+ return false;
+}
+
+void table_session_connect::make_row(PFS_thread *pfs, uint ordinal)
+{
+ pfs_lock lock;
+ PFS_thread_class *safe_class;
+
+ m_row_exists= false;
+
+ /* Protect this reader against thread termination */
+ pfs->m_lock.begin_optimistic_lock(&lock);
+ safe_class= sanitize_thread_class(pfs->m_class);
+ if (unlikely(safe_class == NULL))
+ return;
+
+ /* Filtering threads must be done under the protection of the optimistic lock. */
+ if (! thread_fits(pfs))
+ return;
+
+ /* populate the row */
+ if (read_nth_attr(pfs->m_session_connect_attrs,
+ pfs->m_session_connect_attrs_length,
+ pfs->m_session_connect_attrs_cs,
+ ordinal,
+ m_row.m_attr_name, (uint) sizeof(m_row.m_attr_name),
+ &m_row.m_attr_name_length,
+ m_row.m_attr_value, (uint) sizeof(m_row.m_attr_value),
+ &m_row.m_attr_value_length))
+ {
+ /* we don't expect internal threads to have connection attributes */
+ DBUG_ASSERT(pfs->m_processlist_id != 0);
+
+ m_row.m_ordinal_position= ordinal;
+ m_row.m_process_id= pfs->m_processlist_id;
+ }
+ else
+ return;
+
+ if (pfs->m_lock.end_optimistic_lock(& lock))
+ m_row_exists= true;
+}
+
+int table_session_connect::read_row_values(TABLE *table,
+ unsigned char *buf,
+ Field **fields,
+ bool read_all)
+{
+ Field *f;
+
+ if (unlikely(!m_row_exists))
+ return HA_ERR_RECORD_DELETED;
+
+ /* Set the null bits */
+ DBUG_ASSERT(table->s->null_bytes == 1);
+ buf[0]= 0;
+
+ for (; (f= *fields) ; fields++)
+ {
+ if (read_all || bitmap_is_set(table->read_set, f->field_index))
+ {
+ switch(f->field_index)
+ {
+ case FO_PROCESS_ID:
+ if (m_row.m_process_id != 0)
+ set_field_ulong(f, m_row.m_process_id);
+ else
+ f->set_null();
+ break;
+ case FO_ATTR_NAME:
+ set_field_varchar_utf8(f, m_row.m_attr_name,
+ m_row.m_attr_name_length);
+ break;
+ case FO_ATTR_VALUE:
+ if (m_row.m_attr_value_length)
+ set_field_varchar_utf8(f, m_row.m_attr_value,
+ m_row.m_attr_value_length);
+ else
+ f->set_null();
+ break;
+ case FO_ORDINAL_POSITION:
+ set_field_ulong(f, m_row.m_ordinal_position);
+ break;
+ default:
+ DBUG_ASSERT(false);
+ }
+ }
+ }
+ return 0;
+}
+
+bool
+table_session_connect::thread_fits(PFS_thread *thread)
+{
+ return true;
+}
+
diff --git a/storage/perfschema/table_session_connect.h b/storage/perfschema/table_session_connect.h
new file mode 100644
index 00000000000..097623d2c80
--- /dev/null
+++ b/storage/perfschema/table_session_connect.h
@@ -0,0 +1,77 @@
+/* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
+
+#ifndef TABLE_SESSION_CONNECT_H
+#define TABLE_SESSION_CONNECT_H
+
+#include "pfs_column_types.h"
+#include "cursor_by_thread_connect_attr.h"
+#include "table_helper.h"
+
+#define MAX_ATTR_NAME_CHARS 32
+#define MAX_ATTR_VALUE_CHARS 1024
+#define MAX_UTF8_BYTES 6
+
+/** symbolic names for field offsets, keep in sync with field_types */
+enum field_offsets {
+ FO_PROCESS_ID,
+ FO_ATTR_NAME,
+ FO_ATTR_VALUE,
+ FO_ORDINAL_POSITION
+};
+
+/**
+ A row of PERFORMANCE_SCHEMA.SESSION_CONNECT_ATTRS and
+ PERFORMANCE_SCHEMA.SESSION_ACCOUNT_CONNECT_ATTRS.
+*/
+struct row_session_connect_attrs
+{
+ /** Column PROCESS_ID. */
+ ulong m_process_id;
+ /** Column ATTR_NAME. In UTF-8 */
+ char m_attr_name[MAX_ATTR_NAME_CHARS * MAX_UTF8_BYTES];
+ /** Length in bytes of @c m_attr_name. */
+ uint m_attr_name_length;
+ /** Column ATTR_VALUE. In UTF-8 */
+ char m_attr_value[MAX_ATTR_VALUE_CHARS * MAX_UTF8_BYTES];
+  /** Length in bytes of @c m_attr_value. */
+ uint m_attr_value_length;
+ /** Column ORDINAL_POSITION. */
+ ulong m_ordinal_position;
+};
+
+class table_session_connect : public cursor_by_thread_connect_attr
+{
+protected:
+ table_session_connect(const PFS_engine_table_share *share);
+
+public:
+ ~table_session_connect()
+ {}
+
+protected:
+ virtual void make_row(PFS_thread *pfs, uint ordinal);
+ virtual bool thread_fits(PFS_thread *thread);
+ virtual int read_row_values(TABLE *table, unsigned char *buf,
+ Field **fields, bool read_all);
+protected:
+ /** Fields definition. */
+ static TABLE_FIELD_DEF m_field_def;
+ /** Current row. */
+ row_session_connect_attrs m_row;
+};
+
+/** @} */
+#endif
diff --git a/storage/perfschema/table_session_connect_attrs.cc b/storage/perfschema/table_session_connect_attrs.cc
new file mode 100644
index 00000000000..9e1804b7294
--- /dev/null
+++ b/storage/perfschema/table_session_connect_attrs.cc
@@ -0,0 +1,43 @@
+/* Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
+
+#include "table_session_connect_attrs.h"
+
+THR_LOCK table_session_connect_attrs::m_table_lock;
+
+PFS_engine_table_share
+table_session_connect_attrs::m_share=
+{
+ { C_STRING_WITH_LEN("session_connect_attrs") },
+ &pfs_readonly_acl,
+ &table_session_connect_attrs::create,
+ NULL, /* write_row */
+ NULL, /* delete_all_rows */
+ NULL, /* get_row_count */
+ 1000, /* records */
+ sizeof(pos_connect_attr_by_thread_by_attr), /* ref length */
+ &m_table_lock,
+ &m_field_def,
+ false /* checked */
+};
+
+PFS_engine_table* table_session_connect_attrs::create()
+{
+ return new table_session_connect_attrs();
+}
+
+table_session_connect_attrs::table_session_connect_attrs()
+ : table_session_connect(&m_share)
+{}
diff --git a/storage/perfschema/table_session_connect_attrs.h b/storage/perfschema/table_session_connect_attrs.h
new file mode 100644
index 00000000000..b10b106ba0d
--- /dev/null
+++ b/storage/perfschema/table_session_connect_attrs.h
@@ -0,0 +1,47 @@
+/* Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
+
+#ifndef TABLE_SESSION_CONNECT_ATTRS_H
+#define TABLE_SESSION_CONNECT_ATTRS_H
+
+#include "table_session_connect.h"
+/**
+ \addtogroup Performance_schema_tables
+ @{
+*/
+
+/** Table PERFORMANCE_SCHEMA.SESSION_CONNECT_ATTRS. */
+class table_session_connect_attrs : public table_session_connect
+{
+public:
+ /** Table share */
+ static PFS_engine_table_share m_share;
+ /** Table builder */
+ static PFS_engine_table* create();
+
+protected:
+ table_session_connect_attrs();
+
+public:
+ ~table_session_connect_attrs()
+ {}
+
+private:
+ /** Table share lock. */
+ static THR_LOCK m_table_lock;
+};
+
+/** @} */
+#endif
diff --git a/storage/perfschema/table_setup_actors.cc b/storage/perfschema/table_setup_actors.cc
index 15d3d9d22a8..91dbb942ead 100644
--- a/storage/perfschema/table_setup_actors.cc
+++ b/storage/perfschema/table_setup_actors.cc
@@ -105,6 +105,9 @@ int table_setup_actors::write_row(TABLE *table, unsigned char *buf,
}
}
+ if (user->length() == 0 || host->length() == 0 || role->length() == 0)
+ return HA_ERR_WRONG_COMMAND;
+
return insert_setup_actor(user, host, role);
}
@@ -264,39 +267,13 @@ int table_setup_actors::delete_row_values(TABLE *table,
const unsigned char *buf,
Field **fields)
{
- Field *f;
- String user_data("", 0, &my_charset_utf8_bin);
- String host_data("", 0, &my_charset_utf8_bin);
- String role_data("", 0, &my_charset_utf8_bin);
- String *user= NULL;
- String *host= NULL;
- String *role= NULL;
-
- for (; (f= *fields) ; fields++)
- {
- if (bitmap_is_set(table->read_set, f->field_index))
- {
- switch(f->field_index)
- {
- case 0: /* HOST */
- host= get_field_char_utf8(f, &host_data);
- break;
- case 1: /* USER */
- user= get_field_char_utf8(f, &user_data);
- break;
- case 2: /* ROLE */
- role= get_field_char_utf8(f, &role_data);
- break;
- default:
- DBUG_ASSERT(false);
- }
- }
- }
+ DBUG_ASSERT(m_row_exists);
- DBUG_ASSERT(user != NULL);
- DBUG_ASSERT(host != NULL);
- DBUG_ASSERT(role != NULL);
+ CHARSET_INFO *cs= &my_charset_utf8_bin;
+ String user(m_row.m_username, m_row.m_username_length, cs);
+ String role(m_row.m_rolename, m_row.m_rolename_length, cs);
+ String host(m_row.m_hostname, m_row.m_hostname_length, cs);
- return delete_setup_actor(user, host, role);
+ return delete_setup_actor(&user, &host, &role);
}
diff --git a/storage/perfschema/table_setup_objects.cc b/storage/perfschema/table_setup_objects.cc
index 33e360e989b..11fab913ac4 100644
--- a/storage/perfschema/table_setup_objects.cc
+++ b/storage/perfschema/table_setup_objects.cc
@@ -339,42 +339,15 @@ int table_setup_objects::delete_row_values(TABLE *table,
const unsigned char *buf,
Field **fields)
{
- int result;
- Field *f;
- enum_object_type object_type= OBJECT_TYPE_TABLE;
- String object_schema_data("", 0, &my_charset_utf8_bin);
- String object_name_data("", 0, &my_charset_utf8_bin);
- String *object_schema= NULL;
- String *object_name= NULL;
+ DBUG_ASSERT(m_row_exists);
- for (; (f= *fields) ; fields++)
- {
- if (bitmap_is_set(table->read_set, f->field_index))
- {
- switch(f->field_index)
- {
- case 0: /* OBJECT_TYPE */
- object_type= (enum_object_type) get_field_enum(f);
- break;
- case 1: /* OBJECT_SCHEMA */
- object_schema= get_field_varchar_utf8(f, &object_schema_data);
- break;
- case 2: /* OBJECT_NAME */
- object_name= get_field_varchar_utf8(f, &object_name_data);
- break;
- case 3: /* ENABLED */
- case 4: /* TIMED */
- break;
- default:
- DBUG_ASSERT(false);
- }
- }
- }
+ CHARSET_INFO *cs= &my_charset_utf8_bin;
+ enum_object_type object_type= OBJECT_TYPE_TABLE;
+ String object_schema(m_row.m_schema_name, m_row.m_schema_name_length, cs);
+ String object_name(m_row.m_object_name, m_row.m_object_name_length, cs);
- DBUG_ASSERT(object_schema != NULL);
- DBUG_ASSERT(object_name != NULL);
+ int result= delete_setup_object(object_type, &object_schema, &object_name);
- result= delete_setup_object(object_type, object_schema, object_name);
if (result == 0)
result= update_derived_flags();
return result;
diff --git a/storage/perfschema/table_socket_instances.cc b/storage/perfschema/table_socket_instances.cc
index f913c8fcc65..0fa1d2b1a3a 100644
--- a/storage/perfschema/table_socket_instances.cc
+++ b/storage/perfschema/table_socket_instances.cc
@@ -42,7 +42,7 @@ static const TABLE_FIELD_TYPE field_types[]=
},
{
{ C_STRING_WITH_LEN("THREAD_ID") },
- { C_STRING_WITH_LEN("int(11)") },
+ { C_STRING_WITH_LEN("bigint(20)") },
{ NULL, 0}
},
{
@@ -205,7 +205,7 @@ int table_socket_instances::read_row_values(TABLE *table,
break;
case 2: /* THREAD_ID */
if (m_row.m_thread_id_set)
- set_field_ulong(f, m_row.m_thread_id);
+ set_field_ulonglong(f, m_row.m_thread_id);
else
f->set_null();
break;
diff --git a/storage/perfschema/table_socket_instances.h b/storage/perfschema/table_socket_instances.h
index 2a80aeaa76a..080f11c1ba8 100644
--- a/storage/perfschema/table_socket_instances.h
+++ b/storage/perfschema/table_socket_instances.h
@@ -39,7 +39,7 @@ struct row_socket_instances
/** Column OBJECT_INSTANCE_BEGIN */
const void *m_identity;
/** Column THREAD_ID */
- uint m_thread_id;
+ ulonglong m_thread_id;
/** True if thread_is is set */
bool m_thread_id_set;
/** Column SOCKET_ID */
diff --git a/storage/perfschema/table_sync_instances.cc b/storage/perfschema/table_sync_instances.cc
index 9631c5fb205..4d7c48efdc1 100644
--- a/storage/perfschema/table_sync_instances.cc
+++ b/storage/perfschema/table_sync_instances.cc
@@ -43,7 +43,7 @@ static const TABLE_FIELD_TYPE mutex_field_types[]=
},
{
{ C_STRING_WITH_LEN("LOCKED_BY_THREAD_ID") },
- { C_STRING_WITH_LEN("int(11)") },
+ { C_STRING_WITH_LEN("bigint(20)") },
{ NULL, 0}
}
};
@@ -178,7 +178,7 @@ int table_mutex_instances::read_row_values(TABLE *table,
break;
case 2: /* LOCKED_BY_THREAD_ID */
if (m_row.m_locked)
- set_field_ulong(f, m_row.m_locked_by_thread_id);
+ set_field_ulonglong(f, m_row.m_locked_by_thread_id);
else
f->set_null();
break;
@@ -207,7 +207,7 @@ static const TABLE_FIELD_TYPE rwlock_field_types[]=
},
{
{ C_STRING_WITH_LEN("WRITE_LOCKED_BY_THREAD_ID") },
- { C_STRING_WITH_LEN("int(11)") },
+ { C_STRING_WITH_LEN("bigint(20)") },
{ NULL, 0}
},
{
@@ -351,7 +351,7 @@ int table_rwlock_instances::read_row_values(TABLE *table,
break;
case 2: /* WRITE_LOCKED_BY_THREAD_ID */
if (m_row.m_write_locked)
- set_field_ulong(f, m_row.m_write_locked_by_thread_id);
+ set_field_ulonglong(f, m_row.m_write_locked_by_thread_id);
else
f->set_null();
break;
diff --git a/storage/perfschema/table_sync_instances.h b/storage/perfschema/table_sync_instances.h
index b6fc78e1cd5..ff7b2765a11 100644
--- a/storage/perfschema/table_sync_instances.h
+++ b/storage/perfschema/table_sync_instances.h
@@ -45,7 +45,7 @@ struct row_mutex_instances
/** True if column LOCKED_BY_THREAD_ID is not null. */
bool m_locked;
/** Column LOCKED_BY_THREAD_ID. */
- ulong m_locked_by_thread_id;
+ ulonglong m_locked_by_thread_id;
};
/** Table PERFORMANCE_SCHEMA.MUTEX_INSTANCES. */
@@ -102,7 +102,7 @@ struct row_rwlock_instances
/** True if column WRITE_LOCKED_BY_THREAD_ID is not null. */
bool m_write_locked;
/** Column WRITE_LOCKED_BY_THREAD_ID. */
- ulong m_write_locked_by_thread_id;
+ ulonglong m_write_locked_by_thread_id;
/** Column READ_LOCKED_BY_COUNT. */
ulong m_readers;
};
diff --git a/storage/perfschema/table_threads.cc b/storage/perfschema/table_threads.cc
index 91300d6b67e..b1ec2ad754e 100644
--- a/storage/perfschema/table_threads.cc
+++ b/storage/perfschema/table_threads.cc
@@ -1,4 +1,4 @@
-/* Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -26,7 +26,7 @@ static const TABLE_FIELD_TYPE field_types[]=
{
{
{ C_STRING_WITH_LEN("THREAD_ID") },
- { C_STRING_WITH_LEN("int(11)") },
+ { C_STRING_WITH_LEN("bigint(20)") },
{ NULL, 0}
},
{
@@ -41,7 +41,7 @@ static const TABLE_FIELD_TYPE field_types[]=
},
{
{ C_STRING_WITH_LEN("PROCESSLIST_ID") },
- { C_STRING_WITH_LEN("int(11)") },
+ { C_STRING_WITH_LEN("bigint(20)") },
{ NULL, 0}
},
{
@@ -81,7 +81,7 @@ static const TABLE_FIELD_TYPE field_types[]=
},
{
{ C_STRING_WITH_LEN("PARENT_THREAD_ID") },
- { C_STRING_WITH_LEN("int(11)") },
+ { C_STRING_WITH_LEN("bigint(20)") },
{ NULL, 0}
},
{
@@ -129,6 +129,7 @@ table_threads::table_threads()
void table_threads::make_row(PFS_thread *pfs)
{
pfs_lock lock;
+ pfs_lock processlist_lock;
PFS_thread_class *safe_class;
m_row_exists= false;
@@ -142,7 +143,7 @@ void table_threads::make_row(PFS_thread *pfs)
m_row.m_thread_internal_id= pfs->m_thread_internal_id;
m_row.m_parent_thread_internal_id= pfs->m_parent_thread_internal_id;
- m_row.m_thread_id= pfs->m_thread_id;
+ m_row.m_processlist_id= pfs->m_processlist_id;
m_row.m_name= safe_class->m_name;
m_row.m_name_length= safe_class->m_name_length;
@@ -166,12 +167,30 @@ void table_threads::make_row(PFS_thread *pfs)
m_row.m_command= pfs->m_command;
m_row.m_start_time= pfs->m_start_time;
+
+ /* Protect this reader against attribute changes. */
+ pfs->m_processlist_lock.begin_optimistic_lock(&processlist_lock);
+
/* FIXME: need to copy it ? */
m_row.m_processlist_state_ptr= pfs->m_processlist_state_ptr;
m_row.m_processlist_state_length= pfs->m_processlist_state_length;
/* FIXME: need to copy it ? */
m_row.m_processlist_info_ptr= pfs->m_processlist_info_ptr;
m_row.m_processlist_info_length= pfs->m_processlist_info_length;
+
+ if (! pfs->m_processlist_lock.end_optimistic_lock(& processlist_lock))
+ {
+ /*
+ Columns PROCESSLIST_STATE or PROCESSLIST_INFO are being
+ updated while we read them, and are unsafe to use.
+ Do not discard the entire row.
+ Do not loop waiting for a stable value.
+ Just return NULL values for these columns.
+ */
+ m_row.m_processlist_state_length= 0;
+ m_row.m_processlist_info_length= 0;
+ }
+
m_row.m_enabled_ptr= &pfs->m_enabled;
if (pfs->m_lock.end_optimistic_lock(& lock))
@@ -200,20 +219,20 @@ int table_threads::read_row_values(TABLE *table,
switch(f->field_index)
{
case 0: /* THREAD_ID */
- set_field_ulong(f, m_row.m_thread_internal_id);
+ set_field_ulonglong(f, m_row.m_thread_internal_id);
break;
case 1: /* NAME */
set_field_varchar_utf8(f, m_row.m_name, m_row.m_name_length);
break;
case 2: /* TYPE */
- if (m_row.m_thread_id != 0)
+ if (m_row.m_processlist_id != 0)
set_field_varchar_utf8(f, "FOREGROUND", 10);
else
set_field_varchar_utf8(f, "BACKGROUND", 10);
break;
case 3: /* PROCESSLIST_ID */
- if (m_row.m_thread_id != 0)
- set_field_ulong(f, m_row.m_thread_id);
+ if (m_row.m_processlist_id != 0)
+ set_field_ulonglong(f, m_row.m_processlist_id);
else
f->set_null();
break;
@@ -239,7 +258,7 @@ int table_threads::read_row_values(TABLE *table,
f->set_null();
break;
case 7: /* PROCESSLIST_COMMAND */
- if (m_row.m_thread_id != 0)
+ if (m_row.m_processlist_id != 0)
set_field_varchar_utf8(f, command_name[m_row.m_command].str,
command_name[m_row.m_command].length);
else
@@ -271,7 +290,7 @@ int table_threads::read_row_values(TABLE *table,
break;
case 11: /* PARENT_THREAD_ID */
if (m_row.m_parent_thread_internal_id != 0)
- set_field_ulong(f, m_row.m_parent_thread_internal_id);
+ set_field_ulonglong(f, m_row.m_parent_thread_internal_id);
else
f->set_null();
break;
diff --git a/storage/perfschema/table_threads.h b/storage/perfschema/table_threads.h
index 9819822f8c8..bce45c0cbce 100644
--- a/storage/perfschema/table_threads.h
+++ b/storage/perfschema/table_threads.h
@@ -32,9 +32,9 @@ struct PFS_thread;
struct row_threads
{
/** Column THREAD_ID. */
- ulong m_thread_internal_id;
+ ulonglong m_thread_internal_id;
/** Column PROCESSLIST_ID. */
- ulong m_thread_id;
+ ulonglong m_processlist_id;
/** Column NAME. */
const char* m_name;
/** Length in bytes of @c m_name. */
@@ -66,7 +66,7 @@ struct row_threads
/** Column INSTRUMENTED. */
bool *m_enabled_ptr;
/** Column PARENT_THREAD_ID. */
- ulong m_parent_thread_internal_id;
+ ulonglong m_parent_thread_internal_id;
};
/** Table PERFORMANCE_SCHEMA.THREADS. */
diff --git a/storage/perfschema/table_tiws_by_index_usage.cc b/storage/perfschema/table_tiws_by_index_usage.cc
index d354c40d3ed..71455793516 100644
--- a/storage/perfschema/table_tiws_by_index_usage.cc
+++ b/storage/perfschema/table_tiws_by_index_usage.cc
@@ -290,15 +290,16 @@ int table_tiws_by_index_usage::rnd_next(void)
table_share= &table_share_array[m_pos.m_index_1];
if (table_share->m_lock.is_populated())
{
- if (m_pos.m_index_2 < table_share->m_key_count)
+ uint safe_key_count= sanitize_index_count(table_share->m_key_count);
+ if (m_pos.m_index_2 < safe_key_count)
{
make_row(table_share, m_pos.m_index_2);
m_next_pos.set_after(&m_pos);
return 0;
}
- if (m_pos.m_index_2 <= MAX_KEY)
+ if (m_pos.m_index_2 <= MAX_INDEXES)
{
- m_pos.m_index_2= MAX_KEY;
+ m_pos.m_index_2= MAX_INDEXES;
make_row(table_share, m_pos.m_index_2);
m_next_pos.set_after(&m_pos);
return 0;
@@ -319,12 +320,13 @@ table_tiws_by_index_usage::rnd_pos(const void *pos)
table_share= &table_share_array[m_pos.m_index_1];
if (table_share->m_lock.is_populated())
{
- if (m_pos.m_index_2 < table_share->m_key_count)
+ uint safe_key_count= sanitize_index_count(table_share->m_key_count);
+ if (m_pos.m_index_2 < safe_key_count)
{
make_row(table_share, m_pos.m_index_2);
return 0;
}
- if (m_pos.m_index_2 == MAX_KEY)
+ if (m_pos.m_index_2 == MAX_INDEXES)
{
make_row(table_share, m_pos.m_index_2);
return 0;
diff --git a/storage/perfschema/unittest/CMakeLists.txt b/storage/perfschema/unittest/CMakeLists.txt
index 757bc24c566..c3a7fe5c72f 100644
--- a/storage/perfschema/unittest/CMakeLists.txt
+++ b/storage/perfschema/unittest/CMakeLists.txt
@@ -1,5 +1,4 @@
-# Copyright (c) 2009, 2010 Sun Microsystems, Inc.
-# Use is subject to license terms.
+# Copyright (c) 2009, 2012, Oracle and/or its affiliates. All rights reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -11,18 +10,44 @@
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software Foundation,
-# 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02111-1307 USA
INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include
${CMAKE_SOURCE_DIR}/include/mysql
${CMAKE_SOURCE_DIR}/regex
${CMAKE_SOURCE_DIR}/sql
- ${CMAKE_SOURCE_DIR}/extra/yassl/include
+ ${SSL_INCLUDE_DIRS}
${CMAKE_SOURCE_DIR}/unittest/mytap
${CMAKE_SOURCE_DIR}/storage/perfschema)
-ADD_DEFINITIONS(-DMYSQL_SERVER)
+ADD_DEFINITIONS(-DMYSQL_SERVER ${SSL_DEFINES})
MY_ADD_TESTS(pfs_instr_class pfs_instr_class-oom pfs_instr pfs_instr-oom pfs_account-oom pfs_host-oom pfs_user-oom pfs
EXT "cc" LINK_LIBRARIES perfschema mysys)
+
+IF(WIN32)
+ SET(MYSQLD_EXTRA_SOURCES ${CMAKE_SOURCE_DIR}/sql/nt_servc.cc)
+ENDIF()
+
+# We need the server libs to test the blob parser.
+# Add sql_builtin.cc here, to force linkage of plugins below.
+# Also add mysys/string.c (see Bug#45488)
+ADD_EXECUTABLE(pfs_connect_attr-t
+ pfs_connect_attr-t.cc
+ ${CMAKE_BINARY_DIR}/sql/sql_builtin.cc
+ ${CMAKE_SOURCE_DIR}/mysys/string.c
+ ${MYSQLD_EXTRA_SOURCES}
+)
+ADD_DEPENDENCIES(pfs_connect_attr-t GenServerSource)
+TARGET_LINK_LIBRARIES(pfs_connect_attr-t mytap perfschema)
+# We need to explicitly link in everything referenced in sql/sql_builtin.cc
+TARGET_LINK_LIBRARIES(pfs_connect_attr-t ${MYSQLD_STATIC_PLUGIN_LIBS})
+TARGET_LINK_LIBRARIES(pfs_connect_attr-t sql binlog rpl master slave sql)
+TARGET_LINK_LIBRARIES(pfs_connect_attr-t mysys mysys_ssl)
+TARGET_LINK_LIBRARIES(pfs_connect_attr-t vio ${SSL_LIBRARIES})
+TARGET_LINK_LIBRARIES(pfs_connect_attr-t strings dbug regex mysys zlib)
+ADD_TEST(pfs_connect_attr pfs_connect_attr-t)
+
+# On windows, pfs_connect_attr-t may depend on openssl dlls.
+COPY_OPENSSL_DLLS(copy_openssl_pfs_unittest)
diff --git a/storage/perfschema/unittest/pfs-t.cc b/storage/perfschema/unittest/pfs-t.cc
index 6b30c0cc498..31f68195f1b 100644
--- a/storage/perfschema/unittest/pfs-t.cc
+++ b/storage/perfschema/unittest/pfs-t.cc
@@ -111,6 +111,7 @@ void test_bootstrap()
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
param.m_digest_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
boot= initialize_performance_schema(& param);
ok(boot != NULL, "boot");
@@ -168,6 +169,7 @@ PSI * load_perfschema()
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
param.m_digest_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
/* test_bootstrap() covered this, assuming it just works */
boot= initialize_performance_schema(& param);
@@ -759,21 +761,21 @@ void test_init_disabled()
/* disabled S-A + disabled T-1: no instrumentation */
socket_class_A->m_enabled= false;
- socket_A1= psi->init_socket(socket_key_A, NULL);
+ socket_A1= psi->init_socket(socket_key_A, NULL, NULL, 0);
ok(socket_A1 == NULL, "socket_A1 not instrumented");
/* enabled S-A + disabled T-1: instrumentation (for later) */
socket_class_A->m_enabled= true;
- socket_A1= psi->init_socket(socket_key_A, NULL);
+ socket_A1= psi->init_socket(socket_key_A, NULL, NULL, 0);
ok(socket_A1 != NULL, "socket_A1 instrumented");
/* broken key + disabled T-1: no instrumentation */
socket_class_A->m_enabled= true;
- socket_A1= psi->init_socket(0, NULL);
+ socket_A1= psi->init_socket(0, NULL, NULL, 0);
ok(socket_A1 == NULL, "socket key 0 not instrumented");
- socket_A1= psi->init_socket(99, NULL);
+ socket_A1= psi->init_socket(99, NULL, NULL, 0);
ok(socket_A1 == NULL, "broken socket key not instrumented");
/* Pretend thread T-1 is enabled */
@@ -892,16 +894,16 @@ void test_init_disabled()
/* enabled S-A + enabled T-1: instrumentation */
socket_class_A->m_enabled= true;
- socket_A1= psi->init_socket(socket_key_A, NULL);
+ socket_A1= psi->init_socket(socket_key_A, NULL, NULL, 0);
ok(socket_A1 != NULL, "instrumented");
psi->destroy_socket(socket_A1);
/* broken key + enabled T-1: no instrumentation */
socket_class_A->m_enabled= true;
- socket_A1= psi->init_socket(0, NULL);
+ socket_A1= psi->init_socket(0, NULL, NULL, 0);
ok(socket_A1 == NULL, "not instrumented");
- socket_A1= psi->init_socket(99, NULL);
+ socket_A1= psi->init_socket(99, NULL, NULL, 0);
ok(socket_A1 == NULL, "not instrumented");
/* Pretend the running thread is not instrumented */
@@ -996,21 +998,21 @@ void test_init_disabled()
/* disabled S-A + unknown thread: no instrumentation */
socket_class_A->m_enabled= false;
- socket_A1= psi->init_socket(socket_key_A, NULL);
+ socket_A1= psi->init_socket(socket_key_A, NULL, NULL, 0);
ok(socket_A1 == NULL, "socket_A1 not instrumented");
/* enabled S-A + unknown thread: instrumentation (for later) */
socket_class_A->m_enabled= true;
- socket_A1= psi->init_socket(socket_key_A, NULL);
+ socket_A1= psi->init_socket(socket_key_A, NULL, NULL, 0);
ok(socket_A1 != NULL, "socket_A1 instrumented");
/* broken key + unknown thread: no instrumentation */
socket_class_A->m_enabled= true;
- socket_A1= psi->init_socket(0, NULL);
+ socket_A1= psi->init_socket(0, NULL, NULL, 0);
ok(socket_A1 == NULL, "socket key 0 not instrumented");
- socket_A1= psi->init_socket(99, NULL);
+ socket_A1= psi->init_socket(99, NULL, NULL, 0);
ok(socket_A1 == NULL, "broken socket key not instrumented");
shutdown_performance_schema();
@@ -1126,7 +1128,7 @@ void test_locker_disabled()
ok(file_A1 != NULL, "instrumented");
socket_class_A->m_enabled= true;
- socket_A1= psi->init_socket(socket_key_A, NULL);
+ socket_A1= psi->init_socket(socket_key_A, NULL, NULL, 0);
ok(socket_A1 != NULL, "instrumented");
/* Socket lockers require a thread owner */
@@ -1294,10 +1296,10 @@ void test_locker_disabled()
cond_locker= psi->start_cond_wait(&cond_state, cond_A1, mutex_A1, PSI_COND_WAIT, __FILE__, __LINE__);
ok(cond_locker != NULL, "locker");
psi->end_cond_wait(cond_locker, 0);
- file_locker= psi->get_thread_file_name_locker(&file_state, file_key_A, PSI_FILE_OPEN, "xxx", NULL);
+ file_locker= psi->get_thread_file_name_locker(&file_state, file_key_A, PSI_FILE_STREAM_OPEN, "xxx", NULL);
ok(file_locker != NULL, "locker");
psi->start_file_open_wait(file_locker, __FILE__, __LINE__);
- psi->end_file_open_wait(file_locker);
+ psi->end_file_open_wait(file_locker, NULL);
file_locker= psi->get_thread_file_stream_locker(&file_state, file_A1, PSI_FILE_READ);
ok(file_locker != NULL, "locker");
psi->start_file_wait(file_locker, 10, __FILE__, __LINE__);
@@ -1314,7 +1316,7 @@ void test_locker_disabled()
/* ---------------------------------------------- */
socket_class_A->m_enabled= true;
- socket_A1= psi->init_socket(socket_key_A, NULL);
+ socket_A1= psi->init_socket(socket_key_A, NULL, NULL, 0);
ok(socket_A1 != NULL, "instrumented");
/* Socket thread owner has not been set */
socket_locker= psi->start_socket_wait(&socket_state, socket_A1, PSI_SOCKET_SEND, 12, "foo.cc", 12);
@@ -1485,6 +1487,8 @@ void test_event_name_index()
memset(& param, 0xFF, sizeof(param));
param.m_enabled= true;
+ /* NOTE: Need to add 3 to each index: table io, table lock, idle */
+
/* Per mutex info waits should be at [0..9] */
param.m_mutex_class_sizing= 10;
/* Per rwlock info waits should be at [10..29] */
@@ -1509,6 +1513,7 @@ void test_event_name_index()
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
param.m_digest_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
param.m_mutex_sizing= 0;
param.m_rwlock_sizing= 0;
@@ -1540,10 +1545,10 @@ void test_event_name_index()
psi->register_mutex("X", dummy_mutexes, 2);
mutex_class= find_mutex_class(dummy_mutex_key_1);
ok(mutex_class != NULL, "mutex class 1");
- ok(mutex_class->m_event_name_index == 0, "index 0");
+ ok(mutex_class->m_event_name_index == 3, "index 3");
mutex_class= find_mutex_class(dummy_mutex_key_2);
ok(mutex_class != NULL, "mutex class 2");
- ok(mutex_class->m_event_name_index == 1, "index 1");
+ ok(mutex_class->m_event_name_index == 4, "index 4");
PFS_rwlock_class *rwlock_class;
PSI_rwlock_key dummy_rwlock_key_1;
@@ -1557,10 +1562,10 @@ void test_event_name_index()
psi->register_rwlock("X", dummy_rwlocks, 2);
rwlock_class= find_rwlock_class(dummy_rwlock_key_1);
ok(rwlock_class != NULL, "rwlock class 1");
- ok(rwlock_class->m_event_name_index == 10, "index 10");
+ ok(rwlock_class->m_event_name_index == 13, "index 13");
rwlock_class= find_rwlock_class(dummy_rwlock_key_2);
ok(rwlock_class != NULL, "rwlock class 2");
- ok(rwlock_class->m_event_name_index == 11, "index 11");
+ ok(rwlock_class->m_event_name_index == 14, "index 14");
PFS_cond_class *cond_class;
PSI_cond_key dummy_cond_key_1;
@@ -1574,10 +1579,10 @@ void test_event_name_index()
psi->register_cond("X", dummy_conds, 2);
cond_class= find_cond_class(dummy_cond_key_1);
ok(cond_class != NULL, "cond class 1");
- ok(cond_class->m_event_name_index == 30, "index 30");
+ ok(cond_class->m_event_name_index == 33, "index 33");
cond_class= find_cond_class(dummy_cond_key_2);
ok(cond_class != NULL, "cond class 2");
- ok(cond_class->m_event_name_index == 31, "index 31");
+ ok(cond_class->m_event_name_index == 34, "index 34");
PFS_file_class *file_class;
PSI_file_key dummy_file_key_1;
@@ -1591,10 +1596,10 @@ void test_event_name_index()
psi->register_file("X", dummy_files, 2);
file_class= find_file_class(dummy_file_key_1);
ok(file_class != NULL, "file class 1");
- ok(file_class->m_event_name_index == 70, "index 70");
+ ok(file_class->m_event_name_index == 73, "index 73");
file_class= find_file_class(dummy_file_key_2);
ok(file_class != NULL, "file class 2");
- ok(file_class->m_event_name_index == 71, "index 71");
+ ok(file_class->m_event_name_index == 74, "index 74");
PFS_socket_class *socket_class;
PSI_socket_key dummy_socket_key_1;
@@ -1608,13 +1613,13 @@ void test_event_name_index()
psi->register_socket("X", dummy_sockets, 2);
socket_class= find_socket_class(dummy_socket_key_1);
ok(socket_class != NULL, "socket class 1");
- ok(socket_class->m_event_name_index == 150, "index 150");
+ ok(socket_class->m_event_name_index == 153, "index 153");
socket_class= find_socket_class(dummy_socket_key_2);
ok(socket_class != NULL, "socket class 2");
- ok(socket_class->m_event_name_index == 151, "index 151");
+ ok(socket_class->m_event_name_index == 154, "index 154");
- ok(global_table_io_class.m_event_name_index == 310, "index 310");
- ok(global_table_lock_class.m_event_name_index == 311, "index 311");
+ ok(global_table_io_class.m_event_name_index == 0, "index 0");
+ ok(global_table_lock_class.m_event_name_index == 1, "index 1");
ok(wait_class_max= 313, "313 event names"); // 3 global classes
}
diff --git a/storage/perfschema/unittest/pfs_account-oom-t.cc b/storage/perfschema/unittest/pfs_account-oom-t.cc
index d0c139476b0..0e48ab68ef7 100644
--- a/storage/perfschema/unittest/pfs_account-oom-t.cc
+++ b/storage/perfschema/unittest/pfs_account-oom-t.cc
@@ -59,6 +59,7 @@ void test_oom()
param.m_statement_class_sizing= 50;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
/* Setup */
diff --git a/storage/perfschema/unittest/pfs_connect_attr-t.cc b/storage/perfschema/unittest/pfs_connect_attr-t.cc
new file mode 100644
index 00000000000..7bee1d063a1
--- /dev/null
+++ b/storage/perfschema/unittest/pfs_connect_attr-t.cc
@@ -0,0 +1,345 @@
+/* Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
+
+#include <my_global.h>
+#include <my_pthread.h>
+#include <pfs_server.h>
+#include <pfs_instr_class.h>
+#include <pfs_instr.h>
+#include <pfs_global.h>
+#include <tap.h>
+
+
+#include <string.h>
+#include <memory.h>
+
+/* test helpers, to inspect data */
+bool read_nth_attr(const char *connect_attrs, uint connect_attrs_length,
+ const CHARSET_INFO *connect_attrs_cs,
+ uint ordinal,
+ char *attr_name, uint max_attr_name,
+ uint *attr_name_length,
+ char *attr_value, uint max_attr_value,
+ uint *attr_value_length);
+
+void test_blob_parser()
+{
+ char name[100], value[4096];
+ unsigned char packet[10000], *ptr;
+ uint name_len, value_len, idx, packet_length;
+ bool result;
+ const CHARSET_INFO *cs= &my_charset_utf8_bin;
+
+ diag("test_blob_parser");
+
+ result= read_nth_attr("", 0, cs, 0,
+ name, 32, &name_len, value, 1024, &value_len);
+ ok(result == false, "zero length blob");
+
+
+ result= read_nth_attr("\x1", 1, cs, 0,
+ name, 32, &name_len, value, 1024, &value_len);
+ ok(result == false, "invalid key length");
+
+
+ result= read_nth_attr("\x2k1\x1", 4, cs, 0,
+ name, 32, &name_len, value, 1024, &value_len);
+ ok(result == false, "invalid value length");
+
+
+ result= read_nth_attr("\x2k1\x2v1", 6, cs, 0,
+ name, 32, &name_len, value, 1024, &value_len);
+ ok(result == true, "one pair return");
+ ok(name_len == 2, "one pair attr name length");
+ ok(!strncmp(name, "k1", name_len), "one pair attr name");
+ ok(value_len == 2, "one pair value length");
+ ok(!strncmp(value, "v1", value_len), "one pair value");
+
+ result= read_nth_attr("\x2k1\x2v1", 6, cs, 1,
+ name, 32, &name_len, value, 1024, &value_len);
+ ok(result == false, "no second arg");
+
+ result= read_nth_attr("\x2k1\x2v1\x2k2\x2v2", 12, cs, 1,
+ name, 32, &name_len, value, 1024, &value_len);
+ ok(result == true, "two pairs return");
+ ok(name_len == 2, "two pairs attr name length");
+ ok(!strncmp(name, "k2", name_len), "two pairs attr name");
+ ok(value_len == 2, "two pairs value length");
+ ok(!strncmp(value, "v2", value_len), "two pairs value");
+
+ result= read_nth_attr("\x2k1\xff\x2k2\x2v2", 12, cs, 1,
+ name, 32, &name_len, value, 1024, &value_len);
+ ok(result == false, "two pairs first value bad return");
+
+ result= read_nth_attr("\x2k1\x2v1\x2k2\x2v2", 10, cs, 1,
+ name, 32, &name_len, value, 1024, &value_len);
+ ok(result == false, "two pairs wrong global length");
+
+ result= read_nth_attr("\x21z123456789z123456789z123456789z12\x2v1", 37, cs, 0,
+ name, 32, &name_len, value, 1024, &value_len);
+ ok(result == true, "attr name overflow");
+ ok(name_len == 32, "attr name overflow length");
+ ok(!strncmp(name, "z123456789z123456789z123456789z1", name_len),
+ "attr name overflow name");
+ ok(value_len == 2, "attr name overflow value length");
+ ok(!strncmp(value, "v1", value_len), "attr name overflow value");
+
+ packet[0]= 2;
+ packet[1]= 'k';
+ packet[2]= '1';
+ ptr= net_store_length(packet + 3, 1025);
+ for (idx= 0; idx < 1025; idx++)
+ *ptr++= '0' + (idx % 10);
+ packet_length= (uint) (ptr - packet);
+ result= read_nth_attr((char *) packet, packet_length, cs, 0,
+ name, 32, &name_len, value, 1024, &value_len);
+ ok(result == true, "attr value overflow");
+ ok(name_len == 2, "attr value overflow length");
+ ok(!strncmp(name, "k1", name_len), "attr value overflow name");
+ ok(value_len == 1024, "attr value overflow value length");
+ for (idx= 0; idx < 1024; idx++)
+ {
+ if (value[idx] != (char) ('0' + (idx % 10)))
+ break;
+ }
+ ok (idx == 1024, "attr value overflow value");
+
+ result= read_nth_attr("\x21z123456789z123456789z123456789z12\x2v1\x2k2\x2v2",
+ 43, cs, 1,
+ name, 32, &name_len, value, 1024, &value_len);
+ ok(result == true, "prev attr name overflow");
+ ok(name_len == 2, "prev attr name overflow length");
+ ok(!strncmp(name, "k2", name_len),
+ "prev attr name overflow name");
+ ok(value_len == 2, "prev attr name overflow value length");
+ ok(!strncmp(value, "v2", value_len), "prev attr name overflow value");
+
+
+ packet[1]= 'k';
+ packet[2]= '1';
+ packet[3]= 2;
+ packet[4]= 'v';
+ packet[5]= '1';
+
+ for(idx= 251; idx < 256; idx++)
+ {
+ packet[0]= idx;
+ result= read_nth_attr((char *) packet, 6, cs, 0,
+ name, 32, &name_len, value, 1024, &value_len);
+ ok(result == false, "invalid string length %d", idx);
+ }
+
+ memset(packet, 0, sizeof(packet));
+ for (idx=0; idx < 1660 /* *6 = 9960 */; idx++)
+ memcpy(packet + idx * 6, "\x2k1\x2v1", 6);
+ result= read_nth_attr((char *) packet, 8192, cs, 1364,
+ name, 32, &name_len, value, 1024, &value_len);
+ ok(result == true, "last valid attribute %d", 1364);
+ result= read_nth_attr((char *) packet, 8192, cs, 1365,
+ name, 32, &name_len, value, 1024, &value_len);
+ ok(result == false, "first attribute that's cut %d", 1365);
+}
+
+void test_multibyte_lengths()
+{
+ char name[100], value[4096];
+ uint name_len, value_len;
+ bool result;
+ const CHARSET_INFO *cs= &my_charset_utf8_bin;
+
+ unsigned char var_len_packet[] = {
+ 252, 2, 0, 'k', '1',
+ 253, 2, 0, 0, 'v', '1',
+ 254, 2, 0, 0, 0, 0, 0, 0, 0, 'k', '2',
+ 254, 2, 0, 0, 0, 0, 0, 0, 0, 'v', '2'
+ };
+
+ result= read_nth_attr((char *) var_len_packet, sizeof(var_len_packet), cs, 0,
+ name, 32, &name_len, value, 1024, &value_len);
+ ok(result == true, "multibyte lengths return");
+ ok(name_len == 2, "multibyte lengths name length");
+ ok(!strncmp(name, "k1", name_len), "multibyte lengths attr name");
+ ok(value_len == 2, "multibyte lengths value length");
+ ok(!strncmp(value, "v1", value_len), "multibyte lengths value");
+
+ result= read_nth_attr((char *) var_len_packet, sizeof(var_len_packet), cs, 1,
+ name, 32, &name_len, value, 1024, &value_len);
+ ok(result == true, "multibyte lengths second attr return");
+ ok(name_len == 2, "multibyte lengths second attr name length");
+ ok(!strncmp(name, "k2", name_len), "multibyte lengths second attr attr name");
+ ok(value_len == 2, "multibyte lengths value length");
+ ok(!strncmp(value, "v2", value_len), "multibyte lengths second attr value");
+}
+
+
+void test_utf8_parser()
+{
+ /* utf8 max byte length per character is 6 */
+ char name[33 * 6], value[1024 * 6], packet[1500 * 6], *ptr;
+ uint name_len, value_len;
+ bool result;
+ const CHARSET_INFO *cs= &my_charset_utf8_bin;
+
+ /* note : this is encoded in utf-8 */
+ const char *attr1= "Георги";
+ const char *val1= "Кодинов";
+ const char *attr2= "Пловдив";
+ const char *val2= "България";
+
+ ptr= packet;
+ *ptr++= strlen(attr1);
+ memcpy(ptr, attr1, strlen(attr1));
+ ptr+= strlen(attr1);
+ *ptr++= strlen(val1);
+ memcpy(ptr, val1, strlen(val1));
+ ptr+= strlen(val1);
+
+ *ptr++= strlen(attr2);
+ memcpy(ptr, attr2, strlen(attr2));
+ ptr+= strlen(attr2);
+ *ptr++= strlen(val2);
+ memcpy(ptr, val2, strlen(val2));
+ ptr+= strlen(val2);
+
+ diag("test_utf8_parser attr pair #1");
+
+ result= read_nth_attr((char *) packet, ptr - packet, cs, 0,
+ name, sizeof(name), &name_len,
+ value, sizeof(value), &value_len);
+ ok(result == true, "return");
+ ok(name_len == strlen(attr1), "name length");
+ ok(!strncmp(name, attr1, name_len), "attr name");
+ ok(value_len == strlen(val1), "value length");
+ ok(!strncmp(value, val1, value_len), "value");
+
+ diag("test_utf8_parser attr pair #2");
+ result= read_nth_attr((char *) packet, ptr - packet, cs, 1,
+ name, sizeof(name), &name_len,
+ value, sizeof(value), &value_len);
+ ok(result == true, "return");
+ ok(name_len == strlen(attr2), "name length");
+ ok(!strncmp(name, attr2, name_len), "attr name");
+ ok(value_len == strlen(val2), "value length");
+ ok(!strncmp(value, val2, value_len), "value");
+}
+
+
+void test_utf8_parser_bad_encoding()
+{
+ /* utf8 max byte length per character is 3*/
+ char name[33 * 3], value[1024 * 3], packet[1500 * 3], *ptr;
+ uint name_len, value_len;
+ bool result;
+ const CHARSET_INFO *cs= &my_charset_utf8_bin;
+
+ /* note : this is encoded in utf-8 */
+ const char *attr= "Георги";
+ const char *val= "Кодинов";
+
+ ptr= packet;
+ *ptr++= strlen(attr);
+ memcpy(ptr, attr, strlen(attr));
+ ptr[0]= 0xFA; // invalid UTF-8 char
+ ptr+= strlen(attr);
+ *ptr++= strlen(val);
+ memcpy(ptr, val, strlen(val));
+ ptr+= strlen(val);
+
+ diag("test_utf8_parser_bad_encoding");
+
+ result= read_nth_attr((char *) packet, ptr - packet, cs, 0,
+ name, sizeof(name), &name_len,
+ value, sizeof(value), &value_len);
+ ok(result == false, "return");
+}
+
+const CHARSET_INFO *cs_cp1251;
+
+void test_cp1251_parser()
+{
+ /* utf8 max byte length per character is 3*/
+ char name[33 * 3], value[1024 * 3], packet[1500 * 3], *ptr;
+ uint name_len, value_len;
+ bool result;
+
+ /* note : this is Георги in windows-1251 */
+ const char *attr1= "\xc3\xe5\xee\xf0\xe3\xe8";
+ /* note : this is Кодинов in windows-1251 */
+ const char *val1= "\xca\xee\xe4\xe8\xed\xee\xe2";
+ /* note : this is Пловдив in windows-1251 */
+ const char *attr2= "\xcf\xeb\xee\xe2\xe4\xe8\xe2";
+ /* note : this is България in windows-1251 */
+ const char *val2= "\xc1\xfa\xeb\xe3\xe0\xf0\xe8\xff";
+
+ ptr= packet;
+ *ptr++= strlen(attr1);
+ memcpy(ptr, attr1, strlen(attr1));
+ ptr+= strlen(attr1);
+ *ptr++= strlen(val1);
+ memcpy(ptr, val1, strlen(val1));
+ ptr+= strlen(val1);
+
+ *ptr++= strlen(attr2);
+ memcpy(ptr, attr2, strlen(attr2));
+ ptr+= strlen(attr2);
+ *ptr++= strlen(val2);
+ memcpy(ptr, val2, strlen(val2));
+ ptr+= strlen(val2);
+
+ diag("test_cp1251_parser attr pair #1");
+
+ result= read_nth_attr((char *) packet, ptr - packet, cs_cp1251, 0,
+ name, sizeof(name), &name_len,
+ value, sizeof(value), &value_len);
+ ok(result == true, "return");
+ /* need to compare to the UTF-8 equivalents */
+ ok(name_len == strlen("Георги"), "name length");
+ ok(!strncmp(name, "Георги", name_len), "attr name");
+ ok(value_len == strlen("Кодинов"), "value length");
+ ok(!strncmp(value, "Кодинов", value_len), "value");
+
+ diag("test_cp1251_parser attr pair #2");
+ result= read_nth_attr((char *) packet, ptr - packet, cs_cp1251, 1,
+ name, sizeof(name), &name_len,
+ value, sizeof(value), &value_len);
+ ok(result == true, "return");
+ /* need to compare to the UTF-8 equivalents */
+ ok(name_len == strlen("Пловдив"), "name length");
+ ok(!strncmp(name, "Пловдив", name_len), "attr name");
+ ok(value_len == strlen("България"), "value length");
+ ok(!strncmp(value, "България", value_len), "value");
+}
+
+
+void do_all_tests()
+{
+ test_blob_parser();
+ test_multibyte_lengths();
+ test_utf8_parser();
+ test_utf8_parser_bad_encoding();
+ test_cp1251_parser();
+}
+
+int main(int, char **)
+{
+ MY_INIT("pfs_connect_attr-t");
+
+ cs_cp1251= get_charset_by_csname("cp1251", MY_CS_PRIMARY, MYF(0));
+ if (!cs_cp1251)
+ diag("skipping the cp1251 tests : missing character set");
+ plan(59 + (cs_cp1251 ? 10 : 0));
+ do_all_tests();
+ return 0;
+}
diff --git a/storage/perfschema/unittest/pfs_host-oom-t.cc b/storage/perfschema/unittest/pfs_host-oom-t.cc
index a4fb36b0b08..cc445620496 100644
--- a/storage/perfschema/unittest/pfs_host-oom-t.cc
+++ b/storage/perfschema/unittest/pfs_host-oom-t.cc
@@ -59,6 +59,7 @@ void test_oom()
param.m_statement_class_sizing= 50;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
/* Setup */
diff --git a/storage/perfschema/unittest/pfs_instr-oom-t.cc b/storage/perfschema/unittest/pfs_instr-oom-t.cc
index 41bb4ed6c5a..5d9873d7927 100644
--- a/storage/perfschema/unittest/pfs_instr-oom-t.cc
+++ b/storage/perfschema/unittest/pfs_instr-oom-t.cc
@@ -63,6 +63,7 @@ void test_oom()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
init_event_name_sizing(& param);
rc= init_instruments(& param);
@@ -98,6 +99,7 @@ void test_oom()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
init_event_name_sizing(& param);
rc= init_instruments(& param);
@@ -133,6 +135,7 @@ void test_oom()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
init_event_name_sizing(& param);
rc= init_instruments(& param);
@@ -168,6 +171,7 @@ void test_oom()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
init_event_name_sizing(& param);
rc= init_instruments(& param);
@@ -201,6 +205,7 @@ void test_oom()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
init_event_name_sizing(& param);
rc= init_instruments(& param);
@@ -236,6 +241,7 @@ void test_oom()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
init_event_name_sizing(& param);
rc= init_instruments(& param);
@@ -271,6 +277,7 @@ void test_oom()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
init_event_name_sizing(& param);
rc= init_instruments(& param);
@@ -308,6 +315,7 @@ void test_oom()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
stub_alloc_fails_after_count= 2;
init_event_name_sizing(& param);
@@ -341,6 +349,7 @@ void test_oom()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
stub_alloc_fails_after_count= 2;
init_event_name_sizing(& param);
@@ -383,8 +392,9 @@ void test_oom()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
- stub_alloc_fails_after_count= 2;
+ stub_alloc_fails_after_count= 1;
init_event_name_sizing(& param);
rc= init_instruments(& param);
ok(rc == 1, "oom (per thread waits)");
@@ -417,6 +427,7 @@ void test_oom()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
stub_alloc_fails_after_count= 3;
init_event_name_sizing(& param);
@@ -451,6 +462,7 @@ void test_oom()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
stub_alloc_fails_after_count= 2;
init_event_name_sizing(& param);
@@ -485,6 +497,7 @@ void test_oom()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 10;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
stub_alloc_fails_after_count= 2;
init_event_name_sizing(& param);
@@ -519,6 +532,7 @@ void test_oom()
param.m_statement_class_sizing= 50;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
stub_alloc_fails_after_count= 2;
init_event_name_sizing(& param);
@@ -553,6 +567,7 @@ void test_oom()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
stub_alloc_fails_after_count= 1;
init_event_name_sizing(& param);
@@ -587,6 +602,7 @@ void test_oom()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
stub_alloc_fails_after_count= 3;
init_event_name_sizing(& param);
@@ -624,6 +640,7 @@ void test_oom()
param.m_statement_class_sizing= 20;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
stub_alloc_fails_after_count= 3;
init_event_name_sizing(& param);
diff --git a/storage/perfschema/unittest/pfs_instr-t.cc b/storage/perfschema/unittest/pfs_instr-t.cc
index b0839de70b2..4ef240ea819 100644
--- a/storage/perfschema/unittest/pfs_instr-t.cc
+++ b/storage/perfschema/unittest/pfs_instr-t.cc
@@ -60,6 +60,8 @@ void test_no_instruments()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_digest_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
init_event_name_sizing(& param);
rc= init_instruments(& param);
@@ -117,6 +119,8 @@ void test_no_instances()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_digest_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
init_event_name_sizing(& param);
rc= init_instruments(& param);
@@ -153,19 +157,19 @@ void test_no_instances()
PFS_thread fake_thread;
fake_thread.m_filename_hash_pins= NULL;
- file= find_or_create_file(& fake_thread, & dummy_file_class, "dummy", 5);
+ file= find_or_create_file(& fake_thread, & dummy_file_class, "dummy", 5, true);
ok(file == NULL, "no file");
ok(file_lost == 1, "lost 1");
- file= find_or_create_file(& fake_thread, & dummy_file_class, "dummy", 5);
+ file= find_or_create_file(& fake_thread, & dummy_file_class, "dummy", 5, true);
ok(file == NULL, "no file");
ok(file_lost == 2, "lost 2");
init_file_hash();
- file= find_or_create_file(& fake_thread, & dummy_file_class, "dummy", 5);
+ file= find_or_create_file(& fake_thread, & dummy_file_class, "dummy", 5, true);
ok(file == NULL, "no file");
ok(file_lost == 3, "lost 3");
- file= find_or_create_file(& fake_thread, & dummy_file_class, "dummy", 5);
+ file= find_or_create_file(& fake_thread, & dummy_file_class, "dummy", 5, true);
ok(file == NULL, "no file");
ok(file_lost == 4, "lost 4");
@@ -173,7 +177,7 @@ void test_no_instances()
int size= sizeof(long_file_name);
memset(long_file_name, 'X', size);
- file= find_or_create_file(& fake_thread, & dummy_file_class, long_file_name, size);
+ file= find_or_create_file(& fake_thread, & dummy_file_class, long_file_name, size, true);
ok(file == NULL, "no file");
ok(file_lost == 5, "lost 5");
@@ -184,10 +188,10 @@ void test_no_instances()
ok(table == NULL, "no table");
ok(table_lost == 2, "lost 2");
- socket= create_socket(& dummy_socket_class, NULL);
+ socket= create_socket(& dummy_socket_class, NULL, NULL, 0);
ok(socket == NULL, "no socket");
ok(socket_lost == 1, "lost 1");
- socket= create_socket(& dummy_socket_class, NULL);
+ socket= create_socket(& dummy_socket_class, NULL, NULL, 0);
ok(socket == NULL, "no socket");
ok(socket_lost == 2, "lost 2");
@@ -255,6 +259,8 @@ void test_with_instances()
param.m_statement_class_sizing= 0;
param.m_events_statements_history_sizing= 0;
param.m_events_statements_history_long_sizing= 0;
+ param.m_digest_sizing= 0;
+ param.m_session_connect_attrs_sizing= 0;
init_event_name_sizing(& param);
rc= init_instruments(& param);
@@ -325,50 +331,50 @@ void test_with_instances()
PFS_thread fake_thread;
fake_thread.m_filename_hash_pins= NULL;
- file_1= find_or_create_file(& fake_thread, & dummy_file_class, "dummy", 5);
+ file_1= find_or_create_file(& fake_thread, & dummy_file_class, "dummy", 5, true);
ok(file_1 == NULL, "no file");
ok(file_lost == 1, "lost 1");
- file_1= find_or_create_file(& fake_thread, & dummy_file_class, "dummy", 5);
+ file_1= find_or_create_file(& fake_thread, & dummy_file_class, "dummy", 5, true);
ok(file_1 == NULL, "no file");
ok(file_lost == 2, "lost 2");
init_file_hash();
file_lost= 0;
- file_1= find_or_create_file(& fake_thread, & dummy_file_class, "dummy_A", 7);
+ file_1= find_or_create_file(& fake_thread, & dummy_file_class, "dummy_A", 7, true);
ok(file_1 != NULL, "file");
ok(file_1->m_file_stat.m_open_count == 1, "open count 1");
ok(file_lost == 0, "not lost");
- file_2= find_or_create_file(& fake_thread, & dummy_file_class, "dummy_A", 7);
+ file_2= find_or_create_file(& fake_thread, & dummy_file_class, "dummy_A", 7, true);
ok(file_1 == file_2, "same file");
ok(file_1->m_file_stat.m_open_count == 2, "open count 2");
ok(file_lost == 0, "not lost");
release_file(file_2);
ok(file_1->m_file_stat.m_open_count == 1, "open count 1");
- file_2= find_or_create_file(& fake_thread, & dummy_file_class, "dummy_B", 7);
+ file_2= find_or_create_file(& fake_thread, & dummy_file_class, "dummy_B", 7, true);
ok(file_2 != NULL, "file");
ok(file_lost == 0, "not lost");
- file_2= find_or_create_file(& fake_thread, & dummy_file_class, "dummy_C", 7);
+ file_2= find_or_create_file(& fake_thread, & dummy_file_class, "dummy_C", 7, true);
ok(file_2 == NULL, "no file");
ok(file_lost == 1, "lost");
release_file(file_1);
/* the file still exists, not destroyed */
ok(file_1->m_file_stat.m_open_count == 0, "open count 0");
- file_2= find_or_create_file(& fake_thread, & dummy_file_class, "dummy_D", 7);
+ file_2= find_or_create_file(& fake_thread, & dummy_file_class, "dummy_D", 7, true);
ok(file_2 == NULL, "no file");
ok(file_lost == 2, "lost");
- socket_1= create_socket(& dummy_socket_class, NULL);
+ socket_1= create_socket(& dummy_socket_class, NULL, NULL, 0);
ok(socket_1 != NULL, "socket");
ok(socket_lost == 0, "not lost");
- socket_2= create_socket(& dummy_socket_class, NULL);
+ socket_2= create_socket(& dummy_socket_class, NULL, NULL, 0);
ok(socket_2 != NULL, "socket");
ok(socket_lost == 0, "not lost");
- socket_2= create_socket(& dummy_socket_class, NULL);
+ socket_2= create_socket(& dummy_socket_class, NULL, NULL, 0);
ok(socket_2 == NULL, "no socket");
ok(socket_lost == 1, "lost 1");
destroy_socket(socket_1);
- socket_2= create_socket(& dummy_socket_class, NULL);
+ socket_2= create_socket(& dummy_socket_class, NULL, NULL, 0);
ok(socket_2 != NULL, "socket");
ok(socket_lost == 1, "no new loss");
diff --git a/storage/perfschema/unittest/pfs_instr_class-t.cc b/storage/perfschema/unittest/pfs_instr_class-t.cc
index 9e3efde656e..7b3ffccffcc 100644
--- a/storage/perfschema/unittest/pfs_instr_class-t.cc
+++ b/storage/perfschema/unittest/pfs_instr_class-t.cc
@@ -475,6 +475,7 @@ void test_table_registration()
#endif
}
+#ifdef LATER
void set_wait_stat(PFS_instr_class *klass)
{
PFS_single_stat *stat;
@@ -501,6 +502,7 @@ bool is_empty_stat(PFS_instr_class *klass)
return false;
return true;
}
+#endif
void test_instruments_reset()
{
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c
index 7c7d8c7b2f5..f77e705525c 100644
--- a/strings/ctype-big5.c
+++ b/strings/ctype-big5.c
@@ -870,7 +870,7 @@ static int my_strnncoll_big5(CHARSET_INFO *cs __attribute__((unused)),
const uchar *b, size_t b_length,
my_bool b_is_prefix)
{
- size_t length= min(a_length, b_length);
+ size_t length= MY_MIN(a_length, b_length);
int res= my_strnncoll_big5_internal(&a, &b, length);
return res ? res : (int)((b_is_prefix ? length : a_length) - b_length);
}
@@ -883,7 +883,7 @@ static int my_strnncollsp_big5(CHARSET_INFO * cs __attribute__((unused)),
const uchar *b, size_t b_length,
my_bool diff_if_only_endspace_difference)
{
- size_t length= min(a_length, b_length);
+ size_t length= MY_MIN(a_length, b_length);
int res= my_strnncoll_big5_internal(&a, &b, length);
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c
index d28d576c661..80ed047ebf2 100644
--- a/strings/ctype-bin.c
+++ b/strings/ctype-bin.c
@@ -80,7 +80,7 @@ static int my_strnncoll_binary(CHARSET_INFO * cs __attribute__((unused)),
const uchar *t, size_t tlen,
my_bool t_is_prefix)
{
- size_t len=min(slen,tlen);
+ size_t len=MY_MIN(slen,tlen);
int cmp= memcmp(s,t,len);
return cmp ? cmp : (int)((t_is_prefix ? len : slen) - tlen);
}
@@ -131,7 +131,7 @@ static int my_strnncoll_8bit_bin(CHARSET_INFO * cs __attribute__((unused)),
const uchar *t, size_t tlen,
my_bool t_is_prefix)
{
- size_t len=min(slen,tlen);
+ size_t len=MY_MIN(slen,tlen);
int cmp= memcmp(s,t,len);
return cmp ? cmp : (int)((t_is_prefix ? len : slen) - tlen);
}
@@ -175,7 +175,7 @@ static int my_strnncollsp_8bit_bin(CHARSET_INFO * cs __attribute__((unused)),
diff_if_only_endspace_difference= 0;
#endif
- end= a + (length= min(a_length, b_length));
+ end= a + (length= MY_MIN(a_length, b_length));
while (a < end)
{
if (*a++ != *b++)
@@ -401,7 +401,7 @@ static size_t my_strnxfrm_bin(CHARSET_INFO *cs __attribute__((unused)),
const uchar *src, size_t srclen)
{
if (dest != src)
- memcpy(dest, src, min(dstlen,srclen));
+ memcpy(dest, src, MY_MIN(dstlen,srclen));
if (dstlen > srclen)
bfill(dest + srclen, dstlen - srclen, 0);
return dstlen;
@@ -414,7 +414,7 @@ size_t my_strnxfrm_8bit_bin(CHARSET_INFO *cs __attribute__((unused)),
const uchar *src, size_t srclen)
{
if (dest != src)
- memcpy(dest, src, min(dstlen,srclen));
+ memcpy(dest, src, MY_MIN(dstlen,srclen));
if (dstlen > srclen)
bfill(dest + srclen, dstlen - srclen, ' ');
return dstlen;
diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c
index 8b37de4a5e7..e21c406d2a9 100644
--- a/strings/ctype-gbk.c
+++ b/strings/ctype-gbk.c
@@ -3470,7 +3470,7 @@ int my_strnncoll_gbk(CHARSET_INFO *cs __attribute__((unused)),
const uchar *b, size_t b_length,
my_bool b_is_prefix)
{
- size_t length= min(a_length, b_length);
+ size_t length= MY_MIN(a_length, b_length);
int res= my_strnncoll_gbk_internal(&a, &b, length);
return res ? res : (int) ((b_is_prefix ? length : a_length) - b_length);
}
@@ -3481,7 +3481,7 @@ static int my_strnncollsp_gbk(CHARSET_INFO * cs __attribute__((unused)),
const uchar *b, size_t b_length,
my_bool diff_if_only_endspace_difference)
{
- size_t length= min(a_length, b_length);
+ size_t length= MY_MIN(a_length, b_length);
int res= my_strnncoll_gbk_internal(&a, &b, length);
#ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c
index 5879bdf7978..5efc6348516 100644
--- a/strings/ctype-mb.c
+++ b/strings/ctype-mb.c
@@ -473,7 +473,7 @@ my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
const uchar *t, size_t tlen,
my_bool t_is_prefix)
{
- size_t len=min(slen,tlen);
+ size_t len=MY_MIN(slen,tlen);
int cmp= memcmp(s,t,len);
return cmp ? cmp : (int) ((t_is_prefix ? len : slen) - tlen);
}
@@ -518,7 +518,7 @@ my_strnncollsp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
diff_if_only_endspace_difference= 0;
#endif
- end= a + (length= min(a_length, b_length));
+ end= a + (length= MY_MIN(a_length, b_length));
while (a < end)
{
if (*a++ != *b++)
@@ -557,7 +557,7 @@ static size_t my_strnxfrm_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
const uchar *src, size_t srclen)
{
if (dest != src)
- memcpy(dest, src, min(dstlen, srclen));
+ memcpy(dest, src, MY_MIN(dstlen, srclen));
if (dstlen > srclen)
bfill(dest + srclen, dstlen - srclen, ' ');
return dstlen;
diff --git a/strings/ctype-simple.c b/strings/ctype-simple.c
index e25c0783abf..4b47996e315 100644
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -160,7 +160,7 @@ int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, size_t a_length,
diff_if_only_endspace_difference= 0;
#endif
- end= a + (length= min(a_length, b_length));
+ end= a + (length= MY_MIN(a_length, b_length));
while (a < end)
{
if (map[*a++] != map[*b++])
@@ -770,7 +770,7 @@ size_t my_long10_to_str_8bit(CHARSET_INFO *cs __attribute__((unused)),
val= new_val;
}
- len= min(len, (size_t) (e-p));
+ len= MY_MIN(len, (size_t) (e-p));
memcpy(dst, p, len);
return len+sign;
}
@@ -824,7 +824,7 @@ size_t my_longlong10_to_str_8bit(CHARSET_INFO *cs __attribute__((unused)),
long_val= quo;
}
- len= min(len, (size_t) (e-p));
+ len= MY_MIN(len, (size_t) (e-p));
cnv:
memcpy(dst, p, len);
return len+sign;
@@ -1055,7 +1055,7 @@ size_t my_well_formed_len_8bit(CHARSET_INFO *cs __attribute__((unused)),
{
size_t nbytes= (size_t) (end-start);
*error= 0;
- return min(nbytes, nchars);
+ return MY_MIN(nbytes, nchars);
}
diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c
index d97f8e5ff08..d84d43a67bd 100644
--- a/strings/ctype-tis620.c
+++ b/strings/ctype-tis620.c
@@ -566,7 +566,7 @@ int my_strnncollsp_tis620(CHARSET_INFO * cs __attribute__((unused)),
a_length= thai2sortable(a, a_length);
b_length= thai2sortable(b, b_length);
- end= a + (length= min(a_length, b_length));
+ end= a + (length= MY_MIN(a_length, b_length));
while (a < end)
{
if (*a++ != *b++)
@@ -623,7 +623,7 @@ size_t my_strnxfrm_tis620(CHARSET_INFO *cs __attribute__((unused)),
const uchar *src, size_t srclen)
{
size_t dstlen= len;
- len= (size_t) (strmake((char*) dest, (char*) src, min(len, srclen)) -
+ len= (size_t) (strmake((char*) dest, (char*) src, MY_MIN(len, srclen)) -
(char*) dest);
len= thai2sortable(dest, len);
if (dstlen > len)
diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c
index e4a8d7a4067..5feeb661d2a 100644
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -7726,7 +7726,7 @@ static void my_coll_lexem_print_error(MY_COLL_LEXEM *lexem,
{
char tail[30];
size_t len= lexem->end - lexem->prev;
- strmake (tail, lexem->prev, (size_t) min(len, sizeof(tail)-1));
+ strmake (tail, lexem->prev, (size_t) MY_MIN(len, sizeof(tail)-1));
errstr[errsize-1]= '\0';
my_snprintf(errstr,errsize-1,"%s at '%s'", txt, tail);
}
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index 6ebbae8fb5a..8e9b0ab7a04 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -58,7 +58,7 @@ my_bincmp(const uchar *s, const uchar *se,
const uchar *t, const uchar *te)
{
int slen= (int) (se - s), tlen= (int) (te - t);
- int len= min(slen, tlen);
+ int len= MY_MIN(slen, tlen);
int cmp= memcmp(s, t, len);
return cmp ? cmp : slen - tlen;
}
@@ -2433,7 +2433,7 @@ my_strnncollsp_utf32_bin(CHARSET_INFO *cs __attribute__((unused)),
se= s + slen;
te= t + tlen;
- for (minlen= min(slen, tlen); minlen; minlen-= 4)
+ for (minlen= MY_MIN(slen, tlen); minlen; minlen-= 4)
{
my_wc_t s_wc= my_utf32_get(s);
my_wc_t t_wc= my_utf32_get(t);
@@ -2860,7 +2860,7 @@ static int my_strnncollsp_ucs2(CHARSET_INFO *cs __attribute__((unused)),
se= s + slen;
te= t + tlen;
- for (minlen= min(slen, tlen); minlen; minlen-= 2)
+ for (minlen= MY_MIN(slen, tlen); minlen; minlen-= 2)
{
int s_wc = uni_plane[s[0]] ? (int) uni_plane[s[0]][s[1]].sort :
(((int) s[0]) << 8) + (int) s[1];
@@ -2937,7 +2937,7 @@ size_t my_well_formed_len_ucs2(CHARSET_INFO *cs __attribute__((unused)),
size_t nbytes= ((size_t) (e-b)) & ~(size_t) 1;
*error= 0;
nchars*= 2;
- return min(nbytes, nchars);
+ return MY_MIN(nbytes, nchars);
}
@@ -3012,7 +3012,7 @@ static int my_strnncollsp_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)),
se= s + slen;
te= t + tlen;
- for (minlen= min(slen, tlen); minlen; minlen-= 2)
+ for (minlen= MY_MIN(slen, tlen); minlen; minlen-= 2)
{
int s_wc= s[0] * 256 + s[1];
int t_wc= t[0] * 256 + t[1];
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index 88bab1fac76..6020f9c962f 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -2244,7 +2244,7 @@ static inline int bincmp(const uchar *s, const uchar *se,
const uchar *t, const uchar *te)
{
int slen= (int) (se-s), tlen= (int) (te-t);
- int len=min(slen,tlen);
+ int len=MY_MIN(slen,tlen);
int cmp= memcmp(s,t,len);
return cmp ? cmp : slen-tlen;
}
@@ -4667,7 +4667,7 @@ bincmp_utf8mb4(const uchar *s, const uchar *se,
const uchar *t, const uchar *te)
{
int slen= (int) (se - s), tlen= (int) (te - t);
- int len= min(slen, tlen);
+ int len= MY_MIN(slen, tlen);
int cmp= memcmp(s, t, len);
return cmp ? cmp : slen - tlen;
}
diff --git a/strings/ctype.c b/strings/ctype.c
index 6b6983ada60..adff69ad680 100644
--- a/strings/ctype.c
+++ b/strings/ctype.c
@@ -428,3 +428,144 @@ my_charset_is_ascii_compatible(CHARSET_INFO *cs)
}
return 1;
}
+
+
+/*
+ Convert a string between two character sets.
+ 'to' must be large enough to store (from_length * to_cs->mbmaxlen) bytes.
+
+ @param to[OUT] Store result here
+ @param to_length Size of "to" buffer
+ @param to_cs Character set of result string
+ @param from Copy from here
+ @param from_length Length of the "from" string
+ @param from_cs Character set of the "from" string
+ @param errors[OUT] Number of conversion errors
+
+ @return Number of bytes copied to 'to' string
+*/
+
+static uint32
+my_convert_internal(char *to, uint32 to_length,
+ const CHARSET_INFO *to_cs,
+ const char *from, uint32 from_length,
+ const CHARSET_INFO *from_cs, uint *errors)
+{
+ int cnvres;
+ my_wc_t wc;
+ const uchar *from_end= (const uchar*) from + from_length;
+ char *to_start= to;
+ uchar *to_end= (uchar*) to + to_length;
+ my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
+ my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
+ uint error_count= 0;
+
+ while (1)
+ {
+ if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from, from_end)) > 0)
+ from+= cnvres;
+ else if (cnvres == MY_CS_ILSEQ)
+ {
+ error_count++;
+ from++;
+ wc= '?';
+ }
+ else if (cnvres > MY_CS_TOOSMALL)
+ {
+ /*
+ A correct multibyte sequence detected
+ But it doesn't have Unicode mapping.
+ */
+ error_count++;
+ from+= (-cnvres);
+ wc= '?';
+ }
+ else
+ break; // Not enough characters
+
+outp:
+ if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
+ to+= cnvres;
+ else if (cnvres == MY_CS_ILUNI && wc != '?')
+ {
+ error_count++;
+ wc= '?';
+ goto outp;
+ }
+ else
+ break;
+ }
+ *errors= error_count;
+ return (uint32) (to - to_start);
+}
+
+
+/*
+ Convert a string between two character sets.
+ Optimized for quick copying of ASCII characters in the range 0x00..0x7F.
+ 'to' must be large enough to store (from_length * to_cs->mbmaxlen) bytes.
+
+ @param to[OUT] Store result here
+ @param to_length Size of "to" buffer
+ @param to_cs Character set of result string
+ @param from Copy from here
+ @param from_length Length of the "from" string
+ @param from_cs Character set of the "from" string
+ @param errors[OUT] Number of conversion errors
+
+ @return Number of bytes copied to 'to' string
+*/
+
+uint32
+my_convert(char *to, uint32 to_length, const CHARSET_INFO *to_cs,
+ const char *from, uint32 from_length,
+ const CHARSET_INFO *from_cs, uint *errors)
+{
+ uint32 length, length2;
+ /*
+ If any of the character sets is not ASCII compatible,
+ immediately switch to slow mb_wc->wc_mb method.
+ */
+ if ((to_cs->state | from_cs->state) & MY_CS_NONASCII)
+ return my_convert_internal(to, to_length, to_cs,
+ from, from_length, from_cs, errors);
+
+ length= length2= MY_MIN(to_length, from_length);
+
+#if defined(__i386__)
+ /*
+ Special loop for i386, it allows to refer to a
+ non-aligned memory block as UINT32, which makes
+ it possible to copy four bytes at once. This
+ gives about 10% performance improvement comparing
+ to byte-by-byte loop.
+ */
+ for ( ; length >= 4; length-= 4, from+= 4, to+= 4)
+ {
+ if ((*(uint32*)from) & 0x80808080)
+ break;
+ *((uint32*) to)= *((const uint32*) from);
+ }
+#endif /* __i386__ */
+
+ for (; ; *to++= *from++, length--)
+ {
+ if (!length)
+ {
+ *errors= 0;
+ return length2;
+ }
+ if (*((unsigned char*) from) > 0x7F) /* A non-ASCII character */
+ {
+ uint32 copied_length= length2 - length;
+ to_length-= copied_length;
+ from_length-= copied_length;
+ return copied_length + my_convert_internal(to, to_length, to_cs,
+ from, from_length, from_cs,
+ errors);
+ }
+ }
+
+ DBUG_ASSERT(FALSE); // Should never get to here
+ return 0; // Make compiler happy
+}
diff --git a/strings/decimal.c b/strings/decimal.c
index f318a234d3f..75d72890557 100644
--- a/strings/decimal.c
+++ b/strings/decimal.c
@@ -396,7 +396,7 @@ int decimal2string(const decimal_t *from, char *to, int *to_len,
for (; frac>0; frac-=DIG_PER_DEC1)
{
dec1 x=*buf++;
- for (i=min(frac, DIG_PER_DEC1); i; i--)
+ for (i=MY_MIN(frac, DIG_PER_DEC1); i; i--)
{
dec1 y=x/DIG_MASK;
*s1++='0'+(uchar)y;
@@ -419,7 +419,7 @@ int decimal2string(const decimal_t *from, char *to, int *to_len,
for (buf=buf0+ROUND_UP(intg); intg>0; intg-=DIG_PER_DEC1)
{
dec1 x=*--buf;
- for (i=min(intg, DIG_PER_DEC1); i; i--)
+ for (i=MY_MIN(intg, DIG_PER_DEC1); i; i--)
{
dec1 y=x/10;
*--s='0'+(uchar)(x-y*10);
@@ -1511,8 +1511,8 @@ decimal_round(const decimal_t *from, decimal_t *to, int scale,
if (to != from)
{
- dec1 *p0= buf0+intg0+max(frac1, frac0);
- dec1 *p1= buf1+intg0+max(frac1, frac0);
+ dec1 *p0= buf0+intg0+MY_MAX(frac1, frac0);
+ dec1 *p1= buf1+intg0+MY_MAX(frac1, frac0);
DBUG_ASSERT(p0 - buf0 <= len);
DBUG_ASSERT(p1 - buf1 <= len);
@@ -1523,7 +1523,7 @@ decimal_round(const decimal_t *from, decimal_t *to, int scale,
buf0=to->buf;
buf1=to->buf;
to->sign=from->sign;
- to->intg=min(intg0, len)*DIG_PER_DEC1;
+ to->intg=MY_MIN(intg0, len)*DIG_PER_DEC1;
}
if (frac0 > frac1)
@@ -1625,7 +1625,7 @@ decimal_round(const decimal_t *from, decimal_t *to, int scale,
scale=frac0*DIG_PER_DEC1;
error=E_DEC_TRUNCATED; /* XXX */
}
- for (buf1=to->buf+intg0+max(frac0,0); buf1 > to->buf; buf1--)
+ for (buf1=to->buf+intg0+MY_MAX(frac0,0); buf1 > to->buf; buf1--)
{
buf1[0]=buf1[-1];
}
@@ -1644,7 +1644,7 @@ decimal_round(const decimal_t *from, decimal_t *to, int scale,
/* making 'zero' with the proper scale */
dec1 *p0= to->buf + frac0 + 1;
to->intg=1;
- to->frac= max(scale, 0);
+ to->frac= MY_MAX(scale, 0);
to->sign= 0;
for (buf1= to->buf; buf1<p0; buf1++)
*buf1= 0;
@@ -1693,11 +1693,11 @@ int decimal_result_size(decimal_t *from1, decimal_t *from2, char op, int param)
{
switch (op) {
case '-':
- return ROUND_UP(max(from1->intg, from2->intg)) +
- ROUND_UP(max(from1->frac, from2->frac));
+ return ROUND_UP(MY_MAX(from1->intg, from2->intg)) +
+ ROUND_UP(MY_MAX(from1->frac, from2->frac));
case '+':
- return ROUND_UP(max(from1->intg, from2->intg)+1) +
- ROUND_UP(max(from1->frac, from2->frac));
+ return ROUND_UP(MY_MAX(from1->intg, from2->intg)+1) +
+ ROUND_UP(MY_MAX(from1->frac, from2->frac));
case '*':
return ROUND_UP(from1->intg+from2->intg)+
ROUND_UP(from1->frac)+ROUND_UP(from2->frac);
@@ -1712,7 +1712,7 @@ static int do_add(const decimal_t *from1, const decimal_t *from2, decimal_t *to)
{
int intg1=ROUND_UP(from1->intg), intg2=ROUND_UP(from2->intg),
frac1=ROUND_UP(from1->frac), frac2=ROUND_UP(from2->frac),
- frac0=max(frac1, frac2), intg0=max(intg1, intg2), error;
+ frac0=MY_MAX(frac1, frac2), intg0=MY_MAX(intg1, intg2), error;
dec1 *buf1, *buf2, *buf0, *stop, *stop2, x, carry;
sanity(to);
@@ -1737,7 +1737,7 @@ static int do_add(const decimal_t *from1, const decimal_t *from2, decimal_t *to)
buf0=to->buf+intg0+frac0;
to->sign=from1->sign;
- to->frac=max(from1->frac, from2->frac);
+ to->frac=MY_MAX(from1->frac, from2->frac);
to->intg=intg0*DIG_PER_DEC1;
if (unlikely(error))
{
@@ -1748,7 +1748,7 @@ static int do_add(const decimal_t *from1, const decimal_t *from2, decimal_t *to)
set_if_smaller(intg2, intg0);
}
- /* part 1 - max(frac) ... min (frac) */
+ /* part 1 - MY_MAX(frac) ... MY_MIN(frac) */
if (frac1 > frac2)
{
buf1=from1->buf+intg1+frac1;
@@ -1766,14 +1766,14 @@ static int do_add(const decimal_t *from1, const decimal_t *from2, decimal_t *to)
while (buf1 > stop)
*--buf0=*--buf1;
- /* part 2 - min(frac) ... min(intg) */
+ /* part 2 - MY_MIN(frac) ... MY_MIN(intg) */
carry=0;
while (buf1 > stop2)
{
ADD(*--buf0, *--buf1, *--buf2, carry);
}
- /* part 3 - min(intg) ... max(intg) */
+ /* part 3 - MY_MIN(intg) ... MY_MAX(intg) */
buf1= intg1 > intg2 ? ((stop=from1->buf)+intg1-intg2) :
((stop=from2->buf)+intg2-intg1) ;
while (buf1 > stop)
@@ -1794,7 +1794,7 @@ static int do_sub(const decimal_t *from1, const decimal_t *from2, decimal_t *to)
{
int intg1=ROUND_UP(from1->intg), intg2=ROUND_UP(from2->intg),
frac1=ROUND_UP(from1->frac), frac2=ROUND_UP(from2->frac);
- int frac0=max(frac1, frac2), error;
+ int frac0=MY_MAX(frac1, frac2), error;
dec1 *buf1, *buf2, *buf0, *stop1, *stop2, *start1, *start2;
my_bool carry=0;
@@ -1870,7 +1870,7 @@ static int do_sub(const decimal_t *from1, const decimal_t *from2, decimal_t *to)
FIX_INTG_FRAC_ERROR(to->len, intg1, frac0, error);
buf0=to->buf+intg1+frac0;
- to->frac=max(from1->frac, from2->frac);
+ to->frac=MY_MAX(from1->frac, from2->frac);
to->intg=intg1*DIG_PER_DEC1;
if (unlikely(error))
{
@@ -1881,7 +1881,7 @@ static int do_sub(const decimal_t *from1, const decimal_t *from2, decimal_t *to)
}
carry=0;
- /* part 1 - max(frac) ... min (frac) */
+ /* part 1 - MY_MAX(frac) ... MY_MIN(frac) */
if (frac1 > frac2)
{
buf1=start1+intg1+frac1;
@@ -1905,7 +1905,7 @@ static int do_sub(const decimal_t *from1, const decimal_t *from2, decimal_t *to)
}
}
- /* part 2 - min(frac) ... intg2 */
+ /* part 2 - MY_MIN(frac) ... intg2 */
while (buf2 > start2)
{
SUB(*--buf0, *--buf1, *--buf2, carry);
@@ -2168,11 +2168,11 @@ static int do_div_mod(const decimal_t *from1, const decimal_t *from2,
{
/* we're calculating N1 % N2.
The result will have
- frac=max(frac1, frac2), as for subtraction
+ frac=MY_MAX(frac1, frac2), as for subtraction
intg=intg2
*/
to->sign=from1->sign;
- to->frac=max(from1->frac, from2->frac);
+ to->frac=MY_MAX(from1->frac, from2->frac);
frac0=0;
}
else
@@ -2305,7 +2305,7 @@ static int do_div_mod(const decimal_t *from1, const decimal_t *from2,
/*
now the result is in tmp1, it has
intg=prec1-frac1
- frac=max(frac1, frac2)=to->frac
+ frac=MY_MAX(frac1, frac2)=to->frac
*/
if (dcarry)
*--start1=dcarry;
@@ -2343,7 +2343,7 @@ static int do_div_mod(const decimal_t *from1, const decimal_t *from2,
}
DBUG_ASSERT(intg0 <= ROUND_UP(from2->intg));
stop1=start1+frac0+intg0;
- to->intg=min(intg0*DIG_PER_DEC1, from2->intg);
+ to->intg=MY_MIN(intg0*DIG_PER_DEC1, from2->intg);
}
if (unlikely(intg0+frac0 > to->len))
{
diff --git a/strings/dtoa.c b/strings/dtoa.c
index 6b216056f66..f3498a7bb1e 100644
--- a/strings/dtoa.c
+++ b/strings/dtoa.c
@@ -132,7 +132,7 @@ size_t my_fcvt(double x, int precision, char *to, my_bool *error)
if (len <= decpt)
*dst++= '.';
- for (i= precision - max(0, (len - decpt)); i > 0; i--)
+ for (i= precision - MY_MAX(0, (len - decpt)); i > 0; i--)
*dst++= '0';
}
@@ -221,7 +221,7 @@ size_t my_gcvt(double x, my_gcvt_arg_type type, int width, char *to,
if (x < 0.)
width--;
- res= dtoa(x, 4, type == MY_GCVT_ARG_DOUBLE ? width : min(width, FLT_DIG),
+ res= dtoa(x, 4, type == MY_GCVT_ARG_DOUBLE ? width : MY_MIN(width, FLT_DIG),
&decpt, &sign, &end, buf, sizeof(buf));
if (decpt == DTOA_OVERFLOW)
{
@@ -2182,7 +2182,7 @@ static char *dtoa(double dd, int mode, int ndigits, int *decpt, int *sign,
1 ==> like 0, but with Steele & White stopping rule;
e.g. with IEEE P754 arithmetic , mode 0 gives
1e23 whereas mode 1 gives 9.999999999999999e22.
- 2 ==> max(1,ndigits) significant digits. This gives a
+ 2 ==> MY_MAX(1,ndigits) significant digits. This gives a
return value similar to that of ecvt, except
that trailing zeros are suppressed.
3 ==> through ndigits past the decimal point. This
diff --git a/strings/my_vsnprintf.c b/strings/my_vsnprintf.c
index 2073d5a93d9..a05f60decf9 100644
--- a/strings/my_vsnprintf.c
+++ b/strings/my_vsnprintf.c
@@ -96,7 +96,7 @@ static const char *get_length_arg(const char *fmt, ARGS_INFO *args_arr,
uint *arg_count, size_t *length, uint *flags)
{
fmt= get_length(fmt+1, length, flags);
- *arg_count= max(*arg_count, (uint) *length);
+ *arg_count= MY_MAX(*arg_count, (uint) *length);
(*length)--;
DBUG_ASSERT(*fmt == '$' && *length < MAX_ARGS);
args_arr[*length].arg_type= 'd';
@@ -243,7 +243,7 @@ static char *process_dbl_arg(char *to, char *end, size_t width,
width= FLT_DIG; /* width not set, use default */
else if (width >= NOT_FIXED_DEC)
width= NOT_FIXED_DEC - 1; /* max.precision for my_fcvt() */
- width= min(width, (size_t)(end-to) - 1);
+ width= MY_MIN(width, (size_t)(end-to) - 1);
if (arg_type == 'f')
to+= my_fcvt(par, (int)width , to, NULL);
@@ -292,7 +292,7 @@ static char *process_int_arg(char *to, const char *end, size_t length,
/* If %#d syntax was used, we have to pre-zero/pre-space the string */
if (store_start == buff)
{
- length= min(length, to_length);
+ length= MY_MIN(length, to_length);
if (res_length < length)
{
size_t diff= (length- res_length);
@@ -512,7 +512,7 @@ start:
break;
/* Copy data after the % format expression until next % */
- length= min(end - to , print_arr[i].end - print_arr[i].begin);
+ length= MY_MIN(end - to , print_arr[i].end - print_arr[i].begin);
if (to + length < end)
length++;
to= strnmov(to, print_arr[i].begin, length);
@@ -533,7 +533,7 @@ start:
fmt= get_length(fmt, &arg_index, &unused_flags);
DBUG_ASSERT(*fmt == '$');
fmt++;
- arg_count= max(arg_count, arg_index);
+ arg_count= MY_MAX(arg_count, arg_index);
goto start;
}
diff --git a/strings/str2int.c b/strings/str2int.c
index 64d4e169891..ec89503af5e 100644
--- a/strings/str2int.c
+++ b/strings/str2int.c
@@ -94,7 +94,7 @@ char *str2int(register const char *src, register int radix, long int lower,
machines all, if +|n| is representable, so is -|n|, but on
twos complement machines the converse is not true. So the
"maximum" representable number has a negative representative.
- Limit is set to min(-|lower|,-|upper|); this is the "largest"
+ Limit is set to MY_MIN(-|lower|,-|upper|); this is the "largest"
number we are concerned with. */
/* Calculate Limit using Scale as a scratch variable */